In [1]:

from krmining.clustering import KMeans
from krmining.datasets import make_covid_id
from krmining.datasets import get_example_covid_id
from krmining.clustering import KMeans
from krmining.datasets import make_covid_id
from krmining.datasets import get_example_covid_id

Using the dataset from `make_covid_id`

In [2]:

# Build the dataset with make_covid_id() and preview its first rows.
# The last expression of the cell is what the notebook renders.
df = make_covid_id()

df.head()

Out[2]:

	Province	Daily_Case	Daily_Death	Daily_Recovered	Daily_Active_Case	Cumulative_Case	Cumulative_Recovered	Cumulative_Death	Cumulative_Active_Case
0	ACEH	11237	449	9785	1003	1995884	1537250	79429	379205
1	BALI	44896	1231	41985	1680	5689598	5100446	151968	437184
2	BANTEN	47451	1207	44197	2047	5276319	4847946	154955	273418
3	BENGKULU	6959	173	5939	847	804077	689618	26581	87878
4	DIY	39824	965	35045	3814	3984843	3098072	95229	791542

In [3]:

# Print the column names, non-null counts and dtypes of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 34 entries, 0 to 33
Data columns (total 9 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   Province                34 non-null     object
 1   Daily_Case              34 non-null     int64 
 2   Daily_Death             34 non-null     int64 
 3   Daily_Recovered         34 non-null     int64 
 4   Daily_Active_Case       34 non-null     int64 
 5   Cumulative_Case         34 non-null     int64 
 6   Cumulative_Recovered    34 non-null     int64 
 7   Cumulative_Death        34 non-null     int64 
 8   Cumulative_Active_Case  34 non-null     int64 
dtypes: int64(8), object(1)
memory usage: 2.5+ KB

Using the example dataset

In [4]:

# Rebind `df` to the example dataset returned by get_example_covid_id();
# every later cell that reads `df` operates on this frame.
df = get_example_covid_id()

df.head()

Out[4]:

	Daily_Case	Daily_Death
0	11237	449
1	44896	1231
2	47451	1207
3	6959	173
4	39824	965

In [5]:

# Print the column names, non-null counts and dtypes of the example frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 34 entries, 0 to 33
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype
---  ------       --------------  -----
 0   Daily_Case   34 non-null     int64
 1   Daily_Death  34 non-null     int64
dtypes: int64(2)
memory usage: 672.0 bytes

In [6]:

# Fit a KMeans model with 3 clusters, using the default initialisation
# (no `init` argument is passed). Construct and fit exactly once.
kmeans = KMeans(3)
kmeans.fit(df)

C:\Users\Bina Umat\anaconda3\lib\site-packages\krmining\clustering\_kmeans.py:22: UserWarning: The model still in maintaining in slow or extended memory
  warnings.warn(

Out[6]:

<krmining.clustering._kmeans.KMeans at 0x27022eb7a30>

In [7]:

# Evaluate the fitted model on the data it was trained on; the result is
# a dict of per-cluster SSE values plus the overall SSE.
kmeans.evaluate(df)

Out[7]:

{'sse_cluster_0': 8959621596.733332,
 'sse_all_cluster': 18654494327.4,
 'sse_cluster_1': 0.0,
 'sse_cluster_2': 9694872730.666668}

In [8]:

# Refit a 3-cluster model, this time with init="random". This rebinds
# `kmeans`, so the next evaluation refers to this model.
# NOTE(review): no seed is set anywhere in the visible cells, so this result
# is presumably not reproducible across runs; confirm whether KMeans accepts
# a seed and pin it if so.
kmeans = KMeans(3, init="random")
kmeans.fit(df)

Out[8]:

<krmining.clustering._kmeans.KMeans at 0x27022ee8910>

In [9]:

# Evaluate the randomly initialised model on the same data; the result is
# a dict of per-cluster SSE values plus the overall SSE.
kmeans.evaluate(df)

Out[9]:

{'sse_cluster_0': 1759594376.2916665,
 'sse_all_cluster': 36157681312.8631,
 'sse_cluster_1': 8760559192.57143,
 'sse_cluster_2': 25637527744.0}