In [1]:
Copied!
from krmining.clustering import KMeans
from krmining.datasets import make_covid_id
from krmining.datasets import get_example_covid_id
from krmining.clustering import KMeans
from krmining.datasets import make_covid_id
from krmining.datasets import get_example_covid_id
Using the full dataset (`make_covid_id`)¶
In [2]:
Copied!
# Load the COVID dataset shipped with krmining.datasets and preview it.
# The recorded output shows a Province column plus eight integer
# daily/cumulative count columns.
df = make_covid_id()
df.head()
Out[2]:
| Province | Daily_Case | Daily_Death | Daily_Recovered | Daily_Active_Case | Cumulative_Case | Cumulative_Recovered | Cumulative_Death | Cumulative_Active_Case | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | ACEH | 11237 | 449 | 9785 | 1003 | 1995884 | 1537250 | 79429 | 379205 |
| 1 | BALI | 44896 | 1231 | 41985 | 1680 | 5689598 | 5100446 | 151968 | 437184 |
| 2 | BANTEN | 47451 | 1207 | 44197 | 2047 | 5276319 | 4847946 | 154955 | 273418 |
| 3 | BENGKULU | 6959 | 173 | 5939 | 847 | 804077 | 689618 | 26581 | 87878 |
| 4 | DIY | 39824 | 965 | 35045 | 3814 | 3984843 | 3098072 | 95229 | 791542 |
In [3]:
Copied!
# Summarise the frame's columns, dtypes and non-null counts.
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 34 entries, 0 to 33 Data columns (total 9 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Province 34 non-null object 1 Daily_Case 34 non-null int64 2 Daily_Death 34 non-null int64 3 Daily_Recovered 34 non-null int64 4 Daily_Active_Case 34 non-null int64 5 Cumulative_Case 34 non-null int64 6 Cumulative_Recovered 34 non-null int64 7 Cumulative_Death 34 non-null int64 8 Cumulative_Active_Case 34 non-null int64 dtypes: int64(8), object(1) memory usage: 2.5+ KB
Using the example dataset (`get_example_covid_id`)¶
In [4]:
Copied!
# Load the example dataset; per the recorded output it contains only the
# Daily_Case and Daily_Death columns.
# Note: this rebinds `df`, replacing the frame loaded by make_covid_id().
df = get_example_covid_id()
df.head()
Out[4]:
| Daily_Case | Daily_Death | |
|---|---|---|
| 0 | 11237 | 449 |
| 1 | 44896 | 1231 |
| 2 | 47451 | 1207 |
| 3 | 6959 | 173 |
| 4 | 39824 | 965 |
In [5]:
Copied!
# Confirm the example frame's columns, dtypes and non-null counts before
# clustering.
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 34 entries, 0 to 33 Data columns (total 2 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Daily_Case 34 non-null int64 1 Daily_Death 34 non-null int64 dtypes: int64(2) memory usage: 672.0 bytes
In [6]:
Copied!
# Build a KMeans model with 3 clusters (the evaluation output reports
# sse_cluster_0..2) using the library's default initialisation, then fit it
# on the example frame. Constructing/fitting once is enough; the recorded
# run shows this step emits a UserWarning from krmining's _kmeans module.
kmeans = KMeans(3)
kmeans.fit(df)
C:\Users\Bina Umat\anaconda3\lib\site-packages\krmining\clustering\_kmeans.py:22: UserWarning: The model still in maintaining in slow or extended memory warnings.warn(
Out[6]:
<krmining.clustering._kmeans.KMeans at 0x27022eb7a30>
In [7]:
Copied!
# Evaluate the fitted model on the same data. The recorded result is a dict
# with one SSE entry per cluster plus 'sse_all_cluster', which equals the sum
# of the per-cluster values.
kmeans.evaluate(df)
Out[7]:
{'sse_cluster_0': 8959621596.733332,
'sse_all_cluster': 18654494327.4,
'sse_cluster_1': 0.0,
'sse_cluster_2': 9694872730.666668}
In [8]:
Copied!
# Refit with init="random" to compare against the default initialisation.
# Note: this rebinds `kmeans`, so the previously fitted model is discarded.
# NOTE(review): no seed is visible here; presumably the starting centroids
# (and therefore the SSE values) can differ between runs — confirm whether
# krmining's KMeans accepts a seed/random-state argument and set it if so.
kmeans = KMeans(3, init="random")
kmeans.fit(df)
Out[8]:
<krmining.clustering._kmeans.KMeans at 0x27022ee8910>
In [9]:
Copied!
# Evaluate the randomly initialised model on the same data so its SSE values
# can be compared with the default-initialisation run. In the recorded run
# the total SSE is higher than for the default initialisation.
kmeans.evaluate(df)
Out[9]:
{'sse_cluster_0': 1759594376.2916665,
'sse_all_cluster': 36157681312.8631,
'sse_cluster_1': 8760559192.57143,
'sse_cluster_2': 25637527744.0}