Contact
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
| Download
Views: 647
Image: ubuntu2004
Kernel: Python 3 (system-wide)
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
# make_blobs is part of the public sklearn.datasets API; the old
# sklearn.datasets.samples_generator module was deprecated in 0.22 and
# removed in 0.24.
from sklearn.datasets import make_blobs
/usr/local/lib/python3.6/dist-packages/sklearn/utils/deprecation.py:143: FutureWarning: The sklearn.datasets.samples_generator module is deprecated in version 0.22 and will be removed in version 0.24. The corresponding classes / functions should instead be imported from sklearn.datasets. Anything that cannot be imported from sklearn.datasets is now part of the private API. warnings.warn(message, FutureWarning)
from sklearn.cluster import KMeans
# Draw 300 synthetic samples grouped around 4 centers (fixed seed for
# reproducibility), then plot the first two feature columns.
X, y = make_blobs(
    n_samples=300,
    centers=4,
    cluster_std=0.60,
    random_state=0,
)
plt.scatter(X[:, 0], X[:, 1])
<matplotlib.collections.PathCollection at 0x7f43a8a6b8d0>
Image in a Jupyter notebook
# Elbow method: fit K-Means for every cluster count from 1 to 10 and record
# each model's inertia (plotted as WCSS below).
wcss = []
for i in range(1, 11):
    kmeans = KMeans(
        n_clusters=i,
        init='k-means++',
        max_iter=300,
        n_init=10,
        random_state=0,
    )
    # fit() returns the estimator itself, so the inertia can be read directly.
    wcss.append(kmeans.fit(X).inertia_)
# Plot the recorded WCSS values against the number of clusters tried.
plt.plot(range(1, 11), wcss)
plt.title('Elbow Method')
plt.xlabel('Number of clusters')
plt.ylabel('WCSS')
plt.show()
Image in a Jupyter notebook
# Fit the final 4-cluster model and keep the per-sample cluster labels.
kmeans = KMeans(
    n_clusters=4,
    init='k-means++',
    max_iter=300,
    n_init=10,
    random_state=0,
)
pred_y = kmeans.fit_predict(X)

# Show the raw points, then overlay the fitted cluster centers as large
# red markers.
plt.scatter(X[:, 0], X[:, 1])
plt.scatter(
    kmeans.cluster_centers_[:, 0],
    kmeans.cluster_centers_[:, 1],
    s=300,
    c='red',
)
plt.show()
Image in a Jupyter notebook
import pandas as pd
from matplotlib import pyplot as plt
import sklearn
from sklearn import datasets
from sklearn.cluster import KMeans
from mpl_toolkits.mplot3d import Axes3D
from sklearn.preprocessing import scale
import sklearn.metrics as sm
# Load the iris data set. Other bundled data sets are listed at
# https://scikit-learn.org/stable/modules/classes.html#module-sklearn.datasets
iris = datasets.load_iris()

# Features are standardised with scale(); the targets are wrapped in a
# DataFrame so the column can be named later.
X = scale(iris.data)
y = pd.DataFrame(iris.target)
variable_names = iris.feature_names

# Preview the first ten scaled rows.
X[:10]
array([[-0.90068117, 1.01900435, -1.34022653, -1.3154443 ], [-1.14301691, -0.13197948, -1.34022653, -1.3154443 ], [-1.38535265, 0.32841405, -1.39706395, -1.3154443 ], [-1.50652052, 0.09821729, -1.2833891 , -1.3154443 ], [-1.02184904, 1.24920112, -1.34022653, -1.3154443 ], [-0.53717756, 1.93979142, -1.16971425, -1.05217993], [-1.50652052, 0.78880759, -1.34022653, -1.18381211], [-1.02184904, 0.78880759, -1.2833891 , -1.3154443 ], [-1.74885626, -0.36217625, -1.34022653, -1.3154443 ], [-1.14301691, 0.09821729, -1.2833891 , -1.44707648]])
# Cluster the scaled iris features into 3 groups with a fixed seed.
clustering = KMeans(n_clusters=3, random_state=5)
clustering.fit(X)
KMeans(n_clusters=3, random_state=5)
# Keep the unscaled measurements in a DataFrame for labelling and plotting,
# then check its dimensions.
iris_df = pd.DataFrame(data=iris.data)
iris_df.shape
(150, 4)
# Preview the first rows; columns still carry the default integer labels here.
iris_df.head()
0 1 2 3
0 5.1 3.5 1.4 0.2
1 4.9 3.0 1.4 0.2
2 4.7 3.2 1.3 0.2
3 4.6 3.1 1.5 0.2
4 5.0 3.6 1.4 0.2
# Give the feature and target columns descriptive names.
iris_df.columns = [
    "Sepal_Length",
    "Sepal_Width",
    "Petal_Length",
    "Petal_Width",
]
y.columns = ["Target"]

# Confirm the new column labels.
iris_df.head()
Sepal_Length Sepal_Width Petal_Length Petal_Width
0 5.1 3.5 1.4 0.2
1 4.9 3.0 1.4 0.2
2 4.7 3.2 1.3 0.2
3 4.6 3.1 1.5 0.2
4 5.0 3.6 1.4 0.2
# Three named colours; presumably indexed by class/cluster label in the
# scatter plots that follow -- TODO confirm against the plotting cell.
color_theme = np.array(['darkgray', 'lightsalmon', 'powderblue'])

# Select the first panel of a 1-row, 2-column subplot grid.
plt.subplot(1, 2, 1)
<AxesSubplot:>
Image in a Jupyter notebook
plt.scatter(x=iris_df.Petal_Length, y =iris_df.Petal_Width, c=color_)