Contact
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
| Download
Views: 647
Image: ubuntu2004
Kernel: Python 3 (system-wide)
#import pandas as pd #import numpy as np #import matplotlib.pyplot as plt #from sklearn.cluster import KMeans #%matplotlib inline #X= -2 * np.random.rand(100,2) #X1 = 1 + 2 * np.random.rand(50,2) #X[50:100, :] = X1 #plt.scatter(X[ : , 0], X[ :, 1], s = 50, c = ‘b’) #plt.show() #from sklearn.cluster import KMeans #Kmean = KMeans(n_clusters=2) #Kmean.fit(X) #Kmean.cluster_centers_ #plt.scatter(X[ : , 0], X[ : , 1], s =50, c=’b’) #plt.scatter(-0.94665068, -0.97138368, s=200, c=’g’, marker=’s’) #plt.scatter(2.01559419, 2.02597093, s=200, c=’r’, marker=’s’) #plt.show() #Kmean.labels_ #sample_test=np.array([-3.0,-3.0]) #second_test=sample_test.reshape(1, -1) #Kmean.predict(second_test)
import pandas as pd  # pandas: tabular data library; supplies the DataFrame used below
import numpy as np  # NumPy: array library (core implemented largely in C) that provides the ndarray
from matplotlib import pyplot as plt  # matplotlib's pyplot interface, used here for scatter plots
import sklearn  # scikit-learn, the machine learning library used in this notebook
from sklearn import datasets  # the example datasets bundled with scikit-learn (e.g. load_iris)
from sklearn.cluster import KMeans  # import only the KMeans estimator from the clustering module
from mpl_toolkits.mplot3d import Axes3D  # 3D axes support; not referenced in the cells shown here
from sklearn.preprocessing import scale  # scale() standardizes feature columns
import sklearn.metrics as sm  # scikit-learn's metrics module; not referenced in the cells shown here
# Load the iris dataset; other bundled datasets are listed at
# https://scikit-learn.org/stable/modules/classes.html#module-sklearn.datasets
iris = datasets.load_iris()
iris.data  # display the raw (unscaled) feature matrix
array([[5.1, 3.5, 1.4, 0.2], [4.9, 3. , 1.4, 0.2], [4.7, 3.2, 1.3, 0.2], [4.6, 3.1, 1.5, 0.2], [5. , 3.6, 1.4, 0.2], [5.4, 3.9, 1.7, 0.4], [4.6, 3.4, 1.4, 0.3], [5. , 3.4, 1.5, 0.2], [4.4, 2.9, 1.4, 0.2], [4.9, 3.1, 1.5, 0.1], [5.4, 3.7, 1.5, 0.2], [4.8, 3.4, 1.6, 0.2], [4.8, 3. , 1.4, 0.1], [4.3, 3. , 1.1, 0.1], [5.8, 4. , 1.2, 0.2], [5.7, 4.4, 1.5, 0.4], [5.4, 3.9, 1.3, 0.4], [5.1, 3.5, 1.4, 0.3], [5.7, 3.8, 1.7, 0.3], [5.1, 3.8, 1.5, 0.3], [5.4, 3.4, 1.7, 0.2], [5.1, 3.7, 1.5, 0.4], [4.6, 3.6, 1. , 0.2], [5.1, 3.3, 1.7, 0.5], [4.8, 3.4, 1.9, 0.2], [5. , 3. , 1.6, 0.2], [5. , 3.4, 1.6, 0.4], [5.2, 3.5, 1.5, 0.2], [5.2, 3.4, 1.4, 0.2], [4.7, 3.2, 1.6, 0.2], [4.8, 3.1, 1.6, 0.2], [5.4, 3.4, 1.5, 0.4], [5.2, 4.1, 1.5, 0.1], [5.5, 4.2, 1.4, 0.2], [4.9, 3.1, 1.5, 0.2], [5. , 3.2, 1.2, 0.2], [5.5, 3.5, 1.3, 0.2], [4.9, 3.6, 1.4, 0.1], [4.4, 3. , 1.3, 0.2], [5.1, 3.4, 1.5, 0.2], [5. , 3.5, 1.3, 0.3], [4.5, 2.3, 1.3, 0.3], [4.4, 3.2, 1.3, 0.2], [5. , 3.5, 1.6, 0.6], [5.1, 3.8, 1.9, 0.4], [4.8, 3. , 1.4, 0.3], [5.1, 3.8, 1.6, 0.2], [4.6, 3.2, 1.4, 0.2], [5.3, 3.7, 1.5, 0.2], [5. , 3.3, 1.4, 0.2], [7. , 3.2, 4.7, 1.4], [6.4, 3.2, 4.5, 1.5], [6.9, 3.1, 4.9, 1.5], [5.5, 2.3, 4. , 1.3], [6.5, 2.8, 4.6, 1.5], [5.7, 2.8, 4.5, 1.3], [6.3, 3.3, 4.7, 1.6], [4.9, 2.4, 3.3, 1. ], [6.6, 2.9, 4.6, 1.3], [5.2, 2.7, 3.9, 1.4], [5. , 2. , 3.5, 1. ], [5.9, 3. , 4.2, 1.5], [6. , 2.2, 4. , 1. ], [6.1, 2.9, 4.7, 1.4], [5.6, 2.9, 3.6, 1.3], [6.7, 3.1, 4.4, 1.4], [5.6, 3. , 4.5, 1.5], [5.8, 2.7, 4.1, 1. ], [6.2, 2.2, 4.5, 1.5], [5.6, 2.5, 3.9, 1.1], [5.9, 3.2, 4.8, 1.8], [6.1, 2.8, 4. , 1.3], [6.3, 2.5, 4.9, 1.5], [6.1, 2.8, 4.7, 1.2], [6.4, 2.9, 4.3, 1.3], [6.6, 3. , 4.4, 1.4], [6.8, 2.8, 4.8, 1.4], [6.7, 3. , 5. , 1.7], [6. , 2.9, 4.5, 1.5], [5.7, 2.6, 3.5, 1. ], [5.5, 2.4, 3.8, 1.1], [5.5, 2.4, 3.7, 1. ], [5.8, 2.7, 3.9, 1.2], [6. , 2.7, 5.1, 1.6], [5.4, 3. , 4.5, 1.5], [6. , 3.4, 4.5, 1.6], [6.7, 3.1, 4.7, 1.5], [6.3, 2.3, 4.4, 1.3], [5.6, 3. , 4.1, 1.3], [5.5, 2.5, 4. 
, 1.3], [5.5, 2.6, 4.4, 1.2], [6.1, 3. , 4.6, 1.4], [5.8, 2.6, 4. , 1.2], [5. , 2.3, 3.3, 1. ], [5.6, 2.7, 4.2, 1.3], [5.7, 3. , 4.2, 1.2], [5.7, 2.9, 4.2, 1.3], [6.2, 2.9, 4.3, 1.3], [5.1, 2.5, 3. , 1.1], [5.7, 2.8, 4.1, 1.3], [6.3, 3.3, 6. , 2.5], [5.8, 2.7, 5.1, 1.9], [7.1, 3. , 5.9, 2.1], [6.3, 2.9, 5.6, 1.8], [6.5, 3. , 5.8, 2.2], [7.6, 3. , 6.6, 2.1], [4.9, 2.5, 4.5, 1.7], [7.3, 2.9, 6.3, 1.8], [6.7, 2.5, 5.8, 1.8], [7.2, 3.6, 6.1, 2.5], [6.5, 3.2, 5.1, 2. ], [6.4, 2.7, 5.3, 1.9], [6.8, 3. , 5.5, 2.1], [5.7, 2.5, 5. , 2. ], [5.8, 2.8, 5.1, 2.4], [6.4, 3.2, 5.3, 2.3], [6.5, 3. , 5.5, 1.8], [7.7, 3.8, 6.7, 2.2], [7.7, 2.6, 6.9, 2.3], [6. , 2.2, 5. , 1.5], [6.9, 3.2, 5.7, 2.3], [5.6, 2.8, 4.9, 2. ], [7.7, 2.8, 6.7, 2. ], [6.3, 2.7, 4.9, 1.8], [6.7, 3.3, 5.7, 2.1], [7.2, 3.2, 6. , 1.8], [6.2, 2.8, 4.8, 1.8], [6.1, 3. , 4.9, 1.8], [6.4, 2.8, 5.6, 2.1], [7.2, 3. , 5.8, 1.6], [7.4, 2.8, 6.1, 1.9], [7.9, 3.8, 6.4, 2. ], [6.4, 2.8, 5.6, 2.2], [6.3, 2.8, 5.1, 1.5], [6.1, 2.6, 5.6, 1.4], [7.7, 3. , 6.1, 2.3], [6.3, 3.4, 5.6, 2.4], [6.4, 3.1, 5.5, 1.8], [6. , 3. , 4.8, 1.8], [6.9, 3.1, 5.4, 2.1], [6.7, 3.1, 5.6, 2.4], [6.9, 3.1, 5.1, 2.3], [5.8, 2.7, 5.1, 1.9], [6.8, 3.2, 5.9, 2.3], [6.7, 3.3, 5.7, 2.5], [6.7, 3. , 5.2, 2.3], [6.3, 2.5, 5. , 1.9], [6.5, 3. , 5.2, 2. ], [6.2, 3.4, 5.4, 2.3], [5.9, 3. , 5.1, 1.8]])
# Standardize each feature column to zero mean and unit variance so that no
# single feature dominates the distance computations K-means relies on.
X = scale(iris.data)
y = pd.DataFrame(iris.target)  # wrap the dataset's class labels (its target array) in a one-column DataFrame
print(y)
0 0 0 1 0 2 0 3 0 4 0 .. .. 145 2 146 2 147 2 148 2 149 2 [150 rows x 1 columns]
variable_names = iris.feature_names  # names of the dataset's feature columns
print(variable_names)
['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
X[0:10,]  # first ten rows of the standardized feature matrix
array([[-0.90068117, 1.01900435, -1.34022653, -1.3154443 ], [-1.14301691, -0.13197948, -1.34022653, -1.3154443 ], [-1.38535265, 0.32841405, -1.39706395, -1.3154443 ], [-1.50652052, 0.09821729, -1.2833891 , -1.3154443 ], [-1.02184904, 1.24920112, -1.34022653, -1.3154443 ], [-0.53717756, 1.93979142, -1.16971425, -1.05217993], [-1.50652052, 0.78880759, -1.34022653, -1.18381211], [-1.02184904, 0.78880759, -1.2833891 , -1.3154443 ], [-1.74885626, -0.36217625, -1.34022653, -1.3154443 ], [-1.14301691, 0.09821729, -1.2833891 , -1.44707648]])
# n_clusters is the number of clusters (and therefore centroids) to form.
# random_state is NOT a centroid count: it seeds the random centroid
# initialization so that repeated runs give the same result.
clustering = KMeans(n_clusters = 3, random_state = 5)
# Fitting K-means computes the cluster centroids from X and assigns every
# sample a cluster label, available afterwards as clustering.labels_.
clustering.fit(X)
KMeans(n_clusters=3, random_state=5)
iris_df = pd.DataFrame(iris.data)  # convert the raw (unscaled) feature array into a pandas DataFrame
iris_df.shape  # (number of rows, number of columns) of the DataFrame
(150, 4)
iris_df.head()  # first five rows; the columns still carry default integer names
0 1 2 3
0 5.1 3.5 1.4 0.2
1 4.9 3.0 1.4 0.2
2 4.7 3.2 1.3 0.2
3 4.6 3.1 1.5 0.2
4 5.0 3.6 1.4 0.2
iris_df.columns = ["Sepal_Length","Sepal_Width","Petal_Length","Petal_Width"]  # assigning to .columns renames the DataFrame's columns
iris_df.head()  # show the first five rows, now with descriptive column names
Sepal_Length Sepal_Width Petal_Length Petal_Width
0 5.1 3.5 1.4 0.2
1 4.9 3.0 1.4 0.2
2 4.7 3.2 1.3 0.2
3 4.6 3.1 1.5 0.2
4 5.0 3.6 1.4 0.2
y.columns = ["Target"]  # give the single label column a descriptive name
color_theme = np.array(['darkgray','lightsalmon','powderblue'])  # one colour per class/cluster, looked up by integer label when plotting
# Side-by-side comparison of the true species labels and the K-means cluster
# assignments, both drawn in petal-length / petal-width space.
# Each statement is on its own line: several statements separated only by
# spaces on one line is a syntax error in Python.

# Left panel: points coloured by the ground-truth class in iris.target.
plt.subplot(1, 2, 1)
plt.scatter(x=iris_df.Petal_Length, y=iris_df.Petal_Width, c=color_theme[iris.target], s=50)
plt.title("Ground Truth Classfication")

# Right panel: points coloured by the cluster id K-means assigned.
# Cluster ids are arbitrary, so a cluster's colour need not match the colour
# of the corresponding species in the left panel.
plt.subplot(1, 2, 2)
plt.scatter(x=iris_df.Petal_Length, y=iris_df.Petal_Width, c=color_theme[clustering.labels_], s=50)
plt.title("K Means Classfication")
Text(0.5, 1.0, 'K Means Classfication')
Image in a Jupyter notebook
Text(0.5, 1.0, 'K Means Classfication')
Image in a Jupyter notebook
<matplotlib.collections.PathCollection at 0x7fa91275d2b0>
Image in a Jupyter notebook
Text(0.5, 1.0, 'K Means Classfication')
Image in a Jupyter notebook