Open in CoCalc
In [1]:
# Imports: numerics, plotting, and the two hierarchical-clustering toolkits
# (SciPy for the dendrogram, scikit-learn for the flat cluster labels).
import numpy as np
import matplotlib.pyplot as plt
import scipy.cluster.hierarchy as sch
from sklearn.cluster import AgglomerativeClustering

# Fake data: a one-dimensional sample of grades.
gradesy = np.array([
    66.53151386, 71.8007043, 89.27401364, 86.71081817, 80.24271855,
    84.44311648, 81.53247556, 76.23224501, 92.70329994, 76.35369269,
    79.34599925, 76.54560984, 84.92265733, 82.14320834, 85.49566018,
    92.90703304, 89.56281031, 70.09917522, 91.48518417,
])

# The plotting and clustering calls below index two columns, so pair every
# grade with a constant x of 0: column 0 is all zeros, column 1 is the grade.
gradesx = np.zeros_like(gradesy)
grades = np.column_stack((gradesx, gradesy))

# Plot the fake data before clustering to see the input.
plt.scatter(grades[:, 0], grades[:, 1], s=100)
<matplotlib.collections.PathCollection at 0x7fdee769cf98>
In [2]:
# Make the dendrogram: build the Ward linkage tree over the (x, grade) points
# and draw it on the current matplotlib figure. The dict returned by
# sch.dendrogram is kept in `dendrogram`.
dendrogram = sch.dendrogram(sch.linkage(grades, method='ward'))

# Configure the flat clustering: cut the hierarchy into `clusters` groups.
clusters = 8
# The `affinity` keyword was deprecated in scikit-learn 1.2 and removed in
# 1.4, so passing affinity='euclidean' raises TypeError on current releases.
# Euclidean distance is the default (and the only metric Ward linkage
# accepts), so omitting the keyword yields the same model on every version.
hc = AgglomerativeClustering(n_clusters=clusters, linkage='ward')
In [3]:
# Draw the clustered points as a 2-D scatter, even though that is not the
# best way to visualize one-dimensional data.
# Fit the clusterer and get one integer cluster label per row of `grades`.
y_hc = hc.fit_predict(grades)

# One scatter call per cluster, so matplotlib's colour cycle gives each
# cluster its own colour.
for label in range(clusters):
    members = grades[y_hc == label]
    plt.scatter(members[:, 0], members[:, 1], s=100)
In [ ]: