| Hosted by CoCalc | Download
Kernel: Python 3 (system-wide)
# Imports: array math, plotting, SciPy hierarchical clustering, sklearn clustering.
import numpy as np
import matplotlib.pyplot as plt
import scipy.cluster.hierarchy as sch
from sklearn.cluster import AgglomerativeClustering

# Fake data: 21 exam grades (percent), listed in descending order.
gradesy = np.array([94.28379291, 86.98982195, 86.63611546, 85.7203357,
                    84.52017914, 83.11406572, 82.5180994, 80.0522343,
                    77.76367201, 76.87708018, 74.87290704, 74.43641397,
                    70.92109845, 70.74501915, 69.955087, 68.72333003,
                    67.43377364, 65.31794233, 65.19391681, 64.07626518,
                    46.06843416])

# The data is really 1-D, but build (n, 2) points with x = 0 so the same
# array feeds both the clustering and a 2-D scatter plot.
# np.column_stack replaces the original concatenate + two manual reshapes.
gradesx = np.zeros(gradesy.size)
grades = np.column_stack((gradesx, gradesy))

# Plot the fake data before clustering to see the input.
plt.scatter(grades[:, 0], grades[:, 1], s=100)
<matplotlib.collections.PathCollection at 0x7f822b9a7f28>
Image in a Jupyter notebook
# Dendrogram of the Ward linkage: visual aid for choosing a cluster count.
dendrogram = sch.dendrogram(sch.linkage(grades, method='ward'))

# Configure bottom-up (agglomerative) clustering with the chosen count.
# FIX: the original passed affinity='euclidean', a keyword deprecated in
# scikit-learn 1.2 and removed in 1.4. Ward linkage only supports Euclidean
# distance (and 'euclidean' was the default), so omitting it is
# behavior-identical on old versions and keeps the cell working on new ones.
clusters = 11
hc = AgglomerativeClustering(n_clusters=clusters, linkage='ward')
Image in a Jupyter notebook
# 2-D scatter of the clusters, colored by label. (All x-values are zero, so
# this is really a 1-D strip — not the best visualization, but it shows
# membership at a glance.)
y_hc = hc.fit_predict(grades)
for i in range(clusters):  # range(0, n) is just range(n)
    plt.scatter(grades[y_hc == i, 0], grades[y_hc == i, 1], s=100)
    # Print the grade values assigned to cluster i (column 1 holds grades).
    print("Cluster ", i, " ", grades[y_hc == i, 1])
Cluster 0 [65.31794233 65.19391681 64.07626518] Cluster 1 [68.72333003 67.43377364] Cluster 2 [85.7203357 84.52017914] Cluster 3 [70.92109845 70.74501915 69.955087 ] Cluster 4 [86.98982195 86.63611546] Cluster 5 [46.06843416] Cluster 6 [74.87290704 74.43641397] Cluster 7 [83.11406572 82.5180994 ] Cluster 8 [77.76367201 76.87708018] Cluster 9 [94.28379291] Cluster 10 [80.0522343]
Image in a Jupyter notebook