Kernel: Python 3 (old Anaconda 3)
Scikit-learn is an advanced machine learning library.
Its main design idea is to fit a prediction model to a dataset through an API that is used consistently across all methods.
In the first example here, we give the classical K-Means algorithm a hard time: 4 random blobs but only 3 clusters to look for.
In [1]:
<matplotlib.collections.PathCollection at 0x7f931cb6c470>
Setup
In [2]:
[[-1 -1]
[-2 -1]
[-3 -2]
[ 1 1]
[ 2 1]
[ 3 2]]
[ 0.99244289 0.00755711]
[ 6.30061232 0.54980396]
In [3]:
In [4]:
array([[ 0.14951303, -2.07990586, -0.11172609, 0.20282039, -0.68910226,
-1.9354933 ],
[ 0.55589371, -0.36440374, -0.49965389, 1.75017318, 0.60128138,
0.8083439 ],
[ 1.59974156, 1.38593198, 0.04118741, 1.58465053, -0.32814035,
0.38628408],
[ 2.34546148, -0.84474695, -0.13565193, 0.36467729, -1.06126775,
-0.73609866],
[ 0.67086539, 1.29918519, -0.66543366, -0.36079058, 0.49720726,
-1.09133919],
[-1.27017016, 0.91480503, 1.55474907, -2.34989812, -2.03220673,
-0.01429826],
[ 0.20249946, 0.46564405, -0.96683714, -0.28651864, 0.29876689,
0.0228112 ],
[-0.45950656, 1.84434408, -0.29457464, 0.54662151, 0.88931201,
-0.38255987],
[ 0.65336634, -1.08585497, -1.43459617, -0.02805966, -0.83315637,
0.64720964]])
In [5]:
In [6]:
[[ 0.14951303 -2.07990586 -0.11172609 0.20282039 -0.68910226 -1.9354933 ]
[ 0.55589371 -0.36440374 -0.49965389 1.75017318 0.60128138 0.8083439 ]
[ 1.59974156 1.38593198 0.04118741 1.58465053 -0.32814035 0.38628408]
[ 2.34546148 -0.84474695 -0.13565193 0.36467729 -1.06126775 -0.73609866]
[ 0.67086539 1.29918519 -0.66543366 -0.36079058 0.49720726 -1.09133919]
[-1.27017016 0.91480503 1.55474907 -2.34989812 -2.03220673 -0.01429826]
[ 0.20249946 0.46564405 -0.96683714 -0.28651864 0.29876689 0.0228112 ]
[-0.45950656 1.84434408 -0.29457464 0.54662151 0.88931201 -0.38255987]
[ 0.65336634 -1.08585497 -1.43459617 -0.02805966 -0.83315637 0.64720964]]
In [0]:
In [7]:
((9, 9), (6, 6), (6,))
In [8]:
[ 4.70459985 4.11279482 2.89975439 2.49793917 1.95932535 0.9420679 ]
In [9]:
In [10]:
[[ 4.70459985 0. 0. 0. 0. 0. ]
[ 0. 4.11279482 0. 0. 0. 0. ]
[ 0. 0. 2.89975439 0. 0. 0. ]
[ 0. 0. 0. 2.49793917 0. 0. ]
[ 0. 0. 0. 0. 1.95932535 0. ]
[ 0. 0. 0. 0. 0. 0.9420679 ]
[ 0. 0. 0. 0. 0. 0. ]
[ 0. 0. 0. 0. 0. 0. ]
[ 0. 0. 0. 0. 0. 0. ]]
In [11]:
numpy.ndarray
In [12]:
In [13]:
[[ 0.14951303 -2.07990586 -0.11172609 0.20282039 -0.68910226 -1.9354933 ]
[ 0.55589371 -0.36440374 -0.49965389 1.75017318 0.60128138 0.8083439 ]
[ 1.59974156 1.38593198 0.04118741 1.58465053 -0.32814035 0.38628408]
[ 2.34546148 -0.84474695 -0.13565193 0.36467729 -1.06126775 -0.73609866]
[ 0.67086539 1.29918519 -0.66543366 -0.36079058 0.49720726 -1.09133919]
[-1.27017016 0.91480503 1.55474907 -2.34989812 -2.03220673 -0.01429826]
[ 0.20249946 0.46564405 -0.96683714 -0.28651864 0.29876689 0.0228112 ]
[-0.45950656 1.84434408 -0.29457464 0.54662151 0.88931201 -0.38255987]
[ 0.65336634 -1.08585497 -1.43459617 -0.02805966 -0.83315637 0.64720964]]
In [14]:
[[ 0.14951303 -2.07990586 -0.11172609 0.20282039 -0.68910226 -1.9354933 ]
[ 0.55589371 -0.36440374 -0.49965389 1.75017318 0.60128138 0.8083439 ]
[ 1.59974156 1.38593198 0.04118741 1.58465053 -0.32814035 0.38628408]
[ 2.34546148 -0.84474695 -0.13565193 0.36467729 -1.06126775 -0.73609866]
[ 0.67086539 1.29918519 -0.66543366 -0.36079058 0.49720726 -1.09133919]
[-1.27017016 0.91480503 1.55474907 -2.34989812 -2.03220673 -0.01429826]
[ 0.20249946 0.46564405 -0.96683714 -0.28651864 0.29876689 0.0228112 ]
[-0.45950656 1.84434408 -0.29457464 0.54662151 0.88931201 -0.38255987]
[ 0.65336634 -1.08585497 -1.43459617 -0.02805966 -0.83315637 0.64720964]]
In [15]:
9
In [16]:
In [17]:
In [18]:
(array([[-0.61065421, 0.29544672, 0.73471945],
[ 0.04664036, 0.93960405, -0.33907065],
[-0.79052268, -0.17278734, -0.58755291]]),
array([[ 4.20973742, 0. , 0. , 0. , 0. ],
[ 0. , 2.76957395, 0. , 0. , 0. ],
[ 0. , 0. , 1.86906999, 0. , 0. ]]),
array([[-0.27331948, 0.33380402, -0.02231331, 0.79648941, -0.42305801],
[ 0.11214579, -0.51175169, 0.7833416 , 0.10645057, -0.31713994],
[ 0.47375278, 0.03935592, 0.16734891, 0.51790589, 0.69121432],
[ 0.28391369, -0.67456705, -0.59820857, 0.25561576, -0.20287781],
[-0.77953262, -0.41242634, 0.00434816, 0.14389915, 0.44889591]]))
In [19]:
In [20]:
array([[ 1.],
[ 1.],
[ 1.]])
In [21]:
(3, 1)
In [22]:
array([[ 1., 1., 1.]])
In [23]:
In [24]:
array([[ 1., 1., 1.],
[ 1., 1., 1.],
[ 1., 1., 1.]])
In [25]:
In [26]:
array([[ 1., 1., 1.],
[ 1., 1., 1.],
[ 1., 1., 1.]])
In [27]:
In [28]:
In [29]:
array([[-0.14710569],
[-0.93993508],
[ 0.17662994],
[ 0.02092006],
[ 0.82216025]])
In [30]:
(5, 1)
$X$ é a matriz dos atributos; $X_c$ é a matriz centrada (a média de cada atributo é subtraída da linha correspondente).
In [31]:
In [32]:
array([[-1.92334498, 0.9847132 , 0.93863178],
[-0.25581362, -0.85593803, 1.11175164],
[ 0.78284708, -1.61886148, 0.8360144 ],
[ 0.8300604 , -0.58065568, -0.24940472],
[ 0.13466784, -0.1774775 , 0.04280966]])
Covariância
In [33]:
array([[ 1.84998187, 0.23089659, -0.77169653, -0.80079034, -0.13119822],
[ 0.23089659, 0.67802074, 0.70494085, 0.00246281, 0.05501786],
[-0.77169653, 0.70494085, 1.31082737, 0.46043518, 0.14284177],
[-0.80079034, 0.00246281, 0.46043518, 0.362788 , 0.06805295],
[-0.13119822, 0.05501786, 0.14284177, 0.06805295, 0.01715545]])
In [34]:
(array([[-0.74180012, -0.46975394, 0.2690093 , -0.38937913, 0.07128612],
[ 0.10673064, -0.6721524 , 0.16727602, 0.70934079, -0.07532667],
[ 0.55543022, -0.56126746, -0.25057434, -0.55935268, -0.02851645],
[ 0.35253976, 0.10655834, 0.91456776, -0.16694197, 0.00756512],
[ 0.0745261 , -0.03415468, -0.02076163, 0.06689177, 0.99416989]]),
array([[ 2.78833162e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00],
[ 0.00000000e+00, 1.43044182e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00],
[ 0.00000000e+00, 0.00000000e+00, 1.21600495e-16,
0.00000000e+00, 0.00000000e+00],
[ 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
7.96208428e-17, 0.00000000e+00],
[ 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 3.86762520e-18]]),
array([[-0.74180012, 0.10673064, 0.55543022, 0.35253976, 0.0745261 ],
[-0.46975394, -0.6721524 , -0.56126746, 0.10655834, -0.03415468],
[ 0.3929592 , -0.10278289, -0.03062826, 0.91275679, -0.03091438],
[ 0.26133121, -0.71692996, 0.61280343, -0.17624886, -0.10547853],
[ 0.07970496, -0.11075571, 0.00315661, -0.01313114, 0.99055434]]))
In [35]:
In [36]:
array([[ 2.78833162e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00],
[ 0.00000000e+00, 1.43044182e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00],
[ 0.00000000e+00, 0.00000000e+00, 1.21600495e-16,
0.00000000e+00, 0.00000000e+00],
[ 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
7.96208428e-17, 0.00000000e+00],
[ 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 3.86762520e-18]])
In [0]:
In [0]: