Shared: moxliukas / manoseimas_lt.ipynb (Open in CoCalc)
Author: Mindey I.
Views : 15
Description: Jupyter notebook moxliukas/manoseimas_lt.ipynb
In [1]:
import pandas as pd
In [2]:
# Load the questionnaire table, index it by party ('Partija') and divide every
# value by 100 (presumably percentages -> fractions; confirm against the CSV).
data = (
    pd.read_csv('http://petras.kudaras.lt/notebooks/manoseimas.csv')
    .set_index('Partija')
    .div(100.)
)
In [3]:
# Show the transposed table (one row per question) for the first seven parties.
# `.ix` was removed in pandas 1.0; the column labels here are party names
# (strings), so the integer slice was always positional and `.iloc` is the
# exact equivalent.
data.T.iloc[:, :7]
Partija TT DP LSDP AW LRLS MG TSLK
Šauktiniai 0.92 0.89 0.96 0.72 0.95 0.79 0.91
Meras 0.82 0.94 0.89 0.85 0.31 0.89 0.55
Internetinis_balsavimas 0.49 0.80 0.91 0.11 1.00 0.74 0.22
Darbo_kodeksas 0.91 0.79 0.98 0.15 0.95 0.41 0.42
Švietimas 0.91 0.92 0.86 0.60 0.29 0.63 0.42
Vaiko_teisės 0.74 0.78 0.91 0.44 0.76 0.40 0.34
Alkoholis 0.59 0.53 0.55 0.68 0.44 0.75 0.82
Pensijos 0.79 0.85 0.89 0.25 0.88 0.24 0.30
Pabėgėliai 0.96 0.91 0.97 0.89 0.59 0.84 0.84
Dviguba_pilietybė 0.94 0.90 0.94 0.83 0.91 0.81 0.64
Asmenvardžiai 0.81 0.61 0.69 0.83 0.51 0.29 0.15
Privati_informacija 0.76 0.76 0.81 0.59 0.75 0.63 0.74
In [4]:
# Pairwise correlation between the first seven parties (columns of the
# transposed table), rounded to two decimals.
# `.iloc` replaces the removed `.ix`; the slice is positional either way
# because the column labels are strings.
data.T.iloc[:, :7].corr().round(2)
Partija TT DP LSDP AW LRLS MG TSLK
Partija
TT 1.00 0.57 0.50 0.45 -0.08 0.04 0.30
DP 0.57 1.00 0.85 0.07 0.10 0.36 0.13
LSDP 0.50 0.85 1.00 -0.23 0.51 0.12 0.00
AW 0.45 0.07 -0.23 1.00 -0.57 0.48 0.52
LRLS -0.08 0.10 0.51 -0.57 1.00 -0.19 -0.06
MG 0.04 0.36 0.12 0.48 -0.19 1.00 0.68
TSLK 0.30 0.13 0.00 0.52 -0.06 0.68 1.00
In [5]:
# Same correlation matrix as above, but over every row of `data`
# (the seven parties plus the PETRAS row), rounded to two decimals.
data.transpose().corr().round(decimals=2)
Partija TT DP LSDP AW LRLS MG TSLK PETRAS
Partija
TT 1.00 0.57 0.50 0.45 -0.08 0.04 0.30 0.61
DP 0.57 1.00 0.85 0.07 0.10 0.36 0.13 0.12
LSDP 0.50 0.85 1.00 -0.23 0.51 0.12 0.00 0.07
AW 0.45 0.07 -0.23 1.00 -0.57 0.48 0.52 0.41
LRLS -0.08 0.10 0.51 -0.57 1.00 -0.19 -0.06 -0.01
MG 0.04 0.36 0.12 0.48 -0.19 1.00 0.68 -0.24
TSLK 0.30 0.13 0.00 0.52 -0.06 0.68 1.00 -0.07
PETRAS 0.61 0.12 0.07 0.41 -0.01 -0.24 -0.07 1.00
In [23]:
import statsmodels.sandbox.tools as smt

# PCA over the first seven rows of `data` (the parties, without PETRAS).
# `.iloc` replaces the removed `.ix`; the row labels are party names, so the
# integer slice was positional and the selection is unchanged.
# No `keepdim` is passed, so every component is kept.
proj = smt.tools_pca.pca(data.iloc[:7, :])

# First two rows of the second element of the result. Per the statsmodels
# docstring the tuple is (xreduced, factors, evals, evecs), so this should be
# the projected coordinates of the first two parties -- confirm for your version.
proj[1][:2]
array([[ 3.20154150e-01 +0.00000000e+00j, -3.42352852e-01 +0.00000000e+00j, -1.63719781e-01 +0.00000000e+00j, -1.58295388e-01 +0.00000000e+00j, 8.57586129e-02 +0.00000000e+00j, -4.71657375e-02 +0.00000000e+00j, -1.48061657e-16 +0.00000000e+00j, -3.82778902e-17 +0.00000000e+00j, -1.65287341e-16 +0.00000000e+00j, 1.77182475e-16 +1.36613093e-17j, 1.77182475e-16 -1.36613093e-17j, -2.26405114e-16 +0.00000000e+00j], [ 4.16700182e-01 +0.00000000e+00j, -2.29248261e-01 +0.00000000e+00j, 1.46657006e-01 +0.00000000e+00j, -4.92214565e-03 +0.00000000e+00j, -1.06137660e-01 +0.00000000e+00j, -4.93872292e-02 +0.00000000e+00j, 2.52659466e-16 +0.00000000e+00j, 3.35547327e-18 +0.00000000e+00j, 1.33891767e-17 +0.00000000e+00j, -4.65968536e-17 +9.32450063e-18j, -4.65968536e-17 -9.32450063e-18j, 4.07423015e-17 +0.00000000e+00j]])
In [25]:
# Third element of the tuple returned by smt.tools_pca.pca -- presumably the
# eigenvalues of the components (confirm against the statsmodels docstring).
# NOTE(review): the values come back complex; entries around 1e-17 and below
# are numerically zero.
proj[2]
array([ 3.72602052e-01 +0.00000000e+00j, 1.50310469e-01 +0.00000000e+00j, 5.72978565e-02 +0.00000000e+00j, 3.25974011e-02 +0.00000000e+00j, 3.88242528e-03 +0.00000000e+00j, 2.32408111e-03 +0.00000000e+00j, 1.45793837e-17 +0.00000000e+00j, 5.49062001e-18 +0.00000000e+00j, 8.71149841e-19 +0.00000000e+00j, -2.63689929e-18 +2.38692229e-18j, -2.63689929e-18 -2.38692229e-18j, -9.44840897e-18 +0.00000000e+00j])
In [ ]:
%matplotlib inline import matplotlib matplotlib.style.use('ggplot') import matplotlib.pyplot as plt def add_label(row): ax.annotate(row.name, row.values, xytext=(10,-5), textcoords='offset points', size=16, color='k'); return row pca_df = -pd.DataFrame(data=proj[1], index=data.ix[:7,:].index, columns=['PC1', 'PC2']) ax = pca_df.plot('PC1','PC2', kind='scatter', c='k', s=50, figsize=(10,7)) pca_df.apply(add_label, axis=1) plt.show()
In [ ]:
try:
    from ipywidgets import interact
except ImportError:
    # Older IPython releases shipped the widgets under IPython.html.widgets.
    from IPython.html.widgets import interact


def pca_plot(Sauktiniai, Meras, Internetinis_balsavimas, Darbo_kodeksas,
             Svietimas, Vaiko_teises, Alkoholis, Pensijos, Pabegeliai,
             Dviguba_pilietybe, Asmenvardziai, Privati_informacija):
    """Project the seven parties plus a custom PETRAS row onto two components.

    Each parameter is the PETRAS answer to one question, on the same 0-1 scale
    as `data`; the parameter order matches the column order of `data`.
    Relies on `data`, `smt` and `plt` defined in earlier cells.
    Draws a labelled scatter plot and returns None.
    """
    # Build the PETRAS row from the slider values. The original used
    # range(12) here, so moving the sliders never changed the plot.
    petras_answers = [
        Sauktiniai, Meras, Internetinis_balsavimas, Darbo_kodeksas,
        Svietimas, Vaiko_teises, Alkoholis, Pensijos, Pabegeliai,
        Dviguba_pilietybe, Asmenvardziai, Privati_informacija,
    ]
    petras_row = pd.DataFrame([petras_answers], index=['PETRAS'],
                              columns=data.columns)

    # First seven rows are the parties; `.iloc` replaces the removed `.ix`
    # (row labels are strings, so the slice was positional anyway).
    data_with_petras = pd.concat([data.iloc[:7, :], petras_row])

    proj = smt.tools_pca.pca(data_with_petras, keepdim=2)
    # The projection can come back as a complex array with zero imaginary
    # parts; plot the real part. The sign flip is kept from the original.
    pca_df = -pd.DataFrame(data=proj[1].real, index=data_with_petras.index,
                           columns=['PC1', 'PC2'])

    ax = pca_df.plot('PC1', 'PC2', kind='scatter', c='k', s=50,
                     figsize=(10, 7))
    # Label every point with its row name (party or PETRAS).
    for label, coords in pca_df.iterrows():
        ax.annotate(label, coords.values, xytext=(10, -5),
                    textcoords='offset points', size=16, color='k')
    plt.show()


interact(pca_plot,
         Sauktiniai=0.80,
         Meras=0.60,
         Internetinis_balsavimas=0.10,
         Darbo_kodeksas=0.55,
         Svietimas=0.50,
         Vaiko_teises=0.40,
         Alkoholis=0.30,
         Pensijos=0.60,
         Pabegeliai=0.30,
         Dviguba_pilietybe=0.80,
         Asmenvardziai=0.90,
         Privati_informacija=0.30)
In [ ]: