Contact
CoCalc Logo Icon
StoreFeaturesDocsShareSupport News AboutSign UpSign In
| Download

Jupyter notebook moxliukas/manoseimas_lt.ipynb

Views: 167
Image: ubuntu2004
Kernel: Python 2 (system-wide)
import pandas as pd
# Load the table, index it by the 'Partija' column and divide every value by
# 100 (presumably converting percentages to fractions -- the displayed values
# lie between 0 and 1).
data = pd.read_csv('http://petras.kudaras.lt/notebooks/manoseimas.csv').set_index('Partija') / 100.

# Show the transposed table restricted to its first seven columns.
# `.iloc` replaces the removed `.ix` indexer; the selection is positional.
data.T.iloc[:, :7]
Partija TT DP LSDP AW LRLS MG TSLK
Šauktiniai 0.92 0.89 0.96 0.72 0.95 0.79 0.91
Meras 0.82 0.94 0.89 0.85 0.31 0.89 0.55
Internetinis_balsavimas 0.49 0.80 0.91 0.11 1.00 0.74 0.22
Darbo_kodeksas 0.91 0.79 0.98 0.15 0.95 0.41 0.42
Švietimas 0.91 0.92 0.86 0.60 0.29 0.63 0.42
Vaiko_teisės 0.74 0.78 0.91 0.44 0.76 0.40 0.34
Alkoholis 0.59 0.53 0.55 0.68 0.44 0.75 0.82
Pensijos 0.79 0.85 0.89 0.25 0.88 0.24 0.30
Pabėgėliai 0.96 0.91 0.97 0.89 0.59 0.84 0.84
Dviguba_pilietybė 0.94 0.90 0.94 0.83 0.91 0.81 0.64
Asmenvardžiai 0.81 0.61 0.69 0.83 0.51 0.29 0.15
Privati_informacija 0.76 0.76 0.81 0.59 0.75 0.63 0.74
# Pairwise correlation between the first seven columns of the transposed
# table, rounded to two decimals.  `.iloc` replaces the removed `.ix` indexer.
data.T.iloc[:, :7].corr().round(2)
Partija TT DP LSDP AW LRLS MG TSLK
Partija
TT 1.00 0.57 0.50 0.45 -0.08 0.04 0.30
DP 0.57 1.00 0.85 0.07 0.10 0.36 0.13
LSDP 0.50 0.85 1.00 -0.23 0.51 0.12 0.00
AW 0.45 0.07 -0.23 1.00 -0.57 0.48 0.52
LRLS -0.08 0.10 0.51 -0.57 1.00 -0.19 -0.06
MG 0.04 0.36 0.12 0.48 -0.19 1.00 0.68
TSLK 0.30 0.13 0.00 0.52 -0.06 0.68 1.00
# Pairwise correlation between every column of the transposed table
# (i.e. between all rows of `data`), rounded to two decimals.
data.transpose().corr().round(2)
Partija TT DP LSDP AW LRLS MG TSLK PETRAS
Partija
TT 1.00 0.57 0.50 0.45 -0.08 0.04 0.30 0.61
DP 0.57 1.00 0.85 0.07 0.10 0.36 0.13 0.12
LSDP 0.50 0.85 1.00 -0.23 0.51 0.12 0.00 0.07
AW 0.45 0.07 -0.23 1.00 -0.57 0.48 0.52 0.41
LRLS -0.08 0.10 0.51 -0.57 1.00 -0.19 -0.06 -0.01
MG 0.04 0.36 0.12 0.48 -0.19 1.00 0.68 -0.24
TSLK 0.30 0.13 0.00 0.52 -0.06 0.68 1.00 -0.07
PETRAS 0.61 0.12 0.07 0.41 -0.01 -0.24 -0.07 1.00
import statsmodels.sandbox.tools as smt

# Run PCA on the first seven rows of `data` (positional selection; `.iloc`
# replaces the removed `.ix` indexer).
proj = smt.tools_pca.pca(data.iloc[:7, :])

# Show the first two rows of the second element of the PCA result.
proj[1][:2]
array([[ 3.20154150e-01 +0.00000000e+00j, -3.42352852e-01 +0.00000000e+00j, -1.63719781e-01 +0.00000000e+00j, -1.58295388e-01 +0.00000000e+00j, 8.57586129e-02 +0.00000000e+00j, -4.71657375e-02 +0.00000000e+00j, -1.48061657e-16 +0.00000000e+00j, -3.82778902e-17 +0.00000000e+00j, -1.65287341e-16 +0.00000000e+00j, 1.77182475e-16 +1.36613093e-17j, 1.77182475e-16 -1.36613093e-17j, -2.26405114e-16 +0.00000000e+00j], [ 4.16700182e-01 +0.00000000e+00j, -2.29248261e-01 +0.00000000e+00j, 1.46657006e-01 +0.00000000e+00j, -4.92214565e-03 +0.00000000e+00j, -1.06137660e-01 +0.00000000e+00j, -4.93872292e-02 +0.00000000e+00j, 2.52659466e-16 +0.00000000e+00j, 3.35547327e-18 +0.00000000e+00j, 1.33891767e-17 +0.00000000e+00j, -4.65968536e-17 +9.32450063e-18j, -4.65968536e-17 -9.32450063e-18j, 4.07423015e-17 +0.00000000e+00j]])
# Display the third element of the PCA result (presumably the eigenvalues,
# sorted in decreasing order -- confirm against the statsmodels `pca` docs).
proj[2]
array([ 3.72602052e-01 +0.00000000e+00j, 1.50310469e-01 +0.00000000e+00j, 5.72978565e-02 +0.00000000e+00j, 3.25974011e-02 +0.00000000e+00j, 3.88242528e-03 +0.00000000e+00j, 2.32408111e-03 +0.00000000e+00j, 1.45793837e-17 +0.00000000e+00j, 5.49062001e-18 +0.00000000e+00j, 8.71149841e-19 +0.00000000e+00j, -2.63689929e-18 +2.38692229e-18j, -2.63689929e-18 -2.38692229e-18j, -9.44840897e-18 +0.00000000e+00j])
%matplotlib inline import matplotlib matplotlib.style.use('ggplot') import matplotlib.pyplot as plt def add_label(row): ax.annotate(row.name, row.values, xytext=(10,-5), textcoords='offset points', size=16, color='k'); return row pca_df = -pd.DataFrame(data=proj[1], index=data.ix[:7,:].index, columns=['PC1', 'PC2']) ax = pca_df.plot('PC1','PC2', kind='scatter', c='k', s=50, figsize=(10,7)) pca_df.apply(add_label, axis=1) plt.show()
# NOTE(review): newer IPython releases ship these widgets as the separate
# `ipywidgets` package; the original import is kept unchanged here.
from IPython.html.widgets import *


def pca_plot(Sauktiniai, Meras, Internetinis_balsavimas, Darbo_kodeksas,
             Svietimas, Vaiko_teises, Alkoholis, Pensijos, Pabegeliai,
             Dviguba_pilietybe, Asmenvardziai, Privati_informacija):
    """Plot the first seven rows of ``data`` plus a 'PETRAS' row in PCA space.

    Each argument is the 'PETRAS' value for one column of ``data``; the
    arguments are matched to ``data.columns`` by position, so their order
    must follow the column order of the table.

    The function reads the module-level ``data``, ``smt`` and ``plt``, draws
    a labelled scatter plot of the first two principal components and
    returns nothing.
    """
    # Build the PETRAS row from the slider values.  The original hard-coded
    # range(12) here, so moving the sliders never changed the plot.
    answers = [Sauktiniai, Meras, Internetinis_balsavimas, Darbo_kodeksas,
               Svietimas, Vaiko_teises, Alkoholis, Pensijos, Pabegeliai,
               Dviguba_pilietybe, Asmenvardziai, Privati_informacija]
    petras_row = pd.DataFrame([answers], index=['PETRAS'],
                              columns=data.columns)

    # Positional selection of the first seven rows (`.iloc` replaces the
    # removed `.ix` indexer).
    data_with_petras = pd.concat([data.iloc[:7, :], petras_row])

    proj = smt.tools_pca.pca(data_with_petras, keepdim=2)

    # The factors may come back complex-valued with a zero imaginary part;
    # plot the real part.  The sign is flipped as in the original.
    pca_df = -pd.DataFrame(data=proj[1].real, index=data_with_petras.index,
                           columns=['PC1', 'PC2'])
    ax = pca_df.plot('PC1', 'PC2', kind='scatter', c='k', s=50,
                     figsize=(10, 7))

    def add_label(row):
        # Label the point with the row's index value; return the row so the
        # helper can be used with DataFrame.apply.
        ax.annotate(row.name, row.values, xytext=(10, -5),
                    textcoords='offset points', size=16, color='k')
        return row

    pca_df.apply(add_label, axis=1)
    plt.show()


interact(pca_plot, Sauktiniai=0.80, Meras=0.60, Internetinis_balsavimas=0.10,
         Darbo_kodeksas=0.55, Svietimas=0.50, Vaiko_teises=0.40,
         Alkoholis=0.30, Pensijos=0.60, Pabegeliai=0.30,
         Dviguba_pilietybe=0.80, Asmenvardziai=0.90,
         Privati_informacija=0.30)