{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [
],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [
],
"source": [
"data = pd.read_csv('http://petras.kudaras.lt/notebooks/manoseimas.csv').set_index('Partija')/100."
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"
\n",
" \n",
" \n",
" Partija | \n",
" TT | \n",
" DP | \n",
" LSDP | \n",
" AW | \n",
" LRLS | \n",
" MG | \n",
" TSLK | \n",
"
\n",
" \n",
" \n",
" \n",
" Šauktiniai | \n",
" 0.92 | \n",
" 0.89 | \n",
" 0.96 | \n",
" 0.72 | \n",
" 0.95 | \n",
" 0.79 | \n",
" 0.91 | \n",
"
\n",
" \n",
" Meras | \n",
" 0.82 | \n",
" 0.94 | \n",
" 0.89 | \n",
" 0.85 | \n",
" 0.31 | \n",
" 0.89 | \n",
" 0.55 | \n",
"
\n",
" \n",
" Internetinis_balsavimas | \n",
" 0.49 | \n",
" 0.80 | \n",
" 0.91 | \n",
" 0.11 | \n",
" 1.00 | \n",
" 0.74 | \n",
" 0.22 | \n",
"
\n",
" \n",
" Darbo_kodeksas | \n",
" 0.91 | \n",
" 0.79 | \n",
" 0.98 | \n",
" 0.15 | \n",
" 0.95 | \n",
" 0.41 | \n",
" 0.42 | \n",
"
\n",
" \n",
" Švietimas | \n",
" 0.91 | \n",
" 0.92 | \n",
" 0.86 | \n",
" 0.60 | \n",
" 0.29 | \n",
" 0.63 | \n",
" 0.42 | \n",
"
\n",
" \n",
" Vaiko_teisės | \n",
" 0.74 | \n",
" 0.78 | \n",
" 0.91 | \n",
" 0.44 | \n",
" 0.76 | \n",
" 0.40 | \n",
" 0.34 | \n",
"
\n",
" \n",
" Alkoholis | \n",
" 0.59 | \n",
" 0.53 | \n",
" 0.55 | \n",
" 0.68 | \n",
" 0.44 | \n",
" 0.75 | \n",
" 0.82 | \n",
"
\n",
" \n",
" Pensijos | \n",
" 0.79 | \n",
" 0.85 | \n",
" 0.89 | \n",
" 0.25 | \n",
" 0.88 | \n",
" 0.24 | \n",
" 0.30 | \n",
"
\n",
" \n",
" Pabėgėliai | \n",
" 0.96 | \n",
" 0.91 | \n",
" 0.97 | \n",
" 0.89 | \n",
" 0.59 | \n",
" 0.84 | \n",
" 0.84 | \n",
"
\n",
" \n",
" Dviguba_pilietybė | \n",
" 0.94 | \n",
" 0.90 | \n",
" 0.94 | \n",
" 0.83 | \n",
" 0.91 | \n",
" 0.81 | \n",
" 0.64 | \n",
"
\n",
" \n",
" Asmenvardžiai | \n",
" 0.81 | \n",
" 0.61 | \n",
" 0.69 | \n",
" 0.83 | \n",
" 0.51 | \n",
" 0.29 | \n",
" 0.15 | \n",
"
\n",
" \n",
" Privati_informacija | \n",
" 0.76 | \n",
" 0.76 | \n",
" 0.81 | \n",
" 0.59 | \n",
" 0.75 | \n",
" 0.63 | \n",
" 0.74 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
"Partija TT DP LSDP AW LRLS MG TSLK\n",
"Šauktiniai 0.92 0.89 0.96 0.72 0.95 0.79 0.91\n",
"Meras 0.82 0.94 0.89 0.85 0.31 0.89 0.55\n",
"Internetinis_balsavimas 0.49 0.80 0.91 0.11 1.00 0.74 0.22\n",
"Darbo_kodeksas 0.91 0.79 0.98 0.15 0.95 0.41 0.42\n",
"Švietimas 0.91 0.92 0.86 0.60 0.29 0.63 0.42\n",
"Vaiko_teisės 0.74 0.78 0.91 0.44 0.76 0.40 0.34\n",
"Alkoholis 0.59 0.53 0.55 0.68 0.44 0.75 0.82\n",
"Pensijos 0.79 0.85 0.89 0.25 0.88 0.24 0.30\n",
"Pabėgėliai 0.96 0.91 0.97 0.89 0.59 0.84 0.84\n",
"Dviguba_pilietybė 0.94 0.90 0.94 0.83 0.91 0.81 0.64\n",
"Asmenvardžiai 0.81 0.61 0.69 0.83 0.51 0.29 0.15\n",
"Privati_informacija 0.76 0.76 0.81 0.59 0.75 0.63 0.74"
]
},
"execution_count": 3,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"data.T.ix[:,:7]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" Partija | \n",
" TT | \n",
" DP | \n",
" LSDP | \n",
" AW | \n",
" LRLS | \n",
" MG | \n",
" TSLK | \n",
"
\n",
" \n",
" Partija | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" TT | \n",
" 1.00 | \n",
" 0.57 | \n",
" 0.50 | \n",
" 0.45 | \n",
" -0.08 | \n",
" 0.04 | \n",
" 0.30 | \n",
"
\n",
" \n",
" DP | \n",
" 0.57 | \n",
" 1.00 | \n",
" 0.85 | \n",
" 0.07 | \n",
" 0.10 | \n",
" 0.36 | \n",
" 0.13 | \n",
"
\n",
" \n",
" LSDP | \n",
" 0.50 | \n",
" 0.85 | \n",
" 1.00 | \n",
" -0.23 | \n",
" 0.51 | \n",
" 0.12 | \n",
" 0.00 | \n",
"
\n",
" \n",
" AW | \n",
" 0.45 | \n",
" 0.07 | \n",
" -0.23 | \n",
" 1.00 | \n",
" -0.57 | \n",
" 0.48 | \n",
" 0.52 | \n",
"
\n",
" \n",
" LRLS | \n",
" -0.08 | \n",
" 0.10 | \n",
" 0.51 | \n",
" -0.57 | \n",
" 1.00 | \n",
" -0.19 | \n",
" -0.06 | \n",
"
\n",
" \n",
" MG | \n",
" 0.04 | \n",
" 0.36 | \n",
" 0.12 | \n",
" 0.48 | \n",
" -0.19 | \n",
" 1.00 | \n",
" 0.68 | \n",
"
\n",
" \n",
" TSLK | \n",
" 0.30 | \n",
" 0.13 | \n",
" 0.00 | \n",
" 0.52 | \n",
" -0.06 | \n",
" 0.68 | \n",
" 1.00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
"Partija TT DP LSDP AW LRLS MG TSLK\n",
"Partija \n",
"TT 1.00 0.57 0.50 0.45 -0.08 0.04 0.30\n",
"DP 0.57 1.00 0.85 0.07 0.10 0.36 0.13\n",
"LSDP 0.50 0.85 1.00 -0.23 0.51 0.12 0.00\n",
"AW 0.45 0.07 -0.23 1.00 -0.57 0.48 0.52\n",
"LRLS -0.08 0.10 0.51 -0.57 1.00 -0.19 -0.06\n",
"MG 0.04 0.36 0.12 0.48 -0.19 1.00 0.68\n",
"TSLK 0.30 0.13 0.00 0.52 -0.06 0.68 1.00"
]
},
"execution_count": 4,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"data.T.ix[:,:7].corr().round(2)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" Partija | \n",
" TT | \n",
" DP | \n",
" LSDP | \n",
" AW | \n",
" LRLS | \n",
" MG | \n",
" TSLK | \n",
" PETRAS | \n",
"
\n",
" \n",
" Partija | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" TT | \n",
" 1.00 | \n",
" 0.57 | \n",
" 0.50 | \n",
" 0.45 | \n",
" -0.08 | \n",
" 0.04 | \n",
" 0.30 | \n",
" 0.61 | \n",
"
\n",
" \n",
" DP | \n",
" 0.57 | \n",
" 1.00 | \n",
" 0.85 | \n",
" 0.07 | \n",
" 0.10 | \n",
" 0.36 | \n",
" 0.13 | \n",
" 0.12 | \n",
"
\n",
" \n",
" LSDP | \n",
" 0.50 | \n",
" 0.85 | \n",
" 1.00 | \n",
" -0.23 | \n",
" 0.51 | \n",
" 0.12 | \n",
" 0.00 | \n",
" 0.07 | \n",
"
\n",
" \n",
" AW | \n",
" 0.45 | \n",
" 0.07 | \n",
" -0.23 | \n",
" 1.00 | \n",
" -0.57 | \n",
" 0.48 | \n",
" 0.52 | \n",
" 0.41 | \n",
"
\n",
" \n",
" LRLS | \n",
" -0.08 | \n",
" 0.10 | \n",
" 0.51 | \n",
" -0.57 | \n",
" 1.00 | \n",
" -0.19 | \n",
" -0.06 | \n",
" -0.01 | \n",
"
\n",
" \n",
" MG | \n",
" 0.04 | \n",
" 0.36 | \n",
" 0.12 | \n",
" 0.48 | \n",
" -0.19 | \n",
" 1.00 | \n",
" 0.68 | \n",
" -0.24 | \n",
"
\n",
" \n",
" TSLK | \n",
" 0.30 | \n",
" 0.13 | \n",
" 0.00 | \n",
" 0.52 | \n",
" -0.06 | \n",
" 0.68 | \n",
" 1.00 | \n",
" -0.07 | \n",
"
\n",
" \n",
" PETRAS | \n",
" 0.61 | \n",
" 0.12 | \n",
" 0.07 | \n",
" 0.41 | \n",
" -0.01 | \n",
" -0.24 | \n",
" -0.07 | \n",
" 1.00 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
"Partija TT DP LSDP AW LRLS MG TSLK PETRAS\n",
"Partija \n",
"TT 1.00 0.57 0.50 0.45 -0.08 0.04 0.30 0.61\n",
"DP 0.57 1.00 0.85 0.07 0.10 0.36 0.13 0.12\n",
"LSDP 0.50 0.85 1.00 -0.23 0.51 0.12 0.00 0.07\n",
"AW 0.45 0.07 -0.23 1.00 -0.57 0.48 0.52 0.41\n",
"LRLS -0.08 0.10 0.51 -0.57 1.00 -0.19 -0.06 -0.01\n",
"MG 0.04 0.36 0.12 0.48 -0.19 1.00 0.68 -0.24\n",
"TSLK 0.30 0.13 0.00 0.52 -0.06 0.68 1.00 -0.07\n",
"PETRAS 0.61 0.12 0.07 0.41 -0.01 -0.24 -0.07 1.00"
]
},
"execution_count": 5,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"data.T.corr().round(2)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 3.20154150e-01 +0.00000000e+00j,\n",
" -3.42352852e-01 +0.00000000e+00j,\n",
" -1.63719781e-01 +0.00000000e+00j,\n",
" -1.58295388e-01 +0.00000000e+00j,\n",
" 8.57586129e-02 +0.00000000e+00j,\n",
" -4.71657375e-02 +0.00000000e+00j,\n",
" -1.48061657e-16 +0.00000000e+00j,\n",
" -3.82778902e-17 +0.00000000e+00j,\n",
" -1.65287341e-16 +0.00000000e+00j,\n",
" 1.77182475e-16 +1.36613093e-17j,\n",
" 1.77182475e-16 -1.36613093e-17j,\n",
" -2.26405114e-16 +0.00000000e+00j],\n",
" [ 4.16700182e-01 +0.00000000e+00j,\n",
" -2.29248261e-01 +0.00000000e+00j,\n",
" 1.46657006e-01 +0.00000000e+00j,\n",
" -4.92214565e-03 +0.00000000e+00j,\n",
" -1.06137660e-01 +0.00000000e+00j,\n",
" -4.93872292e-02 +0.00000000e+00j,\n",
" 2.52659466e-16 +0.00000000e+00j,\n",
" 3.35547327e-18 +0.00000000e+00j,\n",
" 1.33891767e-17 +0.00000000e+00j,\n",
" -4.65968536e-17 +9.32450063e-18j,\n",
" -4.65968536e-17 -9.32450063e-18j,\n",
" 4.07423015e-17 +0.00000000e+00j]])"
]
},
"execution_count": 23,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"import statsmodels.sandbox.tools as smt\n",
"proj = smt.tools_pca.pca(data.ix[:7,:])\n",
"proj[1][:2]"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([ 3.72602052e-01 +0.00000000e+00j,\n",
" 1.50310469e-01 +0.00000000e+00j,\n",
" 5.72978565e-02 +0.00000000e+00j,\n",
" 3.25974011e-02 +0.00000000e+00j,\n",
" 3.88242528e-03 +0.00000000e+00j,\n",
" 2.32408111e-03 +0.00000000e+00j,\n",
" 1.45793837e-17 +0.00000000e+00j,\n",
" 5.49062001e-18 +0.00000000e+00j,\n",
" 8.71149841e-19 +0.00000000e+00j,\n",
" -2.63689929e-18 +2.38692229e-18j,\n",
" -2.63689929e-18 -2.38692229e-18j, -9.44840897e-18 +0.00000000e+00j])"
]
},
"execution_count": 25,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"proj[2]"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"collapsed": false
},
"outputs": [
],
"source": [
"%matplotlib inline\n",
"import matplotlib\n",
"matplotlib.style.use('ggplot')\n",
"import matplotlib.pyplot as plt\n",
"\n",
"def add_label(row):\n",
" ax.annotate(row.name, row.values, xytext=(10,-5), \n",
" textcoords='offset points',\n",
" size=16, color='k'); return row\n",
"\n",
"pca_df = -pd.DataFrame(data=proj[1], index=data.ix[:7,:].index, columns=['PC1', 'PC2'])\n",
"ax = pca_df.plot('PC1','PC2', kind='scatter', c='k', s=50, figsize=(10,7))\n",
"pca_df.apply(add_label, axis=1)\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"collapsed": false
},
"outputs": [
],
"source": [
"from IPython.html.widgets import *\n",
"\n",
"def pca_plot(Sauktiniai, Meras, Internetinis_balsavimas,\n",
" Darbo_kodeksas, Svietimas, Vaiko_teises,\n",
" Alkoholis, Pensijos, Pabegeliai, Dviguba_pilietybe,\n",
" Asmenvardziai, Privati_informacija):\n",
" \n",
" # Petras projekcija\n",
" data_with_petras = pd.concat([data.ix[:7,:], pd.DataFrame([range(12)], index=['PETRAS'], columns=data.columns)])\n",
" proj = smt.tools_pca.pca(data_with_petras, keepdim=2)\n",
" pca_df = -pd.DataFrame(data=proj[1], index=data_with_petras.index, columns=['PC1', 'PC2'])\n",
" ax = pca_df.plot('PC1','PC2', kind='scatter', c='k', s=50, figsize=(10,7))\n",
"\n",
" def add_label(row):\n",
" ax.annotate(row.name, row.values, xytext=(10,-5), \n",
" textcoords='offset points',\n",
" size=16, color='k'); return row\n",
"\n",
" pca_df.apply(add_label, axis=1)\n",
" plt.show()\n",
"\n",
"interact(pca_plot, Sauktiniai=0.80, Meras=0.60, Internetinis_balsavimas=0.10,\n",
" Darbo_kodeksas=0.55, Svietimas=0.50, Vaiko_teises=0.40,\n",
" Alkoholis=0.30, Pensijos=0.60, Pabegeliai=0.30, \n",
" Dviguba_pilietybe=0.80, Asmenvardziai=0.90, Privati_informacija=0.30)"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"collapsed": true
},
"outputs": [
],
"source": [
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2 (system-wide)",
"language": "python",
"metadata": {
"cocalc": {
"description": "Python 2 programming language",
"priority": 5,
"url": "https://www.python.org/"
}
},
"name": "python2",
"resource_dir": "/ext/jupyter/kernels/python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.10"
}
},
"nbformat": 4,
"nbformat_minor": 4
}