%matplotlib inline

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymc3 as pm
import scipy as sp

from collections import OrderedDict
from theano import shared, tensor as tt

%config InlineBackend.figure_format = 'retina'
plt.style.use(['seaborn-colorblind', 'seaborn-darkgrid'])

Code 11.1

trolley_df = pd.read_csv('Data/Trolley.csv', sep=';')
trolley_df.head()
case response order id age male edu action intention contact story action2
0 cfaqu 4 2 96;434 14 0 Middle School 0 0 1 aqu 1
1 cfbur 3 31 96;434 14 0 Middle School 0 0 1 bur 1
2 cfrub 4 16 96;434 14 0 Middle School 0 0 1 rub 1
3 cibox 3 32 96;434 14 0 Middle School 0 1 1 box 1
4 cibur 3 4 96;434 14 0 Middle School 0 1 1 bur 1

Code 11.2

ax = (trolley_df.response
                .value_counts()
                .sort_index()
                .plot(kind='bar'))

ax.set_xlabel("response", fontsize=14);
ax.set_ylabel("Frequency", fontsize=14);
Image in a Jupyter notebook

Code 11.3

ax = (trolley_df.response
                .value_counts()
                .sort_index()
                .cumsum()
                .div(trolley_df.shape[0])
                .plot(marker='o'))

ax.set_xlim(0.9, 7.1);
ax.set_xlabel("response", fontsize=14)
ax.set_ylabel("cumulative proportion", fontsize=14);
Image in a Jupyter notebook

Code 11.4

resp_lco = (trolley_df.response
                      .value_counts()
                      .sort_index()
                      .cumsum()
                      .iloc[:-1]
                      .div(trolley_df.shape[0])
                      .apply(lambda p: np.log(p / (1. - p))))

ax = resp_lco.plot(marker='o')
ax.set_xlim(0.9, 7);
ax.set_xlabel("response", fontsize=14)
ax.set_ylabel("log-cumulative-odds", fontsize=14);
Image in a Jupyter notebook
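For reference, the quantity plotted above is the log-cumulative-odds of each response value k, and the ordered-logit model gives each one its own intercept (cutpoint) α_k:

$$\log \frac{\Pr(y \le k)}{1 - \Pr(y \le k)} = \alpha_k$$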

Code 11.5

The following Ordered transform is taken from the PyMC Discourse forum.

class Ordered(pm.distributions.transforms.ElemwiseTransform):
    name = "ordered"

    def forward(self, x):
        # map an increasing vector to unconstrained space:
        # keep the first element, log the successive differences
        out = tt.zeros(x.shape)
        out = tt.inc_subtensor(out[0], x[0])
        out = tt.inc_subtensor(out[1:], tt.log(x[1:] - x[:-1]))
        return out

    def forward_val(self, x, point=None):
        x, = pm.distributions.distribution.draw_values([x], point=point)
        return self.forward(x)

    def backward(self, y):
        # invert: exponentiate the increments and cumulatively sum
        out = tt.zeros(y.shape)
        out = tt.inc_subtensor(out[0], y[0])
        out = tt.inc_subtensor(out[1:], tt.exp(y[1:]))
        return tt.cumsum(out)

    def jacobian_det(self, y):
        return tt.sum(y[1:])
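As a quick sanity check (not part of the original notebook), backward should always return strictly increasing cutpoints, since it cumulatively sums the first element with exponentiated increments. A minimal NumPy mirror of that computation:

# NumPy sketch of Ordered.backward: the first element passes through,
# the rest are positive increments, so the cumulative sum is increasing
y = np.array([-2., 0.5, 0.5, 1.0, 1.0, 0.8])  # arbitrary unconstrained values
x = np.cumsum(np.concatenate(([y[0]], np.exp(y[1:]))))
assert np.all(np.diff(x) > 0)  # cutpoints come out strictly ordered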
with pm.Model() as m11_1:
    a = pm.Normal(
        'a', 0., 10.,
        transform=Ordered(),
        shape=6, testval=np.arange(6) - 2.5)

    pa = pm.math.sigmoid(a)

    p_cum = tt.concatenate([[0.], pa, [1.]])
    p = p_cum[1:] - p_cum[:-1]

    resp_obs = pm.Categorical(
        'resp_obs', p,
        observed=trolley_df.response - 1)
with m11_1:
    map_11_1 = pm.find_MAP()
logp = -18,941, ||grad|| = 0.45229: 100%|██████████| 14/14 [00:00<00:00, 23.21it/s]

Code 11.6

map_11_1['a']
array([-1.9160707 , -1.26658298, -0.71862013, 0.24778795, 0.88986631, 1.76937289])

Code 11.7

sp.special.expit(map_11_1['a'])
array([ 0.12830038, 0.21984275, 0.32769691, 0.56163196, 0.70886258, 0.85437967])

Code 11.8

with m11_1:
    trace_11_1 = pm.sample(1000, tune=1000)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
100%|██████████| 2000/2000 [00:16<00:00, 160.45it/s]
pm.df_summary(trace_11_1, varnames=['a'], alpha=.11).round(2)
mean sd mc_error hpd_5.5 hpd_94.5 n_eff Rhat
a__0 -1.92 0.03 0.0 -1.97 -1.87 2000.0 1.0
a__1 -1.27 0.02 0.0 -1.31 -1.23 2000.0 1.0
a__2 -0.72 0.02 0.0 -0.75 -0.69 2000.0 1.0
a__3 0.25 0.02 0.0 0.21 0.28 2000.0 1.0
a__4 0.89 0.02 0.0 0.86 0.92 2000.0 1.0
a__5 1.77 0.03 0.0 1.73 1.82 2000.0 1.0

Code 11.9

def ordered_logistic_proba(a):
    pa = sp.special.expit(a)
    p_cum = np.concatenate(([0.], pa, [1.]))
    return p_cum[1:] - p_cum[:-1]
ordered_logistic_proba(trace_11_1['a'].mean(axis=0))
array([ 0.12753048, 0.09170783, 0.10820073, 0.2341595 , 0.14767958, 0.14570807, 0.14501382])
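As a sanity check (not in the book's code), these seven probabilities are differences of a cumulative sequence padded with 0 and 1, so they must sum to one:

# the implied category probabilities form a proper simplex
np.allclose(ordered_logistic_proba(trace_11_1['a'].mean(axis=0)).sum(), 1.)  # True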

Code 11.10

# expected response: sum over k of k * Pr(response = k)
(ordered_logistic_proba(trace_11_1['a'].mean(axis=0))
    * (1 + np.arange(7))).sum()
4.1999293593514384

Code 11.11

ordered_logistic_proba(trace_11_1['a'].mean(axis=0) - 0.5)
array([ 0.08143763, 0.06409094, 0.08244469, 0.20927244, 0.15948963, 0.18473514, 0.21852952])

Code 11.12

# expected response after subtracting 0.5 from every cutpoint
(ordered_logistic_proba(trace_11_1['a'].mean(axis=0) - 0.5)
    * (1 + np.arange(7))).sum()
4.7296090400791702

Code 11.13

action = shared(trolley_df.action.values)
intention = shared(trolley_df.intention.values)
contact = shared(trolley_df.contact.values)

with pm.Model() as m11_2:
    a = pm.Normal(
        'a', 0., 10.,
        transform=Ordered(),
        shape=6,
        testval=trace_11_1['a'].mean(axis=0)
    )

    bA = pm.Normal('bA', 0., 10.)
    bI = pm.Normal('bI', 0., 10.)
    bC = pm.Normal('bC', 0., 10.)
    phi = bA * action + bI * intention + bC * contact

    pa = pm.math.sigmoid(
        tt.shape_padleft(a) - tt.shape_padright(phi)
    )

    p_cum = tt.concatenate([
        tt.zeros_like(tt.shape_padright(pa[:, 0])),
        pa,
        tt.ones_like(tt.shape_padright(pa[:, 0]))
    ], axis=1)
    p = p_cum[:, 1:] - p_cum[:, :-1]

    resp_obs = pm.Categorical(
        'resp_obs', p,
        observed=trolley_df.response - 1
    )
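Restating the model just defined: the linear predictor φ shifts every cutpoint by the same amount, so the cumulative link becomes

$$\log \frac{\Pr(y_i \le k)}{1 - \Pr(y_i \le k)} = \alpha_k - \phi_i, \qquad \phi_i = \beta_A A_i + \beta_I I_i + \beta_C C_i.$$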
with m11_2:
    map_11_2 = pm.find_MAP()
logp = -18,565, ||grad|| = 3.7472: 100%|██████████| 17/17 [00:00<00:00, 58.64it/s]

Code 11.14

with pm.Model() as m11_3:
    a = pm.Normal(
        'a', 0., 10.,
        transform=Ordered(),
        shape=6,
        testval=trace_11_1['a'].mean(axis=0)
    )

    bA = pm.Normal('bA', 0., 10.)
    bI = pm.Normal('bI', 0., 10.)
    bC = pm.Normal('bC', 0., 10.)
    bAI = pm.Normal('bAI', 0., 10.)
    bCI = pm.Normal('bCI', 0., 10.)
    phi = bA * action + bI * intention + bC * contact \
        + bAI * action * intention \
        + bCI * contact * intention

    pa = pm.math.sigmoid(
        tt.shape_padleft(a) - tt.shape_padright(phi)
    )

    p_cum = tt.concatenate([
        tt.zeros_like(tt.shape_padright(pa[:, 0])),
        pa,
        tt.ones_like(tt.shape_padright(pa[:, 0]))
    ], axis=1)
    p = p_cum[:, 1:] - p_cum[:, :-1]

    resp_obs = pm.Categorical(
        'resp_obs', p,
        observed=trolley_df.response - 1
    )
with m11_3:
    map_11_3 = pm.find_MAP()
logp = -18,489, ||grad|| = 0.91902: 100%|██████████| 26/26 [00:00<00:00, 110.84it/s]

Code 11.15

def get_coefs(map_est):
    coefs = OrderedDict()

    for i, ai in enumerate(map_est['a']):
        coefs['a_{}'.format(i)] = ai

    coefs['bA'] = map_est.get('bA', np.nan)
    coefs['bI'] = map_est.get('bI', np.nan)
    coefs['bC'] = map_est.get('bC', np.nan)
    coefs['bAI'] = map_est.get('bAI', np.nan)
    coefs['bCI'] = map_est.get('bCI', np.nan)

    return coefs
(pd.DataFrame.from_dict(
    OrderedDict([
        ('m11_1', get_coefs(map_11_1)),
        ('m11_2', get_coefs(map_11_2)),
        ('m11_3', get_coefs(map_11_3))
    ]))
    .astype(np.float64)
    .round(2))
m11_1 m11_2 m11_3
a_0 -1.92 -2.84 -2.63
a_1 -1.27 -2.16 -1.94
a_2 -0.72 -1.57 -1.34
a_3 0.25 -0.55 -0.31
a_4 0.89 0.12 0.36
a_5 1.77 1.02 1.27
bA NaN -0.71 -0.47
bAI NaN NaN -0.45
bC NaN -0.96 -0.33
bCI NaN NaN -1.27
bI NaN -0.72 -0.28

Code 11.16

with m11_2:
    trace_11_2 = pm.sample(1000, tune=1000)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
100%|██████████| 2000/2000 [03:19<00:00, 7.63it/s]
with m11_3:
    trace_11_3 = pm.sample(1000, tune=1000)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
100%|██████████| 2000/2000 [04:16<00:00, 7.37it/s]
comp_df = pm.compare([trace_11_1, trace_11_2, trace_11_3],
                     [m11_1, m11_2, m11_3])

comp_df.loc[:, 'model'] = pd.Series(['m11.1', 'm11.2', 'm11.3'])
comp_df = comp_df.set_index('model')
comp_df
WAIC pWAIC dWAIC weight SE dSE warning
model
m11.3 36929 10.88 0 0.95 81.38 0 0
m11.2 37090.3 9.16 161.28 0 76.42 25.88 0
m11.1 37854.4 5.94 925.44 0.05 57.69 62.95 0

Code 11.17-19

pp_df = pd.DataFrame(
    np.array([[0, 0, 0], [0, 0, 1],
              [1, 0, 0], [1, 0, 1],
              [0, 1, 0], [0, 1, 1]]),
    columns=['action', 'contact', 'intention'])
pp_df
action contact intention
0 0 0 0
1 0 0 1
2 1 0 0
3 1 0 1
4 0 1 0
5 0 1 1
# swap the counterfactual cases into the shared predictor variables,
# then sample from the posterior predictive without rebuilding the model
action.set_value(pp_df.action.values)
contact.set_value(pp_df.contact.values)
intention.set_value(pp_df.intention.values)

with m11_3:
    pp_trace_11_3 = pm.sample_ppc(trace_11_3, samples=1500)
100%|██████████| 1500/1500 [00:17<00:00, 87.99it/s]
PP_COLS = ['pp_{}'.format(i) for i, _ in enumerate(pp_trace_11_3['resp_obs'])]

pp_df = pd.concat((pp_df,
                   pd.DataFrame(pp_trace_11_3['resp_obs'].T, columns=PP_COLS)),
                  axis=1)
pp_cum_df = (pd.melt(
        pp_df,
        id_vars=['action', 'contact', 'intention'],
        value_vars=PP_COLS,
        value_name='resp'
    )
    .groupby(['action', 'contact', 'intention', 'resp'])
    .size()
    .div(1500)
    .rename('proba')
    .reset_index()
    .pivot_table(
        index=['action', 'contact', 'intention'],
        values='proba',
        columns='resp'
    )
    .cumsum(axis=1)
    .iloc[:, :-1])
pp_cum_df
resp                              0         1         2         3         4         5
action contact intention
0      0       0           0.069333  0.128000  0.208000  0.418667  0.568667  0.757333
               1           0.076667  0.145333  0.238667  0.470000  0.648000  0.818667
       1       0           0.089333  0.171333  0.266667  0.514667  0.687333  0.838000
               1           0.330000  0.484667  0.642000  0.834667  0.906667  0.956667
1      0       0           0.109333  0.191333  0.286667  0.544000  0.705333  0.861333
               1           0.179333  0.308000  0.466667  0.697333  0.822000  0.916667
for (plot_action, plot_contact), plot_df in pp_cum_df.groupby(level=['action', 'contact']):
    fig, ax = plt.subplots(figsize=(8, 6))

    ax.plot([0, 1], plot_df, c='C0');
    ax.plot([0, 1], [0, 0], '--', c='C0');
    ax.plot([0, 1], [1, 1], '--', c='C0');

    ax.set_xlim(0, 1);
    ax.set_xlabel("intention");

    ax.set_ylim(-0.05, 1.05);
    ax.set_ylabel("probability");

    ax.set_title(
        "action = {action}, contact = {contact}".format(
            action=plot_action, contact=plot_contact
        )
    );
Image in a Jupyter notebook
Image in a Jupyter notebook
Image in a Jupyter notebook

Code 11.20

# define parameters
PROB_DRINK = 0.2  # 20% of days
RATE_WORK = 1.    # average 1 manuscript per day

# sample one year of production
N = 365

drink = np.random.binomial(1, PROB_DRINK, size=N)
y = (1 - drink) * np.random.poisson(RATE_WORK, size=N)

Code 11.21

drink_zeros = drink.sum()
work_zeros = (y == 0).sum() - drink_zeros

bins = np.arange(y.max() + 1) - 0.5

plt.hist(y, bins=bins);
plt.bar(0., drink_zeros, width=1.,
        bottom=work_zeros, color='C1', alpha=.5);

plt.xticks(bins + 0.5);
plt.xlabel("manuscripts completed");
plt.ylabel("Frequency");
Image in a Jupyter notebook

Code 11.22

with pm.Model() as m11_4:
    ap = pm.Normal('ap', 0., 1.)
    p = pm.math.sigmoid(ap)

    al = pm.Normal('al', 0., 10.)
    lambda_ = tt.exp(al)

    y_obs = pm.ZeroInflatedPoisson('y_obs', 1. - p, lambda_, observed=y)
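The zero-inflated Poisson likelihood this model assumes mixes a point mass at zero (probability p of drinking) with an ordinary Poisson; note that PyMC3's psi argument is the probability of the Poisson process, hence the 1. - p above:

$$\Pr(y = 0) = p + (1 - p)\,e^{-\lambda}, \qquad \Pr(y = k \mid k > 0) = (1 - p)\,\frac{\lambda^{k} e^{-\lambda}}{k!}$$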
with m11_4:
    map_11_4 = pm.find_MAP()
logp = -462.79, ||grad|| = 0.00047532: 100%|██████████| 12/12 [00:00<00:00, 211.49it/s]
map_11_4
{'al': array(0.05019473453063987), 'ap': array(-1.426975323508402)}

Code 11.23

sp.special.expit(map_11_4['ap']) # probability drink
0.19357040138820736
np.exp(map_11_4['al']) # rate finish manuscripts, when not drinking
1.0514758350937541

Code 11.24

def dzip(x, p, lambda_, log=True):
    # mixture likelihood: a point mass at zero (probability p)
    # plus a Poisson component (probability 1 - p)
    like = p * (x == 0) + (1 - p) * sp.stats.poisson.pmf(x, lambda_)

    return np.log(like) if log else like
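A quick spot check (not in the book) against the simulation parameters above: with p = 0.2 and λ = 1, the probability of a zero-manuscript day is 0.2 + 0.8·e⁻¹ ≈ 0.494.

dzip(0, PROB_DRINK, RATE_WORK, log=False)  # ≈ 0.494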

Code 11.25

PBAR = 0.5
THETA = 5.

a = PBAR * THETA
b = (1 - PBAR) * THETA

p = np.linspace(0, 1, 100)

plt.plot(p, sp.stats.beta.pdf(p, a, b));

plt.xlim(0, 1);
plt.xlabel("probability");
plt.ylabel("Density");
Image in a Jupyter notebook
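This is the mean-and-concentration parameterization of the beta distribution: with shapes a = p̄θ and b = (1 − p̄)θ, the mean is p̄ and θ controls how concentrated the density is around it:

$$a = \bar{p}\,\theta, \qquad b = (1 - \bar{p})\,\theta, \qquad \operatorname{E}[p] = \frac{a}{a + b} = \bar{p}$$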

Code 11.26

admit_df = pd.read_csv('Data/UCBadmit.csv', sep=';')
admit_df.head()
dept applicant.gender admit reject applications
1 A male 512 313 825
2 A female 89 19 108
3 B male 353 207 560
4 B female 17 8 25
5 C male 120 205 325
with pm.Model() as m11_5:
    a = pm.Normal('a', 0., 2.)
    pbar = pm.Deterministic('pbar', pm.math.sigmoid(a))

    theta = pm.Exponential('theta', 1.)

    admit_obs = pm.BetaBinomial(
        'admit_obs',
        pbar * theta, (1. - pbar) * theta,
        admit_df.applications.values,
        observed=admit_df.admit.values
    )
with m11_5:
    trace_11_5 = pm.sample(1000, tune=1000)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
100%|██████████| 2000/2000 [00:02<00:00, 718.29it/s]

Code 11.27

pm.summary(trace_11_5, alpha=.11).round(2)
mean sd mc_error hpd_5.5 hpd_94.5 n_eff Rhat
a -0.37 0.31 0.01 -0.86 0.12 1860.0 1.0
pbar 0.41 0.07 0.00 0.30 0.53 1863.0 1.0
theta 2.78 0.99 0.02 1.25 4.22 1641.0 1.0

Code 11.28

np.percentile(trace_11_5['pbar'], [2.5, 50., 97.5])
array([ 0.28260569, 0.40670155, 0.56068546])

Code 11.29

pbar_hat = trace_11_5['pbar'].mean()
theta_hat = trace_11_5['theta'].mean()

p_plot = np.linspace(0, 1, 100)

plt.plot(
    p_plot,
    sp.stats.beta.pdf(p_plot, pbar_hat * theta_hat, (1. - pbar_hat) * theta_hat)
);
plt.plot(
    p_plot,
    sp.stats.beta.pdf(
        p_plot[:, np.newaxis],
        trace_11_5['pbar'][:100] * trace_11_5['theta'][:100],
        (1. - trace_11_5['pbar'][:100]) * trace_11_5['theta'][:100]
    ),
    c='C0', alpha=0.1
);

plt.xlim(0., 1.);
plt.xlabel("probability admit");

plt.ylim(0., 3.);
plt.ylabel("Density");
Image in a Jupyter notebook

Code 11.30

with m11_5:
    pp_trace_11_5 = pm.sample_ppc(trace_11_5)
100%|██████████| 1000/1000 [00:02<00:00, 355.73it/s]
x_case = np.arange(admit_df.shape[0])

plt.scatter(
    x_case,
    pp_trace_11_5['admit_obs'].mean(axis=0) / admit_df.applications.values
);
plt.scatter(x_case, admit_df.admit / admit_df.applications);

high = np.percentile(pp_trace_11_5['admit_obs'], 95, axis=0) \
        / admit_df.applications.values
plt.scatter(x_case, high, marker='x', c='k');

low = np.percentile(pp_trace_11_5['admit_obs'], 5, axis=0) \
        / admit_df.applications.values
plt.scatter(x_case, low, marker='x', c='k');
Image in a Jupyter notebook

Code 11.31

mu = 3.
theta = 1.

x = np.linspace(0, 10, 100)
plt.plot(x, sp.stats.gamma.pdf(x, mu / theta, scale=theta));
Image in a Jupyter notebook
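This is the mean-and-scale (gamma2) parameterization: shape mu / theta with scale theta gives mean shape × scale = mu. A quick check (not in the book):

sp.stats.gamma(mu / theta, scale=theta).mean()  # -> 3.0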
import sys, IPython, scipy, matplotlib, platform

print("This notebook was created on a computer %s running %s and using:\n"
      "Python %s\nIPython %s\nPyMC3 %s\nNumPy %s\nPandas %s\nSciPy %s\nMatplotlib %s\n"
      % (platform.machine(), ' '.join(platform.linux_distribution()[:2]),
         sys.version[:5], IPython.__version__, pm.__version__, np.__version__,
         pd.__version__, scipy.__version__, matplotlib.__version__))
This notebook was created on a computer x86_64 running debian stretch/sid and using:
Python 3.6.2
IPython 6.1.0
PyMC3 3.2
NumPy 1.13.3
Pandas 0.20.3
SciPy 0.19.1
Matplotlib 2.1.0