CoCalc -- Chp_12.ipynb

📚 The CoCalc Library - books, templates and other resources

Project: 📚 The Library - Shared Public Version

Path: cocalc-examples / statistical-rethinking-python-PyMC3 / Chp_12.ipynb

Views: ⁹⁵⁶³⁶
License: OTHER

Kernel: Python 3

In [1]:

%matplotlib inline
import pymc3 as pm
import numpy as np
import pandas as pd
from scipy import stats
from scipy.special import expit as logistic
import matplotlib.pyplot as plt

%config InlineBackend.figure_format = 'retina'
plt.style.use(['seaborn-colorblind', 'seaborn-darkgrid'])

Code 12.1

In [2]:

d = pd.read_csv('Data/reedfrogs.csv', sep=",")
print(d.shape)
d.head(8)

(48, 5)

	density	pred	size	surv	propsurv
0	10	no	big	9	0.9
1	10	no	big	10	1.0
2	10	no	big	7	0.7
3	10	no	big	10	1.0
4	10	no	small	9	0.9
5	10	no	small	9	0.9
6	10	no	small	10	1.0
7	10	no	small	9	0.9

Code 12.2

In [3]:

# make the tank cluster variable
tank = np.arange(d.shape[0])
# fit
with pm.Model() as m_12_1:
    a_tank = pm.Normal('a_tank', 0, 5, shape=d.shape[0])
    p = pm.math.invlogit(a_tank[tank])
    surv = pm.Binomial('surv', n=d.density, p=p, observed=d.surv)
    trace_12_1 = pm.sample(2000, tune=2000, njobs=4)
# pm.summary(trace_12_1, alpha=.11).round(2)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
100%|██████████| 4000/4000 [00:18<00:00, 213.57it/s]

Code 12.3

In [4]:

with pm.Model() as m_12_2:
    a = pm.Normal('a', 0., 1.)
    sigma = pm.HalfCauchy('sigma', 1.)
    a_tank = pm.Normal('a_tank', a, sigma, shape=d.shape[0])
    p = pm.math.invlogit(a_tank[tank])
    surv = pm.Binomial('surv', n=d.density, p=p, observed=d.surv)
    trace_12_2 = pm.sample(2000, tune=2000, njobs=4)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
100%|██████████| 4000/4000 [00:17<00:00, 231.65it/s]

Code 12.4

In [5]:

comp_df = pm.compare(traces=[trace_12_1, trace_12_2],
                     models=[m_12_1, m_12_2],
                     method='pseudo-BMA')

comp_df.loc[:,'model'] = pd.Series(['m12.1', 'm12.2'])
comp_df = comp_df.set_index('model')
comp_df

	WAIC	pWAIC	dWAIC	weight	SE	dSE	warning
model
m12.2	200.7	21.18	0	0.65	7.15	0	1
m12.1	201.91	22.83	1.21	0.35	9.31	4.44	1

Code 12.5

In [6]:

# extract PyMC3 samples
post = pm.trace_to_dataframe(trace_12_2, varnames=['a_tank'])

# compute median intercept for each tank
# also transform to probability with logistic
d.loc[:, 'propsurv_est'] = pd.Series(logistic(post.median(axis=0).values), index=d.index)

In [7]:

_, ax = plt.subplots(1, 1, figsize=(12, 5))
# display raw proportions surviving in each tank
ax.scatter(np.arange(1, 49), d.propsurv)
ax.scatter(np.arange(1, 49), d.propsurv_est,
             facecolors='none', edgecolors='k', lw=1)
ax.hlines(logistic(np.median(trace_12_2['a'], axis=0)), 0, 49, linestyles='--')
ax.vlines([16.5, 32.5], -.05, 1.05, lw=.5)
ax.text(8, 0, "small tanks", horizontalalignment='center')
ax.text(16+8, 0, "medium tanks", horizontalalignment='center')
ax.text(32+8, 0, "large tanks", horizontalalignment='center')
ax.set_xlabel('tank', fontsize=14)
ax.set_ylabel('proportion survival', fontsize=14)
ax.set_xlim(-1, 50)
ax.set_ylim(-.05, 1.05);

Code 12.6

In [8]:

_, ax = plt.subplots(1, 2, figsize=(12, 5))
# show first 100 populations in the posterior
xrange = np.linspace(-3, 4, 200)
postcurve = [stats.norm.pdf(xrange, loc=trace_12_2['a'][i], scale=trace_12_2['sigma'][i]) for i in range(100)]
ax[0].plot(xrange, np.asarray(postcurve).T,
           alpha=.1, color='k')
ax[0].set_xlabel('log-odds survive', fontsize=14)
ax[0].set_ylabel('Density', fontsize=14);
# sample 8000 imaginary tanks from the posterior distribution
sim_tanks = np.random.normal(loc=trace_12_2['a'], scale=trace_12_2['sigma'])
# transform to probability and visualize
pm.kdeplot(logistic(sim_tanks), ax=ax[1], color='k')
ax[1].set_xlabel('probability survive', fontsize=14)
ax[1].set_ylabel('Density', fontsize=14)
plt.tight_layout();
# dens( logistic(sim_tanks) , xlab="probability survive" )

Code 12.7

In [9]:

a, sigma, nponds = 1.4, 1.5, 60
ni = np.repeat([5, 10, 25, 35], 15)

Code 12.8

In [10]:

a_pond = np.random.normal(loc=a, scale=sigma, size=nponds)

Code 12.9

In [11]:

dsim = pd.DataFrame(dict(pond=np.arange(nponds), ni=ni, true_a=a_pond))

Code 12.10

Data types related. Python is dynamically-typed.

Code 12.11

In [12]:

dsim.loc[:, 'si'] = np.random.binomial(dsim['ni'], logistic(dsim['true_a']))

Code 12.12

In [13]:

dsim.loc[:, 'p_nopool'] = dsim.si / dsim.ni

Code 12.13

In [14]:

with pm.Model() as m_12_3:
    a = pm.Normal('a', 0., 1.)
    sigma = pm.HalfCauchy('sigma', 1.)
    a_pond = pm.Normal('a_pond', a, sigma, shape=nponds)
    p = pm.math.invlogit(a_pond[dsim.pond.values])
    si = pm.Binomial('si', n=dsim.ni.values, p=p, observed=dsim.si)
    trace_12_3 = pm.sample(1000, tune=1000)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
100%|██████████| 2000/2000 [00:06<00:00, 297.12it/s]

Code 12.14

In [15]:

summary_12_3 = pm.summary(trace_12_3, alpha=.11)
summary_12_3.iloc[[0, 1, 2, -3, -2, -1],:].round(2)

	mean	sd	mc_error	hpd_5.5	hpd_94.5	n_eff	Rhat
a	1.29	0.20	0.00	0.96	1.60	1827.0	1.0
a_pond__0	1.46	0.89	0.02	0.03	2.75	2000.0	1.0
a_pond__1	1.48	0.94	0.02	0.05	2.96	2000.0	1.0
a_pond__58	3.63	0.90	0.02	2.13	4.92	2000.0	1.0
a_pond__59	0.24	0.34	0.01	-0.23	0.85	2000.0	1.0
sigma	1.42	0.20	0.01	1.12	1.75	783.0	1.0

Code 12.15

In [16]:

estimated_a_pond = pm.summary(trace_12_3, alpha=.11, varnames=['a_pond'])
dsim.loc[:, 'p_partpool'] = logistic(estimated_a_pond['mean'].values)

Code 12.16

In [17]:

dsim.loc[:, 'p_true'] = logistic(dsim['true_a'].values)

Code 12.17

In [18]:

nopool_error = np.abs(dsim.p_nopool - dsim.p_true)
partpool_error = np.abs(dsim.p_partpool - dsim.p_true)

Code 12.18

In [19]:

_, ax = plt.subplots(1, 1, figsize=(12, 5))
xrange = np.arange(60)
xrange_ = xrange.reshape((4, 15))
# display raw proportions surviving in each tank
ax.scatter(xrange+1, nopool_error)
ax.scatter(xrange+1, partpool_error,
             facecolors='none', edgecolors='k', lw=1)
ax.vlines(xrange_[1:,0]+.5, -.025, 0.35, lw=.5)

textall = ["tiny (5)", "small (10)", "medium (25)", "large (30)"]
for isem in range(4):
    ax.hlines(nopool_error[xrange_[isem, :]].mean(), 
              xrange_[isem, 0]+1, xrange_[isem, -1]+1, color='C0')
    ax.hlines(partpool_error[xrange_[isem, :]].mean(), 
              xrange_[isem, 0]+1, xrange_[isem, -1]+1, color='k', linestyles='--')
    ax.text(xrange_[isem, 7]+.5, 0.32, textall[isem], horizontalalignment='center')
ax.set_xlabel('pond', fontsize=14)
ax.set_ylabel('absolute error', fontsize=14)
ax.set_xlim(-1, 62)
ax.set_ylim(-.025, 0.34);

Code 12.19

This part is more Stan and rethinking related. To do the same in PyMC3 (i.e., avoide compiling the same model twice), you need to set up the input data with theano.shared or use sampled, a functional decorator for PyMC3.

Code 12.20

In [20]:

y1 = np.random.normal(10., 1., 10000)
y2 = 10. + np.random.normal(0., 1., 10000)

Code 12.21

In [21]:

d = pd.read_csv('Data/chimpanzees.csv', sep=";")
# we change "actor" to zero-index
d.actor = (d.actor - 1).astype(int)
Nactor = len(d.actor.unique())

with pm.Model() as m_12_4:
    bp = pm.Normal('bp', 0, 10)
    bpC = pm.Normal('bpC', 0, 10)
    
    a = pm.Normal('a', 0, 10)
    sigma_actor = pm.HalfCauchy('sigma_actor', 1.)
    a_actor = pm.Normal('a_actor', 0., sigma_actor, shape=Nactor)

    p = pm.math.invlogit(a + a_actor[d.actor.values] + (bp + bpC * d.condition) * d.prosoc_left)
    pulled_left = pm.Binomial('pulled_left', 1, p, observed=d.pulled_left)

    trace_12_4 = pm.sample(5000, tune=1000, njobs=4)
# pm.traceplot(trace_12_4)
# pm.summary(trace_12_4, alpha=.11).round(2)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
100%|██████████| 6000/6000 [02:18<00:00, 43.33it/s]

Code 12.22

In [22]:

total_a_actor = np.asarray([trace_12_4['a'] + trace_12_4['a_actor'][:, i] for i in range(Nactor)])
total_a_actor.mean(axis=1).round(2)

array([-0.71,  4.62, -1.02, -1.02, -0.71,  0.23,  1.76])

Code 12.23

In [23]:

d.block = (d.block - 1).astype(int)
Nblock = len(d.block.unique())

with pm.Model() as m_12_5:
    bp = pm.Normal('bp', 0, 10)
    bpC = pm.Normal('bpC', 0, 10)
    
    a = pm.Normal('a', 0, 10)
    sigma_actor = pm.HalfCauchy('sigma_actor', 1.)
    a_actor = pm.Normal('a_actor', 0., sigma_actor, shape=Nactor)

    sigma_block = pm.HalfCauchy('sigma_block', 1.)
    a_block = pm.Normal('a_block', 0., sigma_block, shape=Nblock)
    
    p = pm.math.invlogit(a + a_actor[d.actor.values] + a_block[d.block.values]
                         + (bp + bpC * d.condition) * d.prosoc_left)
    pulled_left = pm.Binomial('pulled_left', 1, p, observed=d.pulled_left)

    trace_12_5 = pm.sample(6000, tune=1000, njobs=4)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
 82%|████████▏ | 5741/7000 [03:10<00:36, 34.51it/s]/home/osvaldo/Documentos/Proyectos/01_PyMC3/pymc3/pymc3/step_methods/hmc/nuts.py:468: UserWarning: Chain 3 contains 3 diverging samples after tuning. If increasing `target_accept` does not help try to reparameterize.
  % (self._chain_id, n_diverging))
 94%|█████████▍| 6594/7000 [03:25<00:06, 60.57it/s]/home/osvaldo/Documentos/Proyectos/01_PyMC3/pymc3/pymc3/step_methods/hmc/nuts.py:468: UserWarning: Chain 2 contains 7 diverging samples after tuning. If increasing `target_accept` does not help try to reparameterize.
  % (self._chain_id, n_diverging))
 98%|█████████▊| 6863/7000 [03:30<00:02, 61.19it/s]/home/osvaldo/Documentos/Proyectos/01_PyMC3/pymc3/pymc3/step_methods/hmc/nuts.py:468: UserWarning: Chain 1 contains 26 diverging samples after tuning. If increasing `target_accept` does not help try to reparameterize.
  % (self._chain_id, n_diverging))
100%|█████████▉| 6994/7000 [03:32<00:00, 62.62it/s]/home/osvaldo/Documentos/Proyectos/01_PyMC3/pymc3/pymc3/step_methods/hmc/nuts.py:468: UserWarning: Chain 0 contains 4 diverging samples after tuning. If increasing `target_accept` does not help try to reparameterize.
  % (self._chain_id, n_diverging))
100%|██████████| 7000/7000 [03:32<00:00, 32.99it/s]

Code 12.24

In [24]:

pm.summary(trace_12_5, alpha=.11).round(2)

	mean	sd	mc_error	hpd_5.5	hpd_94.5	n_eff	Rhat
bp	0.83	0.26	0.00	0.41	1.25	15660.0	1.0
bpC	-0.14	0.30	0.00	-0.63	0.33	15743.0	1.0
a	0.43	0.95	0.02	-1.07	1.82	3661.0	1.0
a_actor__0	-1.15	0.96	0.02	-2.60	0.30	3743.0	1.0
a_actor__1	4.19	1.64	0.02	1.78	6.38	6420.0	1.0
a_actor__2	-1.45	0.97	0.02	-2.93	0.03	3629.0	1.0
a_actor__3	-1.45	0.96	0.02	-2.93	0.01	3790.0	1.0
a_actor__4	-1.15	0.96	0.02	-2.63	0.30	3731.0	1.0
a_actor__5	-0.20	0.96	0.02	-1.60	1.32	3789.0	1.0
a_actor__6	1.34	0.98	0.02	-0.19	2.81	4029.0	1.0
a_block__0	-0.18	0.23	0.00	-0.53	0.12	4962.0	1.0
a_block__1	0.04	0.19	0.00	-0.24	0.35	11541.0	1.0
a_block__2	0.05	0.19	0.00	-0.24	0.36	12229.0	1.0
a_block__3	0.00	0.19	0.00	-0.31	0.29	14163.0	1.0
a_block__4	-0.03	0.19	0.00	-0.33	0.25	12378.0	1.0
a_block__5	0.12	0.20	0.00	-0.16	0.45	7508.0	1.0
sigma_actor	2.26	0.92	0.01	1.06	3.40	6639.0	1.0
sigma_block	0.23	0.18	0.00	0.01	0.44	2776.0	1.0

In [25]:

pm.forestplot(trace_12_5);

Code 12.25

In [26]:

_, ax = plt.subplots(1, 1, figsize=(5, 5))
pm.kdeplot(trace_12_5['sigma_actor'], ax=ax)
pm.kdeplot(trace_12_5['sigma_block'], ax=ax)
ax.text(2, 0.75, "actor", color='C0')
ax.text(0.5, 2, "block", color='C1')
ax.set_xlabel('sigma', fontsize=14)
ax.set_ylabel('density', fontsize=14)
ax.set_xlim(-0.1, 4.1)
ax.set_ylim(-0.05, 3.05);

Code 12.26

In [27]:

comp_df = pm.compare(traces=[trace_12_4, trace_12_5],
                     models=[m_12_4, m_12_5],
                     method='pseudo-BMA')

comp_df.loc[:,'model'] = pd.Series(['m12.4', 'm12.5'])
comp_df = comp_df.set_index('model')
comp_df

	WAIC	pWAIC	dWAIC	weight	SE	dSE	warning
model
m12.4	531.27	8.06	0	0.67	19.46	0	0
m12.5	532.68	10.42	1.41	0.33	19.66	1.78	0

Code 12.27

In [28]:

chimp = 2 - 1
sel_actor = np.where((d.actor == (chimp)).values)[0]
_, uni_cond = np.unique(d.loc[sel_actor, ['condition', 'prosoc_left']], 
                        axis=0, return_index=True, )
d.loc[sel_actor[uni_cond], :].head()

	actor	recipient	condition	block	trial	prosoc_left	chose_prosoc	pulled_left
73	1	NaN	0	0	3	0	0	1
72	1	NaN	0	0	1	1	1	1
108	1	7.0	1	0	2	0	0	1
116	1	5.0	1	1	16	1	1	1

In [29]:

ppc = pm.sample_ppc(trace=trace_12_4, samples=1000, model=m_12_4)

100%|██████████| 1000/1000 [00:01<00:00, 941.01it/s]

In [30]:

rt = ppc['pulled_left'][:, sel_actor]

pred_mean = np.zeros((1000, 4))
cond = d.condition.unique()
prosoc_l = d.prosoc_left.unique()
for i in range(len(rt)):
    tmp = []
    for cp in cond:
        for pl in prosoc_l:
            tmp.append(np.mean(rt[i][(d.prosoc_left[sel_actor].values==pl) &
                                     (d.condition[sel_actor].values==cp)]))
    pred_mean[i] = tmp
    
mp = pred_mean.mean(0)
hpd = pm.hpd(pred_mean, alpha=.11)

_, ax = plt.subplots(1, 1, figsize=(5, 5))
ax.fill_between(range(4), hpd[:,1], hpd[:,0], alpha=0.25, color='k')
ax.plot(mp, color='k')
chimps = d.groupby(['actor', 'prosoc_left', 'condition']).agg('mean')['pulled_left'].values.reshape(7, -1)
ax.plot(chimps[chimp], 'C0')

ax.set_ylim(0, 1.1)
ax.set_xlabel("prosoc_left/condition", fontsize=14)
ax.set_ylabel("proportion pulled left", fontsize=14)
plt.xticks(range(4), ("0/0","1/0","0/1","1/1"));

Code 12.28

In [31]:

post = pm.trace_to_dataframe(trace_12_4)
post.head()

	bp	bpC	a	a_actor__0	a_actor__1	a_actor__2	a_actor__3	a_actor__4	a_actor__5	a_actor__6	sigma_actor
0	1.144842	-0.192101	1.336637	-2.001404	3.133755	-2.554750	-2.685470	-1.728456	-0.844019	0.500316	2.390074
1	0.927530	-0.210558	1.217429	-2.172665	3.282467	-2.475223	-2.507842	-1.873292	-1.144004	0.414225	1.485859
2	0.895586	-0.189188	-0.522454	-0.054271	4.134344	-0.538216	-0.309994	-0.414063	0.816621	2.577835	2.078943
3	0.981783	-0.235233	0.141105	-0.704803	3.815423	-1.235665	-1.194887	-1.007658	0.132304	1.557893	1.254438
4	0.979146	-0.311983	1.357626	-1.988020	4.218693	-2.529535	-2.441370	-2.289553	-1.060942	0.150617	2.299303

Code 12.29

In [32]:

pm.kdeplot(trace_12_4['a_actor'][:, 5-1]);

Code 12.30

In [33]:

def p_link(prosoc_left, condition, actor, trace):
    logodds = trace['a'] + \
                trace['a_actor'][:, actor] + \
                (trace['bp'] + trace['bpC'] * condition) * prosoc_left
    return logistic(logodds)

Code 12.31

In [34]:

prosoc_left = [0,1,0,1]
condition = [0,0,1,1]

pred_raw = np.asarray([p_link(p_l, c_d, 2-1, trace_12_4) 
                       for p_l, c_d in zip(prosoc_left, condition)]).T
pred_p = pred_raw.mean(axis=0)
pred_p_PI = pm.hpd(pred_raw, alpha=.11)

Code 12.32

In [35]:

d_pred = pd.DataFrame(dict(prosoc_left=[0, 1, 0, 1],
                           condition=[0, 0, 1, 1], 
                           actor = np.repeat(2-1,4)))

Code 12.33

In [36]:

# replace varying intercept samples with zeros
# 1000 samples by 7 actors
a_actor_zeros = np.zeros((1000,7))

Code 12.34

In [37]:

def p_link(prosoc_left, condition, actor_sim, trace):
    Nsim = actor_sim.shape[0]//trace.nchains
    trace = trace[:Nsim]
    logodds = trace['a'] + \
                np.mean(trace['a_actor']*actor_sim, axis=1) + \
                (trace['bp'] + trace['bpC'] * condition) * prosoc_left
    return logistic(logodds)

pred_raw = np.asarray([p_link(p_l, c_d, a_actor_zeros, trace_12_4) 
                       for p_l, c_d in zip(prosoc_left, condition)]).T
pred_p = pred_raw.mean(axis=0)
pred_p_PI = pm.hpd(pred_raw, alpha=.11)

_, ax = plt.subplots(1, 1, figsize=(5, 5))
ax.fill_between(range(4), pred_p_PI[:,1], pred_p_PI[:,0], alpha=0.25, color='k')
ax.plot(pred_p, color='k')

ax.set_ylim(0, 1.1)
ax.set_xlabel("prosoc_left/condition", fontsize=14)
ax.set_ylabel("proportion pulled left", fontsize=14)
plt.xticks(range(4), ("0/0","1/0","0/1","1/1"));

Code 12.35

In [38]:

# replace varying intercept samples with simulations
sigma_actor = trace_12_4.get_values('sigma_actor')
a_actor_sims = np.random.normal(loc=0, 
                                scale=np.reshape(sigma_actor[:7000], (1000, 7)))

Code 12.36

In [39]:

pred_raw = np.asarray([p_link(p_l, c_d, a_actor_sims, trace_12_4) 
                       for p_l, c_d in zip(prosoc_left, condition)]).T
pred_p = pred_raw.mean(axis=0)
pred_p_PI = pm.hpd(pred_raw, alpha=.11)

_, ax = plt.subplots(1, 1, figsize=(5, 5))
ax.fill_between(range(4), pred_p_PI[:,1], pred_p_PI[:,0], alpha=0.25, color='k')
ax.plot(pred_p, color='k')

ax.set_ylim(0, 1.1)
ax.set_xlabel("prosoc_left/condition", fontsize=14)
ax.set_ylabel("proportion pulled left", fontsize=14)
plt.xticks(range(4), ("0/0","1/0","0/1","1/1"));

Code 12.37

In [40]:

def sim_actor(tr, i):
    sim_a_actor = np.random.randn()*tr['sigma_actor'][i]
    P = np.array([0, 1, 0, 1])
    C = np.array([0, 1, 0, 1])
    p = logistic(tr['a'][i] + sim_a_actor + (tr['bp'][i] + tr['bpC'][i]*C)*P)
    return p

# sim_actor(trace_12_4, 0)

Code 12.38

In [41]:

_, ax = plt.subplots(1, 1, figsize=(5, 5))
for i in range(50):
    ax.plot(sim_actor(trace_12_4, i), color='k', alpha=.15)

ax.set_ylim(0, 1.1)
ax.set_xticks(range(4), ("0/0","1/0","0/1","1/1"))
ax.set_xlabel("prosoc_left/condition", fontsize=14)
ax.set_ylabel("proportion pulled left", fontsize=14)
plt.xticks(range(4), ("0/0","1/0","0/1","1/1"));

Code 12.39

In [42]:

dk = pd.read_csv('Data/Kline', sep=";")
dk.loc[:, 'log_pop'] = np.log(dk.population)
Nsociety = dk.shape[0]
dk.loc[:, 'society'] = np.arange(Nsociety)

with pm.Model() as m_12_6:
    sigma_society = pm.HalfCauchy('sigma_society', 1)
    a_society = pm.Normal('a_society', 0, sigma_society, shape=Nsociety),
    a = pm.Normal('a', 0, 10)
    bp = pm.Normal('bp', 0, 1)
    lam = pm.math.exp(a + a_society + bp*dk.log_pop)
    obs = pm.Poisson('total_tools', lam, observed=dk.total_tools)
    trace_12_6 = pm.sample(5000, tune=1000, njobs=4)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
 82%|████████▏ | 4924/6000 [01:58<00:26, 40.75it/s]/home/osvaldo/Documentos/Proyectos/01_PyMC3/pymc3/pymc3/step_methods/hmc/nuts.py:468: UserWarning: Chain 2 contains 2 diverging samples after tuning. If increasing `target_accept` does not help try to reparameterize.
  % (self._chain_id, n_diverging))
 89%|████████▉ | 5357/6000 [02:04<00:08, 74.66it/s]/home/osvaldo/Documentos/Proyectos/01_PyMC3/pymc3/pymc3/step_methods/hmc/nuts.py:452: UserWarning: The acceptance probability in chain 3 does not match the target. It is 0.891742114153, but should be close to 0.8. Try to increase the number of tuning steps.
  % (self._chain_id, mean_accept, target_accept))
100%|█████████▉| 5995/6000 [02:12<00:00, 74.15it/s]/home/osvaldo/Documentos/Proyectos/01_PyMC3/pymc3/pymc3/step_methods/hmc/nuts.py:452: UserWarning: The acceptance probability in chain 0 does not match the target. It is 0.886014219993, but should be close to 0.8. Try to increase the number of tuning steps.
  % (self._chain_id, mean_accept, target_accept))
100%|██████████| 6000/6000 [02:12<00:00, 45.20it/s]

Code 12.40

In [43]:

sigma_society = trace_12_6.get_values('sigma_society', combine=True)[:, None]
a_society_sims = np.random.normal(loc=0, scale=sigma_society)
log_pop_seq = np.linspace(6, 14, 30)
a_post = trace_12_6.get_values(varname='a', combine=True)[:, None]
bp_post = trace_12_6.get_values(varname='bp', combine=True)[:, None]
link_m12_6 = np.exp(a_post + a_society_sims + bp_post*log_pop_seq)

Code 12.41

In [44]:

# plot raw data
_, axes = plt.subplots(1, 1, figsize=(5, 5))
axes.scatter(dk.log_pop, dk.total_tools)

axes.plot(log_pop_seq, np.median(link_m12_6, axis=0), '--', color='k')

for alpha in [.67, .89, .97]: 
    alpha = 1-alpha
    mu_hpd = pm.hpd(link_m12_6, alpha=alpha)
    axes.fill_between(log_pop_seq,
                      mu_hpd[:,0], mu_hpd[:,1], alpha=alpha*.5+.1, color='k')

axes.set_xlabel('log-population', fontsize=14)
axes.set_ylabel('total tools', fontsize=14)
axes.set_xlim(6.8, 12.8)
axes.set_ylim(10, 73);

In [45]:

import sys, IPython, scipy, matplotlib, platform
print("This notebook was createad on a computer %s running %s and using:\nPython %s\nIPython %s\nPyMC3 %s\nNumPy %s\nPandas %s\nSciPy %s\nMatplotlib %s\n" % (platform.machine(), ' '.join(platform.linux_distribution()[:2]), sys.version[:5], IPython.__version__, pm.__version__, np.__version__, pd.__version__, scipy.__version__, matplotlib.__version__))

This notebook was createad on a computer x86_64 running debian stretch/sid and using:
Python 3.6.2
IPython 6.1.0
PyMC3 3.2
NumPy 1.13.3
Pandas 0.20.3
SciPy 0.19.1
Matplotlib 2.1.0