Statistical Rethinking: Chapter 4 Practice Answers

We use some of the code from the pymc-devs Python/pymc3 port of the Statistical Rethinking code examples.

4E1

Line 1: $y_i \sim \text{Normal}(\mu,\sigma)$

4E2

Two parameters are in the posterior: $\mu$ and $\sigma$.

4E3

We can express our posterior joint distribution over $\mu$ and $\sigma$ as:

$$P(\mu,\sigma|\mathbf{y}) = \frac{\prod_i\text{Normal}(y_i|\mu,\sigma)\text{Normal}(\mu|0,10)\text{Exponential}(\sigma|1)}{\int\int\prod_i\text{Normal}(y_i|\mu,\sigma)\text{Normal}(\mu|0,10)\text{Exponential}(\sigma|1)d\mu d\sigma}$$

4E4

Line 2: $\mu_i = \alpha + \beta x_i$

4E5

Three parameters are in the posterior: $\alpha$, $\beta$, and $\sigma$.

4M1
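4M1 asks for simulated observations from the prior. A minimal prior-predictive simulation for that model ($y \sim \text{Normal}(\mu, \sigma)$ with the priors from 4E3), using plain NumPy — the seed and sample size are my own choices:

```python
import numpy as np

rng = np.random.default_rng(4)

# Priors from the model in 4E3/4M1:
#   mu ~ Normal(0, 10), sigma ~ Exponential(1)
n = 10_000
mu = rng.normal(0, 10, size=n)
sigma = rng.exponential(1, size=n)

# Prior predictive: one simulated observation per (mu, sigma) draw.
y_sim = rng.normal(mu, sigma)

print(y_sim.mean(), y_sim.std())  # roughly 0 and a bit over 10
```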

4M2

The point of my Python/pymc3 solutions is to learn pymc3, so instead of using quadratic approximation, which I'll likely never use in practice, we'll use pymc3's Markov chain Monte Carlo sampling.

4M3

$$y_i \sim \text{Normal}(\mu_i, \sigma)$$
$$\mu_i = \alpha + \beta x_i$$
$$\alpha \sim \text{Normal}(0, 10)$$
$$\beta \sim \text{Uniform}(0, 1)$$
$$\sigma \sim \text{Exponential}(1)$$

4M4

The average age of a growing student is probably somewhere around 12 years old (you can be a student from age 5 onwards). Average heights online are around 150 cm for people of this age. We give the prior a large standard deviation of 20 cm, so that, given sufficient evidence, the mean can end up at any realistic height.

$$h_i \sim \text{Normal}(\mu_i, \sigma)$$
$$\mu_i = \alpha + \beta (t_i - \bar{t})$$
$$\alpha \sim \text{Normal}(150, 20)$$
$$\beta \sim \text{Uniform}(0, 10)$$
$$\sigma \sim \text{Exponential}(1)$$

4M5

In one sense I've already accounted for this by setting my mean for $\alpha$ to the height of a child, rather than that of an adult.

However, we want the slopes to be positive. Let's check our prior predictive distribution to be sure.

Yes, all those slopes look non-negative.
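A quick way to run that prior-predictive check numerically, using the priors from the 4M4 model (the grid of centered years here is an illustrative choice):

```python
import numpy as np

rng = np.random.default_rng(0)

# Priors from the 4M4 model.
n_lines = 1000
alpha = rng.normal(150, 20, size=n_lines)
beta = rng.uniform(0, 10, size=n_lines)

# Each (alpha, beta) pair defines a line over the centered year t - t_bar.
t = np.linspace(-1, 1, 5)  # e.g. three years of measurements, centered
mu = alpha[:, None] + beta[:, None] * t[None, :]

# Uniform(0, 10) guarantees non-negative slopes by construction.
print((beta >= 0).all())  # True
```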

4M6

In this case we'd want to bound $\sigma$ at no more than 8 (since a variance of 64 means a standard deviation of 8). The question is what the best way to bound this variable is.

Honestly, the probability mass is so low at $\sigma = 8$ that I wouldn't worry about setting a hard boundary.
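To back that up: under the $\text{Exponential}(1)$ prior, the mass above $\sigma = 8$ is just the survival function $e^{-8}$:

```python
import math

# P(sigma > 8) under Exponential(rate=1): survival function exp(-rate * x).
p_tail = math.exp(-8)
print(f"{p_tail:.6f}")  # about 0.000335
```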

4M7

The posterior predictive distributions are the same, but the parameterization is different.

The $\alpha$ parameter is no longer the mean height, since $\beta$ is no longer multiplied by zero when the input weight is at its mean. Instead, $\alpha$ is lower: it now represents the extrapolated height at weight zero.

Removing the centering also seems to induce some negative covariance between $\alpha$ and $\beta$.

4M8

Having more knots gives the spline more room to try and fit the data more closely and possibly overfit. Making the prior more narrow counteracts this.

Hard

4H1

We need to predict the expected heights given some weights and provide an 89% interval. Let's start with expected heights. For this we'll just take the mean of the posterior values of alpha and beta and transform the weight.

As for the intervals: what does the 89% interval mean? It means taking the range of heights that contains the middle 89% of the probability mass and reporting its bounds.

One way of doing this would be to take pairs of alpha and beta samples and compute height from the weight. Then we will have a distribution over heights and we can calculate the interval.

What I'm not sure about here is that this is probably the 89% interval for the parameters of the line, not for the actual heights; the variation in the heights themselves could be far larger.

That's right: above we're not factoring in the height variance, only the variation in the parameter estimates. So we want to sample from the posterior while holding our weight input fixed. We can do this by taking samples from the posterior, computing samples of $\mu$ at the weights we're interested in, and then using those in turn to sample heights.
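A sketch of that procedure in NumPy. The posterior samples here are mocked stand-ins for a real trace, and `w_bar` is an assumed centering value; the weights are the ones asked about in the question:

```python
import numpy as np

rng = np.random.default_rng(1)

# Mock posterior samples standing in for the real trace (illustration only).
n = 4000
alpha_s = rng.normal(154.6, 0.3, size=n)
beta_s = rng.normal(0.9, 0.04, size=n)
sigma_s = rng.normal(5.1, 0.2, size=n)

w_bar = 45.0  # mean weight used to center the predictor (assumed)
weights = np.array([46.95, 43.72, 64.78, 32.59, 54.63])

for w in weights:
    mu_s = alpha_s + beta_s * (w - w_bar)     # samples of mu at this weight
    h_s = rng.normal(mu_s, sigma_s)           # sampled heights: adds sigma variation
    lo, hi = np.percentile(h_s, [5.5, 94.5])  # middle 89% percentile interval
    print(f"weight {w:5.2f}: E[h] = {mu_s.mean():6.2f}, "
          f"89% PI = ({lo:6.2f}, {hi:6.2f})")
```

Sampling `h_s` from `Normal(mu_s, sigma_s)`, rather than summarizing `mu_s` alone, is what folds the height variance back into the interval.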

4H2

Going to do this with MCMC instead of quadratic approximation.

(a) For every 10-unit increase in weight, this is the predicted change in height:
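Given posterior samples of the slope (mocked here; the real values come from the fitted trace), the answer is just $10\beta$:

```python
import numpy as np

rng = np.random.default_rng(2)

# Mock posterior slope samples standing in for the real trace (illustration only).
beta_s = rng.normal(2.7, 0.07, size=4000)

# A 10-unit increase in weight changes the predicted height by 10 * beta.
change = 10 * beta_s
print(f"{change.mean():.1f} cm per 10 units of weight")  # around 27 cm
```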

It's a surprisingly manual process to change some of the input variables. It would be nice to have a model where (1) the model definition wasn't coupled with observed data and (2) the input variables could easily be changed.

(c) The model fit is concerning because there isn't a linear relationship between weight and height. Since this non-linearity only popped up once we filtered for people who are under 18, it would make sense that it has to do with growth. Younger children are lighter, and an increase in weight has a stronger positive association with increases in height. But older people who weigh more have larger variability in weight without a commensurate change in height.

One assumption we could change is to stop assuming a linear relationship between weight and height. A logarithmic relationship may fit better. Implicitly the model assumes that the same slope parameter is useful for all ages. Stratifying by age buckets could be another approach, but then you induce sparsity of data points.
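As a quick sanity check of the logarithmic idea, here is an ordinary least-squares fit of $h = \alpha + \beta \log w$ on synthetic data — a cheap stand-in for refitting the Bayesian model with a log-transformed predictor (the data-generating values are invented for illustration, not from Howell1):

```python
import numpy as np

rng = np.random.default_rng(3)

# Synthetic weight/height data with a logarithmic relationship
# (illustration only; the real exercise uses the Howell1 dataset).
w = rng.uniform(5, 60, size=200)
h = 60 + 25 * np.log(w) + rng.normal(0, 3, size=200)

# Ordinary least squares for h = alpha + beta * log(w).
X = np.column_stack([np.ones_like(w), np.log(w)])
coef, *_ = np.linalg.lstsq(X, h, rcond=None)
alpha_hat, beta_hat = coef
print(f"alpha ~ {alpha_hat:.1f}, beta ~ {beta_hat:.1f}")
```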

4H3

Good colleague! Looks pretty solid. The above also covers (b), although the question technically asked for the 97% percentile interval, not the 97% HDI.

4H4

It's not easy to come up with a prior that fits reasonable ranges very well. I guess the best strategy is to just have vague priors so you don't accidentally force a bad prior down the model's throat.

I'm not so interested in splines, so I'll depart from the chapter's exercises here.