Chapter 2 - Concepts and methods from basic probability and statistics¶

import warnings

import proplot as plot
from scipy import stats

warnings.filterwarnings("ignore")

%pylab inline

plt.rcParams["axes.labelweight"] = "bold"
plt.rcParams["font.weight"] = "bold"

Populating the interactive namespace from numpy and matplotlib

Code 2.3.1¶

# Classical confidence intervals
y = np.array([35, 34, 38, 35, 37])
n = len(y)
estimate = y.mean()
se = np.std(y, ddof=1) / np.sqrt(n)
int_50 = estimate + stats.t(n - 1).ppf([0.25, 0.75]) * se
int_95 = estimate + stats.t(n - 1).ppf([0.025, 0.975]) * se
print("mean: {}".format(estimate))
print("sd: {}".format(np.std(y, ddof=1)))
print("50% CI: {}".format(int_50))
print("95% CI: {}".format(int_95))

mean: 35.8
sd: 1.6431676725154982
50% CI: [35.25570103 36.34429897]
95% CI: [33.75973786 37.84026214]

Code 2.3.2¶

# Proportions
estimate = 0.7
n = 700
se = np.sqrt(estimate * (1 - estimate) / n)
int_95 = estimate + stats.norm.ppf([0.025, 0.975]) * se
print("mean: {}".format(estimate))
print("se: {}".format(se))
print("95% CI: {}".format(int_95))

mean: 0.7
se: 0.017320508075688773
95% CI: [0.66605243 0.73394757]

Code 2.3.5¶

# Estimate ratio if syooirt fir tge death penalty among men to that among women
n_men = 500
p_hat_men = 0.75
se_men = np.sqrt(p_hat_men * (1 - p_hat_men) / n_men)

n_women = 500
p_hat_women = 0.65
se_women = np.sqrt(p_hat_women * (1 - p_hat_women) / n_women)

n_sims = 10000
p_men = stats.norm.rvs(loc=p_hat_men, scale=se_men, size=n_sims)
p_women = stats.norm.rvs(loc=p_hat_women, scale=se_women, size=n_sims)

ratio = p_men / p_women
int_95 = stats.mstats.mquantiles(ratio, prob=[0.025, 0.975])
print(int_95)

[1.0653393  1.25490508]

Data Analysis Using Regression and Multilevel/Hierarchial Models in Python Chapter 3 - Linear regression: the basics