Source code for bauer.utils.bayes

import pandas as pd
import numpy as np
import pytensor.tensor as pt
import arviz as az
from scipy.special import expit

[docs] def get_posterior(mu1, sd1, mu2, sd2): var1, var2 = sd1**2, sd2**2 return mu1 + (var1/(var1+var2))*(mu2 - mu1), pt.sqrt((var1*var2)/(var1+var2))
def get_posterior_np(mu1, sd1, mu2, sd2): var1, var2 = sd1**2, sd2**2 return mu1 + (var1/(var1+var2))*(mu2 - mu1), np.sqrt((var1*var2)/(var1+var2))
[docs] def get_diff_dist(mu1, sd1, mu2, sd2): return mu2 - mu1, pt.sqrt(sd1**2+sd2**2)
[docs] def get_diff_dist_np(mu1, sd1, mu2, sd2): return mu2 - mu1, np.sqrt(sd1**2+sd2**2)
[docs] def cumulative_normal(x, mu, sd, s=pt.sqrt(2.)): # Cumulative distribution function for the standard normal distribution return pt.clip(0.5 + 0.5 * pt.erf((x - mu) / (sd*s)), 1e-9, 1-1e-9)
def softplus(x): return np.log(1 + np.exp(-np.abs(x))) + np.maximum(x,0) def logistic(x): return expit(x)
[docs] def summarize_ppc(ppc, groupby=None): """Single-step PPC summary (legacy). Prefer summarize_ppc_group for group-level PPCs.""" if groupby is not None: ppc = ppc.groupby(groupby).mean() e = ppc.mean(1).to_frame('p_predicted') hdi = pd.DataFrame(az.hdi(ppc.T.values), index=ppc.index, columns=['hdi025', 'hdi975']) return pd.concat((e, hdi), axis=1)
def summarize_ppc_group(ppc, condition_cols, subject_col='subject', hdi_prob=0.95): """Two-step group-mean PPC summary. For each posterior sample, first averages simulated choices within each (subject, condition) cell, then averages those subject means across subjects. The HDI is then computed across the resulting per-sample group means. This correctly represents "where 95 % of plausible group-mean observations would fall under the model" — including both parameter uncertainty and Bernoulli noise — rather than the uncertainty on the per-trial probability. Parameters ---------- ppc : pd.DataFrame, shape (trials, posterior_samples) Raw PPC samples from ``model.ppc().xs(var_name, level='variable')``. Posterior-sample columns can be any column not in *condition_cols* or *subject_col*. All grouping keys (including *subject_col*) must be present either as index levels or as regular columns. condition_cols : list of str Condition dimensions to summarise over, e.g. ``['n1', 'log_ratio_bin']`` or ``['order', 'n_safe_bin', 'log_ratio_bin']``. These must be index levels or columns of *ppc*. subject_col : str Name of the subject dimension (default ``'subject'``). hdi_prob : float Posterior mass for HDI interval (default 0.95). Returns ------- pd.DataFrame Index: *condition_cols*. Columns: ``p_predicted`` (posterior mean), ``hdi025``, ``hdi975``. Examples -------- >>> ppc_ll = model.ppc(data, idata, var_names=['ll_bernoulli']) ... .xs('ll_bernoulli', level='variable') >>> # add derived condition columns if needed >>> ppc_ll_flat = ppc_ll.reset_index() >>> ppc_ll_flat['bin'] = pd.cut(ppc_ll_flat['log(n2/n1)'], 12) \ ... .apply(lambda x: x.mid).astype(float) >>> summary = summarize_ppc_group(ppc_ll_flat, ... condition_cols=['n1', 'bin']) >>> # summary has columns p_predicted, hdi025, hdi975 indexed by (n1, bin) """ # After ppc_ll.reset_index(), pandas adds former-index columns as tuples # (name, '') because ppc_ll.columns is a MultiIndex(chain, draw). # Flatten those back to plain strings so the isinstance check works. if isinstance(ppc.columns, pd.MultiIndex): ppc = ppc.copy() ppc.columns = [c[0] if (isinstance(c, tuple) and len(c) >= 2 and c[1] == '') else c for c in ppc.columns] # Posterior-sample columns have tuple names (chain, draw) — not strings. # Paradigm columns are strings. Exclude condition and subject columns too. excluded = set(condition_cols) | {subject_col} sample_cols = [c for c in ppc.columns if c not in excluded and not isinstance(c, str)] if not sample_cols: # Fallback when caller already renamed sample cols to integers sample_cols = ppc.select_dtypes('number').columns.difference( list(excluded)).tolist() # Step 1: average within (subject, condition) for each posterior sample ppc_subj = ppc.groupby([subject_col] + condition_cols)[sample_cols].mean() # Step 2: average subject means within each condition for each sample ppc_group = ppc_subj.groupby(condition_cols).mean() # Step 3: mean and HDI across samples e = ppc_group.mean(axis=1).to_frame('p_predicted') hdi_vals = pd.DataFrame( az.hdi(ppc_group.T.values, hdi_prob=hdi_prob), index=ppc_group.index, columns=['hdi025', 'hdi975'], ) return pd.concat([e, hdi_vals], axis=1)
[docs] def get_posterior_np(mu1, sd1, mu2, sd2): var1, var2 = sd1**2, sd2**2 return mu1 + (var1/(var1+var2))*(mu2 - mu1), np.sqrt((var1*var2)/(var1+var2))