Regression on "p_outlier"¶
This short tutorial illustrates how to define a regression directly on the p_outlier
parameter.
The regression is specified following the same basic pattern we established for the include
argument.
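As a preview, the p_outlier specification is a dictionary with a formula, priors for the regression coefficients, and a link function, just like a parameter entry in include. A minimal sketch of the pattern used below (the name p_outlier_spec is purely illustrative):

# Sketch of the specification passed to hssm.HSSM(..., p_outlier=...) further below
p_outlier_spec = {
    "formula": "p_outlier ~ 1 + (1 | C(participant_id))",  # intercept + participant-level offsets
    "prior": {"Intercept": {"name": "Normal", "mu": 0, "sigma": 1}},
    "link": "logit",  # maps the linear predictor onto the (0, 1) probability scale
}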
Load Modules¶
In [1]:
import hssm
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import arviz as az
Simulate some data¶
In [2]:
# synth data
n_participants = 10
conditions = ['switch', 'noswitch', 'someswitch']
n_samples_per_condition = 250

v_by_participant = np.random.normal(loc=0.0, scale=0.5, size=n_participants)
v_displacement_by_condition = {'switch': 1.0, 'noswitch': 0.0, 'someswitch': -1.0}
a_true = 1.0
z_true = 0.5
t_true = 0.5

dfs = []
for participant in range(n_participants):
    for condition in conditions:
        # Simulate one participant-condition cell from the DDM
        tmp_df = hssm.simulate_data(
            model='ddm',
            theta=dict(
                v=v_by_participant[participant] + v_displacement_by_condition[condition],
                a=a_true,
                z=z_true,
                t=t_true,
            ),
            size=n_samples_per_condition,
        )
        tmp_df['true_v'] = v_by_participant[participant] + v_displacement_by_condition[condition]
        tmp_df['true_a'] = a_true
        tmp_df['true_z'] = z_true
        tmp_df['true_t'] = t_true
        tmp_df['participant_id'] = str(participant)
        tmp_df['trialtype'] = condition
        dfs.append(tmp_df)

data_df = pd.concat(dfs).reset_index(drop=True)
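Before moving on, a quick sanity check on the simulated data: 10 participants × 3 conditions × 250 trials should yield 7,500 rows.

# Expect 7500 rows: one row per simulated trial
print(data_df.shape)
print(data_df.groupby(['participant_id', 'trialtype']).size().head())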
Inject uniform noise into rts to simulate outliers¶
In [3]:
# Inject noise
p_outlier_noise = np.random.uniform(0, 0.15, size=n_participants)

for i in range(n_participants):
    participant_index = data_df[data_df['participant_id'] == str(i)].index
    # Get indices of trials to inject noise into
    p_outlier_indices = np.random.choice(
        participant_index,
        size=int(p_outlier_noise[i] * len(participant_index)),
        replace=False,
    )
    # Overwrite the rts of those trials with uniform noise
    data_df.loc[p_outlier_indices, "rt"] = np.random.uniform(
        low=0.0, high=20.0, size=data_df.loc[p_outlier_indices, "rt"].shape
    )
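As a quick check, we can recompute the realized outlier fraction per participant; because of the int() truncation it is slightly below the drawn p_outlier_noise value.

# Compare drawn vs. realized outlier proportions per participant
for i in range(n_participants):
    n_i = (data_df['participant_id'] == str(i)).sum()
    n_injected = int(p_outlier_noise[i] * n_i)
    print(f"participant {i}: drawn p = {p_outlier_noise[i]:.3f}, realized p = {n_injected / n_i:.3f}")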
Define and sample from HSSM model¶
In [4]:
test_model_1_C = hssm.HSSM(
    data_df,
    model="ddm",
    loglik_kind="approx_differentiable",
    include=[{"name": "v", "formula": "v ~ 0 + (1 + C(trialtype) | participant_id)"}],
    p_outlier={
        "formula": "p_outlier ~ 1 + (1 | C(participant_id))",
        "prior": {"Intercept": {"name": "Normal", "mu": 0, "sigma": 1}},
        "link": "logit",
    },
)
No common intercept. Bounds for parameter v is not applied due to a current limitation of Bambi. This will change in the future. Model initialized successfully.
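This warning is expected here: the formula for v drops the common intercept (the 0 + term), and Bambi currently cannot apply the default bounds for v in that case.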
In [5]:
test_model_1_C.sample(
    sampler="nuts_numpyro",  # sampler to use: 'nuts_numpyro', 'nuts_blackjax', or the default PyMC NUTS sampler
    cores=2,  # how many cores to use
    chains=2,  # how many chains to run
    draws=500,  # number of draws from the Markov chain
    tune=500,  # number of burn-in samples
    idata_kwargs=dict(log_likelihood=True),  # return the log likelihood
)
Using default initvals.
/Users/afengler/Library/CloudStorage/OneDrive-Personal/proj_hssm/HSSM/.venv/lib/python3.11/site-packages/pymc/sampling/jax.py:475: UserWarning: There are not enough devices to run parallel chains: expected 2 but got 1. Chains will be drawn sequentially. If you are running MCMC in CPU, consider using `numpyro.set_host_device_count(2)` at the beginning of your program. You can double-check how many devices are available in your system using `jax.local_device_count()`.
  pmap_numpyro = MCMC(
sample: 100%|██████████| 1000/1000 [13:29<00:00, 1.24it/s, 31 steps of size 1.06e-01. acc. prob=0.91]
sample: 100%|██████████| 1000/1000 [11:19<00:00, 1.47it/s, 63 steps of size 1.12e-01. acc. prob=0.90]
We recommend running at least 4 chains for robust computation of convergence diagnostics
The rhat statistic is larger than 1.01 for some parameters. This indicates problems during sampling. See https://arxiv.org/abs/1903.08008 for details
100%|██████████| 1000/1000 [00:06<00:00, 146.11it/s]
Out[5]:
arviz.InferenceData with groups posterior (chain: 2, draw: 500; variables include a, t, p_outlier_Intercept, p_outlier_1|C(participant_id)_sigma, and the v random-effect terms), log_likelihood (rt,response over 7500 observations), sample_stats, and observed_data.
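Given the rhat warning above, it is worth checking convergence diagnostics for the p_outlier terms before interpreting them; a minimal check using the variable names from the posterior listing above:

# r_hat and effective sample sizes for the p_outlier regression terms
az.summary(
    test_model_1_C.traces,
    var_names=["p_outlier_Intercept", "p_outlier_1|C(participant_id)_sigma"],
)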
In [6]:
# Add trial-wise parameters to idata (since we define p_outlier as a regression)
test_model_1_C.add_likelihood_parameters_to_idata(inplace=True)
Out[6]:
arviz.InferenceData as above, with the posterior group now also containing the trial-wise deterministics v and p_outlier, each with dimensions (chain, draw, __obs__).
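We can verify that p_outlier is now available trial-wise:

# p_outlier is a deterministic with one value per (chain, draw, observation)
print(test_model_1_C.traces.posterior["p_outlier"].dims)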
Plot Results¶
In [7]:
# Define output dataframe for plotting
data_df['p_outlier'] = test_model_1_C.traces.posterior.p_outlier.mean(dim=['chain', 'draw'])
data_df[['hdi_higher', 'hdi_lower']] = (
    az.hdi(test_model_1_C.traces.posterior.p_outlier, hdi_prob=0.95)
    .to_dataframe()
    .reset_index()
    .pivot(index='__obs__', columns='hdi', values='p_outlier')
)
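The pivot above is compact but somewhat opaque; an equivalent, more explicit version selects the bounds from the hdi coordinate that az.hdi returns:

# Equivalent, more explicit extraction of the HDI bounds
hdi_ds = az.hdi(test_model_1_C.traces.posterior.p_outlier, hdi_prob=0.95)
data_df['hdi_lower'] = hdi_ds['p_outlier'].sel(hdi='lower').values
data_df['hdi_higher'] = hdi_ds['p_outlier'].sel(hdi='higher').values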
In [8]:
grouped_df = data_df.groupby('participant_id')[['p_outlier', 'hdi_higher', 'hdi_lower']].mean()
grouped_df['p_outlier_gt'] = p_outlier_noise
In [9]:
grouped_df
Out[9]:
| participant_id | p_outlier | hdi_higher | hdi_lower | p_outlier_gt |
| --- | --- | --- | --- | --- |
| 0 | 0.032498 | 0.050727 | 0.019399 | 0.022874 |
| 1 | 0.011941 | 0.022223 | 0.002686 | 0.004256 |
| 2 | 0.151013 | 0.181665 | 0.121329 | 0.128981 |
| 3 | 0.023265 | 0.035896 | 0.010958 | 0.016532 |
| 4 | 0.153351 | 0.183267 | 0.127115 | 0.136520 |
| 5 | 0.104289 | 0.131790 | 0.077138 | 0.080146 |
| 6 | 0.165424 | 0.196696 | 0.133180 | 0.129931 |
| 7 | 0.162412 | 0.193465 | 0.128042 | 0.146699 |
| 8 | 0.044307 | 0.063794 | 0.026451 | 0.034605 |
| 9 | 0.151349 | 0.182518 | 0.116987 | 0.133559 |
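The posterior means track the ground-truth outlier proportions closely, and the 95% HDI covers the true value for all but participant 6, whose ground truth falls just below the lower bound.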
In [10]:
# Create forest plot
plt.figure(figsize=(10, 6))

# Plot HDI bands for each participant
for idx, row in grouped_df.iterrows():
    plt.plot([row['hdi_lower'], row['hdi_higher']], [idx, idx], 'b-', linewidth=2)
    plt.plot(row['p_outlier'], idx, 'b|')  # plot posterior mean

# Plot ground truth values as red crosses
plt.plot(grouped_df['p_outlier_gt'], grouped_df.index, 'rx', label='Ground Truth', markersize=10)
plt.xlabel('p_outlier')
plt.ylabel('participant_id')
plt.title('Forest Plot of p_outlier Estimates with 95% HDI')
plt.legend()
plt.grid(True, alpha=0.3)
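The resulting forest plot shows the blue 95% HDI bands and posterior means lining up with the red ground-truth crosses, mirroring the recovery pattern in the table above.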