```
library(dplyr)
# Simulate a synthetic watch-time experiment with a two-part outcome:
# a logistic model decides whether watch time is exactly zero, and a
# log-normal model generates the positive watch times.
set.seed(123) # fixed seed so the simulated data set is reproducible
n <- 3000

# Covariates
fake_data <- data.frame(
  genre = sample(c("Comedy", "Education", "Music"), n, replace = TRUE),
  length = stats::rnorm(n, mean = 10, sd = 3),
  popular_channel = stats::rbinom(n, 1, 0.2)
)

# Treatment indicator: independent Bernoulli(0.5) draw per row
fake_data$treatment <- stats::rbinom(n, 1, 0.5)

# Model coefficients.
# beta_zero: intercept, "Education" genre, popular_channel (logit of P(zero)).
# beta_mean: intercept, length, popular_channel (log-scale mean of watch time).
beta_zero <- c(0.5, -0.2, 1)
beta_mean <- c(2, 0.1, 0.5)

# Treatment effect on the zero probability, expressed on the logit scale.
# It is calibrated so that a control probability of 0.3 maps to 0.25
# (a 0.05 reduction). Because the shift is applied on the logit scale, rows
# whose baseline probability differs from 0.3 see a different change in
# probability.
p_zero_control <- 0.3
p_zero_treated <- p_zero_control - 0.05
treatment_effect_zero_prob <-
  stats::qlogis(p_zero_treated) - stats::qlogis(p_zero_control)

# Treatment effect on watch time: additive shift of the log-scale mean
treatment_effect_watch_time <- 2

# Linear predictors for the OBSERVED treatment status of every row
zero_prob_logit <- beta_zero[1] +
  ifelse(fake_data$genre == "Education", beta_zero[2], 0) +
  beta_zero[3] * fake_data$popular_channel +
  treatment_effect_zero_prob * fake_data$treatment

log_normal_mean <- beta_mean[1] +
  beta_mean[2] * fake_data$length +
  beta_mean[3] * fake_data$popular_channel +
  treatment_effect_watch_time * fake_data$treatment

# Generate potential outcomes, unit-level effects and observed outcomes.
# within() appends the new columns in reverse order of assignment, so the
# statement order below determines the column order of the result. The order
# of the random draws is also significant for reproducibility.
#
# NOTE(review): the predictors above only contain the treatment term for rows
# with treatment == 1. The "control" columns remove that term again, but the
# "treated" columns use the predictors as they are. For rows with
# treatment == 0 both potential outcomes are therefore based on the control
# predictor: tau_zero_prob is exactly 0 and watch_time_treated is drawn
# without the treatment shift. Confirm whether this is intended.
fake_data <- within(fake_data, {
  zero_prob_control <- stats::plogis(
    zero_prob_logit - treatment_effect_zero_prob * treatment
  )
  zero_prob_treated <- stats::plogis(zero_prob_logit)

  # Watch time is 0 with probability zero_prob_*, otherwise log-normal
  watch_time_control <- ifelse(
    stats::rbinom(n, 1, 1 - zero_prob_control) == 1,
    stats::rlnorm(
      n,
      log_normal_mean - treatment_effect_watch_time * treatment,
      0.5
    ),
    0
  )
  watch_time_treated <- ifelse(
    stats::rbinom(n, 1, 1 - zero_prob_treated) == 1,
    stats::rlnorm(n, log_normal_mean, 0.5),
    0
  )

  # Unit-level treatment effects
  tau_watch_time <- watch_time_treated - watch_time_control
  tau_zero_prob <- zero_prob_treated - zero_prob_control

  # Observed outcome based on treatment assignment
  zero_prob <- ifelse(treatment == 1, zero_prob_treated, zero_prob_control)
  watch_time <- ifelse(treatment == 1, watch_time_treated, watch_time_control)
}) |>
  dplyr::mutate(treatment = as.logical(treatment))

dplyr::glimpse(fake_data)
```

```
Rows: 3,000
Columns: 12
$ genre <chr> "Music", "Music", "Music", "Education", "Music", "E…
$ length <dbl> 6.708240, 16.357660, 9.374124, 8.238192, 8.547379, …
$ popular_channel <int> 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, …
$ treatment <lgl> TRUE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE…
$ watch_time <dbl> 100.22589, 73.82856, 244.60962, 0.00000, 37.70174, …
$ zero_prob <dbl> 0.5618529, 0.6224593, 0.7770722, 0.5121690, 0.81757…
$ tau_zero_prob <dbl> -0.06060638, 0.00000000, -0.04050223, -0.06227353, …
$ tau_watch_time <dbl> 100.2258876, -73.8285590, 244.6096222, 0.0000000, 1…
$ watch_time_treated <dbl> 100.225888, 0.000000, 244.609622, 0.000000, 39.4416…
$ watch_time_control <dbl> 0.00000, 73.82856, 0.00000, 0.00000, 37.70174, 0.00…
$ zero_prob_treated <dbl> 0.5618529, 0.6224593, 0.7770722, 0.5121690, 0.81757…
$ zero_prob_control <dbl> 0.6224593, 0.6224593, 0.8175745, 0.5744425, 0.81757…
```