Counterfactual Scenario Analysis with ahead::ridge2f
R-bloggers 2025-12-11
In this post, we will explore how to perform counterfactual scenario analysis using the ridge2f function from the ahead package in R.
Counterfactual scenario analysis is a powerful tool for understanding how a time series would have evolved under alternative assumptions. It lets us ask "what if an input had been different?", quantify the impact of each scenario on the forecasts, and compare models under those scenarios.
We will use the insurance dataset (monthly data on insurance quotations and TV advertising expenditure), loaded below from the Techtonique datasets repository.
The dataset is split into three parts:

- train: historical data to learn from
- scenario: the period where we apply "what-if" scenarios
- test: the true future, where we evaluate forecasts
The train data is used to fit the model, the scenario data is where we inject counterfactual values of the exogenous variable (TV advertising), and the test data is used to evaluate the resulting forecasts under each counterfactual scenario.
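Before walking through the full script, here is the core call pattern in miniature, on synthetic data. This is only a sketch: everything in it except the `ridge2f` arguments (which match the ones used later in this post) is a made-up placeholder.

```r
# A minimal, self-contained sketch of the pattern (synthetic data;
# the real analysis below uses the insurance series)
set.seed(1)
y_hist <- cumsum(rnorm(40))            # target over TRAIN + SCENARIO periods
x_hist <- rnorm(40)                    # exogenous regressor
x_hist[31:40] <- x_hist[31:40] + 1     # counterfactual: +1 during the SCENARIO period
xreg_hist <- cbind(TV = x_hist, trend = 1:40)

fit <- ahead::ridge2f(y_hist,
                      h = 8,                       # forecast over the TEST period
                      xreg = xreg_hist,
                      lags = 5,
                      type_pi = "blockbootstrap",  # bootstrap prediction intervals
                      B = 200)                     # number of bootstrap replicates
fit$mean   # point forecasts; fit$lower / fit$upper hold the interval bounds
```

Each counterfactual is obtained by refitting with a different `xreg` over the scenario rows, then comparing the resulting test-period forecasts.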
install.packages("remotes")install.packages("gridExtra")remotes::install_github("Techtonique/ahead")url <- "https://raw.githubusercontent.com/Techtonique/datasets/refs/heads/main/time_series/multivariate/insurance_quotes_advert.csv"insurance <- read.csv(url)insurance <- ts(insurance, start=c(2002, 1), frequency = 12L)library(ahead)library(ggplot2)library(gridExtra)y <- insurance[, "Quotes"]TV <- insurance[, "TV.advert"]t <- as.numeric(time(insurance))T <- length(y)# ========== 3-PART SPLIT ==========# TRAIN: Historical data to learn from# SCENARIO: Period where we apply "what-if" scenarios# TEST: True future where we evaluate forecaststrain_pct <- 0.50scenario_pct <- 0.30# test_pct <- 0.20 (remainder)train_end <- floor(train_pct * T)scenario_end <- floor((train_pct + scenario_pct) * T)# Split datay_train <- y[1:train_end]y_scenario <- y[(train_end + 1):scenario_end]y_test <- y[(scenario_end + 1):T]TV_train <- TV[1:train_end]TV_scenario <- TV[(train_end + 1):scenario_end]TV_test <- TV[(scenario_end + 1):T]t_train <- t[1:train_end]t_scenario <- t[(train_end + 1):scenario_end]t_test <- t[(scenario_end + 1):T]h_test <- length(y_test)cat("\n=== 3-PART DATA SPLIT ===\n")cat("TRAIN: periods", 1, "to", train_end, "(n =", train_end, ")\n")cat("SCENARIO: periods", train_end + 1, "to", scenario_end, "(n =", length(y_scenario), ")\n")cat("TEST: periods", scenario_end + 1, "to", T, "(n =", h_test, ")\n\n")# ========== THE KEY INSIGHT ==========cat("=== THE APPROACH ===\n")cat("1. Train on: TRAIN data only\n")cat("2. Apply scenarios to: SCENARIO period (with actual y values)\n")cat("3. Forecast into: TEST period (what we're evaluating)\n")cat("4. Compare: How do different SCENARIO assumptions affect TEST forecasts?\n\n")# ========== DEFINE SCENARIOS ==========# Baseline: TV advertising unchanged# Scenario A: TV advertising was +1 higher during scenario period# Scenario B: TV advertising was -1 lower during scenario periodTV_scenario_A <- TV_scenario + 1TV_scenario_B <- TV_scenario - 1cat("=== SCENARIOS ===\n")cat("Scenario A: TV during scenario period = actual +1\n")cat("Scenario B: TV during scenario period = actual -1\n")cat("(We're asking: 'What if TV had been different in the recent past?')\n\n")# ========== BUILD TRAINING DATA WITH SCENARIOS ==========# For Baseline scenarioy_train_Baseline <- c(y_train, y_scenario)xreg_train_Baseline <- rbind( cbind(TV = TV_train, trend = t_train), cbind(TV = TV_scenario, trend = t_scenario))# For Scenario A: combine TRAIN + SCENARIO (with modified TV)y_train_A <- y_train_Baselinexreg_train_A <- rbind( cbind(TV = TV_train, trend = t_train), cbind(TV = TV_scenario_A, trend = t_scenario))# For Scenario B: combine TRAIN + SCENARIO (with different TV)y_train_B <- y_train_Baselinexreg_train_B <- rbind( cbind(TV = TV_train, trend = t_train), cbind(TV = TV_scenario_B, trend = t_scenario))# ========== FIT MODELS ==========cat("Fitting Scenario A model...\n")set.seed(123)res_Baseline <- ridge2f( y_train_Baseline, h = h_test, xreg = xreg_train_Baseline, lags = 5, type_pi = "blockbootstrap", B = 200)cat("Fitting Scenario A model...\n")set.seed(123)res_A <- ridge2f( y_train_A, h = h_test, xreg = xreg_train_A, lags = 5, type_pi = "blockbootstrap", B = 200)cat("Fitting Scenario B model...\n")set.seed(123)res_B <- ridge2f( y_train_B, h = h_test, xreg = xreg_train_B, lags = 5, type_pi = "blockbootstrap", B = 200)# ========== COMPARISON TABLE ==========comparison <- data.frame( Period = time(y_test), Actual = as.numeric(y_test), Forecast_Baseline = as.numeric(res_Baseline$mean), 
Forecast_A = as.numeric(res_A$mean), Forecast_B = as.numeric(res_B$mean), Diff_A_B = as.numeric(res_A$mean) - as.numeric(res_B$mean), Diff_A_Baseline = as.numeric(res_A$mean) - as.numeric(res_Baseline$mean), Diff_B_Baseline = as.numeric(res_B$mean) - as.numeric(res_Baseline$mean), Impact_A = as.numeric(res_A$mean) - as.numeric(y_test), Impact_B = as.numeric(res_B$mean) - as.numeric(y_test), Lower_A = as.numeric(res_A$lower), Upper_A = as.numeric(res_A$upper), Lower_B = as.numeric(res_B$lower), Upper_B = as.numeric(res_B$upper))cat("\n=== TEST PERIOD FORECASTS ===\n")print(round(comparison, 2))# ========== SCENARIO IMPACT ==========colnames_comparison <- colnames(comparison)print(summary(comparison[, 6:10]))for (i in 6:10){ print(colnames_comparison[i]) print(t.test(comparison[, i]))}# ========== COVERAGE ANALYSIS ==========in_A <- sum(comparison$Actual >= comparison$Lower_A & comparison$Actual <= comparison$Upper_A)in_B <- sum(comparison$Actual >= comparison$Lower_B & comparison$Actual <= comparison$Upper_B)coverage_A <- in_A / h_test * 100coverage_B <- in_B / h_test * 100cat("\n=== PREDICTION INTERVAL COVERAGE ===\n")cat(sprintf("Scenario A: %.1f%% (%d/%d)\n", coverage_A, in_A, h_test))cat(sprintf("Scenario B: %.1f%% (%d/%d)\n", coverage_B, in_B, h_test))# ========== PLOTS ============= 3-PART DATA SPLIT ===TRAIN: periods 1 to 20 (n = 20 )SCENARIO: periods 21 to 32 (n = 12 )TEST: periods 33 to 40 (n = 8 )=== THE APPROACH ===1. Train on: TRAIN data only2. Apply scenarios to: SCENARIO period (with actual y values)3. Forecast into: TEST period (what we're evaluating)4. Compare: How do different SCENARIO assumptions affect TEST forecasts?=== SCENARIOS ===Scenario A: TV during scenario period = actual +1Scenario B: TV during scenario period = actual -1(We're asking: 'What if TV had been different in the recent past?')Fitting Scenario A model... |======================================================================| 100%Fitting Scenario A model... |======================================================================| 100%Fitting Scenario B model... |======================================================================| 100%=== TEST PERIOD FORECASTS === Period Actual Forecast_Baseline Forecast_A Forecast_B Diff_A_B1 1 12.86 12.44 12.08 12.49 -0.422 2 12.09 12.16 11.43 12.75 -1.333 3 12.93 11.49 10.29 11.53 -1.244 4 11.72 10.70 9.23 9.41 -0.195 5 15.47 10.93 11.11 8.83 2.286 6 18.44 11.47 14.21 11.22 2.997 7 17.49 12.03 14.38 14.36 0.028 8 14.49 12.59 13.28 16.04 -2.76 Diff_A_Baseline Diff_B_Baseline Impact_A Impact_B Lower_A Upper_A Lower_B1 -0.36 0.05 -0.78 -0.37 10.97 12.80 10.502 -0.74 0.59 -0.66 0.66 10.51 12.12 10.583 -1.20 0.04 -2.64 -1.40 7.65 12.97 9.904 -1.47 -1.28 -2.50 -2.31 6.77 11.75 6.255 0.18 -2.10 -4.36 -6.64 8.64 13.69 5.166 2.74 -0.24 -4.23 -7.21 10.70 18.00 6.877 2.35 2.33 -3.11 -3.13 11.35 19.12 9.148 0.69 3.45 -1.21 1.54 10.56 17.72 12.62 Upper_B1 13.812 15.343 13.734 12.895 12.386 15.087 19.608 20.18 Diff_A_B Diff_A_Baseline Diff_B_Baseline Impact_A Min. :-2.75662 Min. :-1.46811 Min. :-2.10057 Min. :-4.3592 1st Qu.:-1.26217 1st Qu.:-0.85445 1st Qu.:-0.50317 1st Qu.:-3.3915 Median :-0.30014 Median :-0.09231 Median : 0.04573 Median :-2.5692 Mean :-0.08017 Mean : 0.27400 Mean : 0.35417 Mean :-2.4371 3rd Qu.: 0.58402 3rd Qu.: 1.10542 3rd Qu.: 1.02461 3rd Qu.:-1.1051 Max. : 2.98501 Max. : 2.74174 Max. : 3.44581 Max. :-0.6627 Impact_B Min. :-7.2143 1st Qu.:-4.0080 Median :-1.8562 Mean :-2.3569 3rd Qu.:-0.1095 Max. 
: 1.5440 [1] "Diff_A_B"One Sample t-testdata: comparison[, i]t = -0.11961, df = 7, p-value = 0.9082alternative hypothesis: true mean is not equal to 095 percent confidence interval: -1.665035 1.504705sample estimates: mean of x -0.08016502 [1] "Diff_A_Baseline"One Sample t-testdata: comparison[, i]t = 0.49381, df = 7, p-value = 0.6366alternative hypothesis: true mean is not equal to 095 percent confidence interval: -1.038059 1.586063sample estimates:mean of x 0.2740019 [1] "Diff_B_Baseline"One Sample t-testdata: comparison[, i]t = 0.55518, df = 7, p-value = 0.5961alternative hypothesis: true mean is not equal to 095 percent confidence interval: -1.154295 1.862629sample estimates:mean of x 0.3541669 [1] "Impact_A"One Sample t-testdata: comparison[, i]t = -4.7416, df = 7, p-value = 0.002104alternative hypothesis: true mean is not equal to 095 percent confidence interval: -3.652457 -1.221723sample estimates:mean of x -2.43709 [1] "Impact_B"One Sample t-testdata: comparison[, i]t = -2.0825, df = 7, p-value = 0.07581alternative hypothesis: true mean is not equal to 095 percent confidence interval: -5.0332109 0.3193609sample estimates:mean of x -2.356925 === PREDICTION INTERVAL COVERAGE ===Scenario A: 62.5% (5/8)Scenario B: 75.0% (6/8)library(ggplot2)library(tidyr)df_long <- comparison[, 6:10] %>% pivot_longer(cols = everything(), names_to = "Variable", values_to = "Value")# Create violin plotggplot(df_long, aes(x = Variable, y = Value, fill = Variable)) + geom_violin(trim = FALSE) + geom_jitter(width = 0.1, size = 1, alpha = 0.7) + # optional: show individual points theme_minimal() + labs(title = "Violin Plot of Comparison Columns", y = "Value", x = "Variable")# 1. Time series plot showing the full picturelibrary(ggplot2)library(tidyr)# Combine all periods for contextfull_data <- data.frame( Time = c(t_train, t_scenario, time(y_test)), Actual = c(y_train, y_scenario, y_test), Period = c(rep("Train", length(y_train)), rep("Scenario", length(y_scenario)), rep("Test", length(y_test))))forecast_data <- data.frame( Time = rep(time(y_test), 3), Forecast = c(res_Baseline$mean, res_A$mean, res_B$mean), Scenario = rep(c("Baseline", "TV +1", "TV -1"), each = h_test))ggplot() + geom_line(data = full_data, aes(x = Time, y = Actual), color = "black", size = 1) + geom_vline(xintercept = t_scenario[1], linetype = "dashed", color = "gray50", alpha = 0.7) + geom_vline(xintercept = time(y_test)[1], linetype = "dashed", color = "gray50", alpha = 0.7) + geom_line(data = forecast_data, aes(x = Time, y = Forecast, color = Scenario), size = 1) + annotate("text", x = mean(t_train), y = max(full_data$Actual), label = "TRAIN") + annotate("text", x = mean(t_scenario), y = max(full_data$Actual), label = "SCENARIO") + annotate("text", x = mean(time(y_test)), y = max(full_data$Actual), label = "TEST") + theme_minimal() + labs(title = "Counterfactual Forecasts: How Past TV Changes Affect Future Predictions", subtitle = "Different scenario assumptions in the recent past lead to different test forecasts", y = "Insurance Quotes", x = "Time") + scale_color_manual(values = c("Baseline" = "blue", "TV +1" = "red", "TV -1" = "green"))# 2. 
Forecast difference plot (shows the impact more clearly)diff_data <- data.frame( Time = time(y_test), Diff_A_vs_Baseline = res_A$mean - res_Baseline$mean, Diff_B_vs_Baseline = res_B$mean - res_Baseline$mean, Diff_A_vs_B = res_A$mean - res_B$mean) %>% pivot_longer(cols = -Time, names_to = "Comparison", values_to = "Difference")ggplot(diff_data, aes(x = Time, y = Difference, color = Comparison)) + geom_line(size = 1) + geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") + theme_minimal() + labs(title = "Forecast Differences: Impact of Counterfactual Scenarios", subtitle = "How much do forecasts change under different scenario assumptions?", y = "Difference in Forecasts", x = "Time")# 3. Prediction interval comparisoninterval_data <- data.frame( Time = rep(time(y_test), 2), Actual = rep(y_test, 2), Forecast = c(res_A$mean, res_B$mean), Lower = c(res_A$lower, res_B$lower), Upper = c(res_A$upper, res_B$upper), Scenario = rep(c("TV +1", "TV -1"), each = h_test))ggplot(interval_data, aes(x = Time)) + geom_ribbon(aes(ymin = Lower, ymax = Upper, fill = Scenario), alpha = 0.3) + geom_line(aes(y = Forecast, color = Scenario), size = 1) + geom_point(aes(y = Actual), color = "black", size = 2) + geom_line(aes(y = Actual), color = "black", linetype = "dashed") + facet_wrap(~Scenario, ncol = 1) + theme_minimal() + labs(title = "Prediction Intervals: Coverage Comparison", subtitle = "Black points show actual values", y = "Insurance Quotes", x = "Time")
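Coverage tells us about the intervals; it is also worth scoring the point forecasts themselves. The short block below is an addition of mine (not in the original analysis): it computes RMSE and MAE for each scenario's mean forecast on the test period, reusing the objects defined above.

```r
# Score each scenario's point forecasts against the TEST actuals (base R only)
rmse <- function(obs, pred) sqrt(mean((obs - pred)^2))
mae  <- function(obs, pred) mean(abs(obs - pred))

accuracy <- data.frame(
  Scenario = c("Baseline", "TV +1", "TV -1"),
  RMSE = c(rmse(y_test, res_Baseline$mean),
           rmse(y_test, res_A$mean),
           rmse(y_test, res_B$mean)),
  MAE  = c(mae(y_test, res_Baseline$mean),
           mae(y_test, res_A$mean),
           mae(y_test, res_B$mean))
)
print(accuracy)
```

A lower RMSE/MAE means that scenario's assumptions about the recent past produced forecasts closer to what actually happened in the test period.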

```r
# Multiple comparison correction
cat("\n=== STATISTICAL SIGNIFICANCE (with Bonferroni correction) ===\n")
n_tests <- 5
alpha_corrected <- 0.05 / n_tests
cat(sprintf("Adjusted alpha level: %.4f (Bonferroni correction for %d tests)\n\n",
            alpha_corrected, n_tests))

# Focus on the most relevant contrasts
contrasts <- list(
  "Scenario A vs B" = comparison$Diff_A_B,
  "Scenario A vs Baseline" = comparison$Diff_A_Baseline,
  "Scenario B vs Baseline" = comparison$Diff_B_Baseline
)

results <- data.frame(
  Contrast = names(contrasts),
  Mean_Diff = sapply(contrasts, mean),
  SE = sapply(contrasts, function(x) sd(x) / sqrt(length(x))),
  t_stat = NA,
  p_value = NA,
  CI_lower = NA,
  CI_upper = NA,
  Significant_at_0.05 = NA,
  Significant_corrected = NA
)

for (i in 1:nrow(results)) {
  test <- t.test(contrasts[[i]])
  results$t_stat[i] <- test$statistic
  results$p_value[i] <- test$p.value
  results$CI_lower[i] <- test$conf.int[1]
  results$CI_upper[i] <- test$conf.int[2]
  results$Significant_at_0.05[i] <- test$p.value < 0.05
  results$Significant_corrected[i] <- test$p.value < alpha_corrected
}

# Only round numeric columns for printing
numeric_cols_results <- sapply(results, is.numeric)
results_display <- results
results_display[, numeric_cols_results] <- round(results_display[, numeric_cols_results], 4)
print(results_display)

# Effect sizes (Cohen's d)
cat("\n=== EFFECT SIZES (Cohen's d) ===\n")
cohens_d <- function(x) {
  mean(x) / sd(x)
}
effect_sizes <- data.frame(
  Contrast = names(contrasts),
  Cohens_d = sapply(contrasts, cohens_d),
  Interpretation = sapply(sapply(contrasts, cohens_d), function(d) {
    abs_d <- abs(d)
    if (abs_d < 0.2) "negligible"
    else if (abs_d < 0.5) "small"
    else if (abs_d < 0.8) "medium"
    else "large"
  })
)
print(effect_sizes)

# Paired comparisons if more appropriate
cat("\n=== PAIRWISE COMPARISONS ===\n")
cat("Testing if forecast differences are consistently non-zero:\n\n")

# Wilcoxon signed-rank test (non-parametric alternative)
for (i in 1:length(contrasts)) {
  cat(names(contrasts)[i], ":\n")
  wilcox_test <- wilcox.test(contrasts[[i]], alternative = "two.sided")
  cat(sprintf("  Wilcoxon p-value: %.4f\n", wilcox_test$p.value))
  cat(sprintf("  Median difference: %.4f\n\n", median(contrasts[[i]])))
}
```

```
=== STATISTICAL SIGNIFICANCE (with Bonferroni correction) ===
Adjusted alpha level: 0.0100 (Bonferroni correction for 5 tests)

                                     Contrast Mean_Diff     SE  t_stat p_value
Scenario A vs B               Scenario A vs B   -0.0802 0.6702 -0.1196  0.9082
Scenario A vs Baseline Scenario A vs Baseline    0.2740 0.5549  0.4938  0.6366
Scenario B vs Baseline Scenario B vs Baseline    0.3542 0.6379  0.5552  0.5961
                       CI_lower CI_upper Significant_at_0.05
Scenario A vs B         -1.6650   1.5047               FALSE
Scenario A vs Baseline  -1.0381   1.5861               FALSE
Scenario B vs Baseline  -1.1543   1.8626               FALSE
                       Significant_corrected
Scenario A vs B                        FALSE
Scenario A vs Baseline                 FALSE
Scenario B vs Baseline                 FALSE

=== EFFECT SIZES (Cohen's d) ===
                                     Contrast    Cohens_d Interpretation
Scenario A vs B               Scenario A vs B -0.04228715     negligible
Scenario A vs Baseline Scenario A vs Baseline  0.17458895     negligible
Scenario B vs Baseline Scenario B vs Baseline  0.19628662     negligible

=== PAIRWISE COMPARISONS ===
Testing if forecast differences are consistently non-zero:

Scenario A vs B :
  Wilcoxon p-value: 0.7422
  Median difference: -0.3001

Scenario A vs Baseline :
  Wilcoxon p-value: 0.9453
  Median difference: -0.0923

Scenario B vs Baseline :
  Wilcoxon p-value: 0.6406
  Median difference: 0.0457
```
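A side note on the correction itself: instead of lowering the alpha threshold by hand, base R's `p.adjust()` can adjust the p-values directly. The variant below is my addition (not part of the original post), reusing the `results` data frame from above; Holm's step-down method controls the family-wise error rate like Bonferroni but is uniformly more powerful.

```r
# Adjust the raw p-values directly instead of lowering the alpha threshold
p_raw <- results$p_value
print(data.frame(
  Contrast     = results$Contrast,
  p_raw        = round(p_raw, 4),
  p_holm       = round(p.adjust(p_raw, method = "holm"), 4),
  p_bonferroni = round(p.adjust(p_raw, method = "bonferroni"), 4)
))
```

With only three contrasts and raw p-values this large, the conclusion is unchanged: none of the scenario differences are statistically significant.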