|
| 1 | +rm(list=ls()) |
| 2 | + |
| 3 | +# Load necessary libraries |
| 4 | +library(tidyverse) |
| 5 | +library(broom) |
| 6 | +library(fixest) |
| 7 | +library(haven) |
| 8 | + |
| 9 | +longdiff_col <- read_dta("application/113746-V1/longdiff/co/longdiff_col.dta") |
| 10 | + |
| 11 | +# Define variable groups |
| 12 | +holdridge <- c("ecozone_stdry", "ecozone_stwet", "ecozone_trdry", "ecozone_trwet", "ecozone_warm") |
| 13 | +conflict <- c("vioearly", "violate") |
| 14 | +conflict2 <- c("vioe" = "vioearly", "viol" = "violate") |
| 15 | +endowment <- c("cafetera", "carbon", "ganadera_neuva", "mktaccess", "manuf", "nivel_de_vida", "lndens") |
| 16 | +endowment2 <- c("cafe" = "cafetera", "carbon" = "carbon", "ganad" = "ganadera_neuva", |
| 17 | + "mkta" = "mktaccess", "manuf" = "manuf", "nivel" = "nivel_de_vida", "dens" = "lndens") |
| 18 | +both <- c(conflict, endowment) |
| 19 | +both2 <- c(conflict2, endowment2) |
| 20 | +diseases <- c("helminth_nh", "hookworm", "leishmaniasis", "yelfev") |
| 21 | +diseases2 <- c("helmnh" = "helminth_nh", "hook" = "hookworm", "leish" = "leishmaniasis", "yelfev" = "yelfev") |
| 22 | +allthree <- c("helm", "hook", "leish", "yel", "land", "vio", "cafetera", "carbon", "ganad", "mkta", "manuf", "nivel") |
| 23 | + |
| 24 | +# Helper function for scatter plots |
| 25 | +scatter_plot <- function(data, x_vars, y_var, title) { |
| 26 | + data %>% |
| 27 | + pivot_longer(cols = all_of(x_vars), names_to = "variable", values_to = "value") %>% |
| 28 | + ggplot(aes(x = value, y = .data[[y_var]])) + |
| 29 | + geom_point() + |
| 30 | + geom_smooth(method = "lm", se = FALSE) + |
| 31 | + facet_wrap(~ variable, scales = "free_x") + |
| 32 | + labs(title = title, y = y_var) |
| 33 | +} |
| 34 | + |
| 35 | +run_regression <- function(data, mal_measure, controls = NULL) { |
| 36 | + dep_vars <- c("dlit", "dsch", "dscore") |
| 37 | + results <- list() |
| 38 | + |
| 39 | + for (dv in dep_vars) { |
| 40 | + formula <- as.formula(paste(dv, "~", mal_measure, "+ bplregcol", ifelse(!is.null(controls), paste("+", paste(controls, collapse = "+")), ""))) |
| 41 | + model <- feols(formula, data = data, weights = ~ sqrt(wtbpl), vcov = "hetero") |
| 42 | + results[[dv]] <- tidy(model) %>% |
| 43 | + filter(term == mal_measure) %>% |
| 44 | + select(estimate, std.error) |
| 45 | + } |
| 46 | + |
| 47 | + bind_rows(results, .id = "dep_var") |
| 48 | +} |
| 49 | + |
| 50 | +# Run regressions and create table |
| 51 | +table_data <- bind_rows( |
| 52 | + run_regression(longdiff_col, "poveda") %>% mutate(row = "None (basic specification)", measure = "Poveda"), |
| 53 | + run_regression(longdiff_col, "poveda", c("vioearly", "violate")) %>% mutate(row = "Conflict", measure = "Poveda"), |
| 54 | + run_regression(longdiff_col, "poveda", c("cafetera", "carbon", "ganadera_neuva", "mktaccess", "manuf", "nivel_de_vida", "lndens")) %>% mutate(row = "Economic activity", measure = "Poveda"), |
| 55 | + run_regression(longdiff_col, "poveda", c("helminth_nh", "hookworm", "leishmaniasis", "yelfev")) %>% mutate(row = "Other diseases", measure = "Poveda"), |
| 56 | + run_regression(longdiff_col, "mell") %>% mutate(row = "None (basic specification)", measure = "Mellinger"), |
| 57 | + run_regression(longdiff_col, "mell", c("vioearly", "violate")) %>% mutate(row = "Conflict", measure = "Mellinger"), |
| 58 | + run_regression(longdiff_col, "mell", c("cafetera", "carbon", "ganadera_neuva", "mktaccess", "manuf", "nivel_de_vida", "lndens")) %>% mutate(row = "Economic activity", measure = "Mellinger"), |
| 59 | + run_regression(longdiff_col, "mell", c("helminth_nh", "hookworm", "leishmaniasis", "yelfev")) %>% mutate(row = "Other diseases", measure = "Mellinger") |
| 60 | +) |
| 61 | + |
| 62 | +# Format table |
| 63 | +formatted_table <- table_data %>% |
| 64 | + pivot_wider( |
| 65 | + id_cols = c(row, measure), |
| 66 | + names_from = dep_var, |
| 67 | + values_from = c(estimate, std.error), |
| 68 | + names_glue = "{dep_var}_{.value}" |
| 69 | + ) %>% |
| 70 | + select(row, measure, |
| 71 | + dlit_estimate, dlit_std.error, |
| 72 | + dsch_estimate, dsch_std.error, |
| 73 | + dscore_estimate, dscore_std.error) |
| 74 | + |
| 75 | +# Print formatted table |
| 76 | +print(formatted_table, n = Inf) |
| 77 | + |
| 78 | +basic_lit_model <- feols(dlit ~ poveda + bplregcol, |
| 79 | + data = longdiff_col, |
| 80 | + weights = ~ sqrt(wtbpl), |
| 81 | + vcov = "hetero") |
| 82 | + |
| 83 | +# Print the summary of the model |
| 84 | +summary(basic_lit_model) |
| 85 | + |
| 86 | +library(ggplot2) |
| 87 | + |
| 88 | +# Define the cont_twfe_weights function |
| 89 | +cont_twfe_weights <- function(l, D) { |
| 90 | + wt <- ( ( mean(D[D>=l]) - mean(D) ) * mean(1*(D>=l)) ) / var(D) |
| 91 | + wt |
| 92 | +} |
| 93 | + |
| 94 | +# Prepare the data |
| 95 | +dose <- longdiff_col$poveda |
| 96 | +dy <- longdiff_col$dlit # Using literacy as the outcome |
| 97 | + |
| 98 | +dL <- min(dose[dose>0]) |
| 99 | +dU <- max(dose) |
| 100 | + |
| 101 | +# Create dose grid |
| 102 | +dose_grid <- seq(dL, dU, length.out=100) |
| 103 | + |
| 104 | +# Density plot of the dose |
| 105 | +dose_density_plot <- ggplot(data.frame(dose=dose[dose>0]), aes(x=dose)) + |
| 106 | + geom_density(colour = "darkblue", linewidth = 1.2) + |
| 107 | + xlim(c(min(dose_grid), max(dose_grid))) + |
| 108 | + ylab("Density") + |
| 109 | + xlab("Dose (Poveda)") + |
| 110 | + ylim(c(0,3)) + |
| 111 | + labs(title="Density of Malaria Ecology (Poveda)") |
| 112 | + |
| 113 | +print(dose_density_plot) |
| 114 | + |
| 115 | +# Calculate TWFE weights |
| 116 | +twfe_weights <- sapply(dose_grid, cont_twfe_weights, D=dose) |
| 117 | + |
| 118 | +# Create dataframe for plotting |
| 119 | +plot_df <- data.frame(dose_grid = dose_grid, twfe_weights = twfe_weights) |
| 120 | + |
| 121 | +# TWFE weights plot |
| 122 | +twfe_weights_plot <- ggplot(data=plot_df, aes(x = dose_grid, y = twfe_weights)) + |
| 123 | + geom_line(colour = "darkblue", linewidth = 1.2) + |
| 124 | + xlim(c(min(dose_grid), max(dose_grid))) + |
| 125 | + ylab("TWFE weights") + |
| 126 | + xlab("Dose (Poveda)") + |
| 127 | + geom_vline(xintercept = mean(dose), colour="black", linewidth = 0.5, linetype = "dotted") + |
| 128 | + ylim(c(0,3)) + |
| 129 | + labs(title="TWFE weights for Malaria Ecology (Poveda)") |
| 130 | + |
| 131 | +print(twfe_weights_plot) |
| 132 | + |
| 133 | +library(gridExtra) |
| 134 | + |
| 135 | +grid.arrange(dose_density_plot, twfe_weights_plot, ncol=2) |
| 136 | + |
| 137 | +library(contdid) |
| 138 | +longdiff_col <- longdiff_col %>% filter(poveda < 1) %>% filter(is.na(poveda & dlit) == FALSE) |
| 139 | + |
| 140 | +res <- npiv_regression(treatment_col = "poveda", outcome_col = "dlit", data = longdiff_col) |
| 141 | + |
| 142 | +# Create data frame for ATT plot |
| 143 | +att_df <- data.frame( |
| 144 | + dose = res[["Xx"]], |
| 145 | + att = res[["hhat"]], |
| 146 | + upper = res[["ATT_upper_UCB"]], |
| 147 | + lower = res[["ATT_lower_UCB"]], |
| 148 | + se = res[["sigh"]] |
| 149 | +) |
| 150 | +# Calculate 95% CI |
| 151 | +att_df$ci_lower <- att_df$att - 1.96 * att_df$se |
| 152 | +att_df$ci_upper <- att_df$att + 1.96 * att_df$se |
| 153 | + |
| 154 | + |
| 155 | +# ATT(d|d) plot |
| 156 | +att_plot <- ggplot(att_df, aes(x = dose)) + |
| 157 | + # UCB (wider, lighter ribbon) |
| 158 | + geom_ribbon(aes(ymin = lower, ymax = upper), fill = "lightblue", alpha = 0.2) + |
| 159 | + # 95% CI (narrower, darker ribbon) |
| 160 | + geom_ribbon(aes(ymin = ci_lower, ymax = ci_upper), fill = "blue", alpha = 0.2) + |
| 161 | + # ATT line |
| 162 | + geom_line(aes(y = att), color = "blue", size = 1) + |
| 163 | + # Zero reference line |
| 164 | + geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") + |
| 165 | + labs(title = "Nonparametric Estimates of ATT(d|d)", |
| 166 | + subtitle = "With 95% CI (dark blue) and Uniform Confidence Bands (light blue)", |
| 167 | + x = "Malaria Ecology (Poveda)", |
| 168 | + y = "Average Treatment Effect on the Treated") + |
| 169 | + scale_x_continuous(labels = scales::number_format(accuracy = 0.01), |
| 170 | + breaks = scales::pretty_breaks(n = 10)) + |
| 171 | + scale_y_continuous(labels = scales::number_format(accuracy = 0.01), |
| 172 | + breaks = scales::pretty_breaks(n = 10)) + |
| 173 | + theme_minimal(base_size = 12) + |
| 174 | + theme( |
| 175 | + plot.title = element_text(hjust = 0.5, face = "bold", size = 16), |
| 176 | + plot.subtitle = element_text(hjust = 0.5, size = 12), |
| 177 | + axis.title = element_text(face = "bold", size = 14), |
| 178 | + axis.text = element_text(size = 12), |
| 179 | + panel.grid.minor = element_blank(), |
| 180 | + panel.grid.major = element_line(color = "gray90"), |
| 181 | + plot.margin = margin(t = 20, r = 20, b = 20, l = 20, unit = "pt"), |
| 182 | + legend.position = "none" |
| 183 | + ) |
| 184 | + |
| 185 | +print(att_plot) |
| 186 | + |
| 187 | +print(att_plot) |
| 188 | +library(binsreg) |
| 189 | +binscatter_plot <- binsreg(y = att_df$att, x = att_df$dose, |
| 190 | + dots = c(0, 0), # Degree 0 polynomial (means within bins) |
| 191 | + line = c(3, 0), # Cubic fit |
| 192 | + ci = c(3, 0), # Confidence intervals for cubic fit |
| 193 | + cb = c(3, 0), # Confidence band for cubic fi |
| 194 | + polyreg = 3, |
| 195 | + nsims=2000, # Add a global cubic fit |
| 196 | + simsgrid=50, |
| 197 | + legendoff = TRUE) # Turn off legend |
| 198 | + |
| 199 | +binscatter_plot <- binsreg(y = att_df$att, x = att_df$dose, |
| 200 | + dots = c(0, 0), # Degree 0 polynomial (means within bins) |
| 201 | + line = NULL, # No line connecting the dots |
| 202 | + ci = NULL, # No confidence intervals |
| 203 | + cb = NULL, # No confidence ban |
| 204 | + legendoff = TRUE) |
| 205 | + |
| 206 | +# Create data frame for ACR plot |
| 207 | +acr_df <- data.frame( |
| 208 | + dose = res[["Xx"]], |
| 209 | + acr = res[["dhat"]], |
| 210 | + upper = res[["ACR_upper_UCB"]], |
| 211 | + lower = res[["ACR_lower_UCB"]], |
| 212 | + se = res[["sigd"]] |
| 213 | +) |
| 214 | + |
| 215 | +# ACR plot (derivative of ATT) |
| 216 | +acr_plot <- ggplot(acr_df, aes(x = dose)) + |
| 217 | + geom_ribbon(aes(ymin = lower, ymax = upper), fill = "lightgreen", alpha = 0.3) + |
| 218 | + geom_line(aes(y = acr), color = "darkgreen", size = 1) + |
| 219 | + geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") + |
| 220 | + labs(title = "Derivative of ATT(d|d): Average Causal Response", |
| 221 | + x = "Malaria Ecology (Poveda)", |
| 222 | + y = "Average Causal Response") + |
| 223 | + theme_minimal() + |
| 224 | + theme( |
| 225 | + plot.title = element_text(hjust = 0.5, face = "bold"), |
| 226 | + axis.title = element_text(face = "bold"), |
| 227 | + panel.grid.minor = element_blank() |
| 228 | + ) |
| 229 | + |
| 230 | +# Print the plots |
| 231 | +print(att_plot) |
| 232 | +print(acr_plot) |
| 233 | + |
0 commit comments