Skip to content

Commit 696f4d7

Browse files
committed
update
1 parent 226f86b commit 696f4d7

12 files changed

+1713
-747
lines changed

.Rhistory

+413-413
Large diffs are not rendered by default.

R/npiv_regression.R

+40-2
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ npiv_regression <- function(data,
175175
data_full$binary <- as.integer(data_full[[treatment_col]] > 0)
176176
binarised <- tryCatch({
177177
# Fit the model using fixest
178-
model <- fixest::feols(as.formula(paste(outcome_col, "~ binary")), data = data_full)
178+
model <- fixest::feols(as.formula(paste(outcome_col, "~ binary")), data = data_full, vcov = "HC1")
179179

180180
# Check if the "binary" coefficient is present
181181
if (!"binary" %in% names(coef(model))) {
@@ -362,6 +362,43 @@ npiv_regression <- function(data,
362362
stop(e)
363363
})
364364

365+
# browser()
366+
create_summary <- function(binarised, ACR_estimate, se_ACR, p_value_ACR) {
367+
# Extracting the coefficient, standard error, and p-value for ATT^o
368+
att_coef <- binarised[["estimate"]]
369+
att_se <- binarised[["std_error"]]
370+
att_p <- binarised[["model"]][["coeftable"]][2, 4]
371+
372+
# Determine significance stars
373+
att_sig <- ifelse(att_p < 0.001, "***",
374+
ifelse(att_p < 0.01, "**",
375+
ifelse(att_p < 0.05, "*",
376+
ifelse(att_p < 0.1, ".", ""))))
377+
378+
# Create ATT^o summary string
379+
att_summary <- sprintf("ATT^o estimate : %.3f(%.3f)%s", att_coef, att_se, att_sig)
380+
381+
# Extracting the coefficient, standard error, and p-value for ACR^o
382+
acr_coef <- ACR_estimate
383+
acr_se <- se_ACR
384+
acr_p <- p_value_ACR
385+
386+
# Determine significance stars
387+
acr_sig <- ifelse(acr_p < 0.001, "***",
388+
ifelse(acr_p < 0.01, "**",
389+
ifelse(acr_p < 0.05, "*",
390+
ifelse(acr_p < 0.1, ".", ""))))
391+
392+
# Create ACR^o summary string
393+
acr_summary <- sprintf("ACR^o estimate : %.3f(%.3f)%s", acr_coef, acr_se, acr_sig)
394+
395+
# Return the summaries
396+
list(ATT_summary = att_summary, ACR_summary = acr_summary)
397+
}
398+
399+
# Calculate summaries using the create_summary function
400+
summaries <- create_summary(binarised, ACR_estimate, se_ACR, p_value_ACR)
401+
365402
# Return results
366403
# browser()
367404
list(
@@ -388,6 +425,7 @@ npiv_regression <- function(data,
388425
ACR_upper_UCB = ACR_upper_UCB,
389426
ACR_lower_UCB = ACR_lower_UCB,
390427
ci_lower_ACR = ci_lower_ACR,
391-
ci_upper_ACR = ci_upper_ACR
428+
ci_upper_ACR = ci_upper_ACR,
429+
summary = summaries
392430
)
393431
}

application/binscatter-lost.R

+233
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,233 @@
1+
rm(list=ls())
2+
3+
# Load necessary libraries
4+
library(tidyverse)
5+
library(broom)
6+
library(fixest)
7+
library(haven)
8+
9+
longdiff_col <- read_dta("application/113746-V1/longdiff/co/longdiff_col.dta")
10+
11+
# Define variable groups
12+
holdridge <- c("ecozone_stdry", "ecozone_stwet", "ecozone_trdry", "ecozone_trwet", "ecozone_warm")
13+
conflict <- c("vioearly", "violate")
14+
conflict2 <- c("vioe" = "vioearly", "viol" = "violate")
15+
endowment <- c("cafetera", "carbon", "ganadera_neuva", "mktaccess", "manuf", "nivel_de_vida", "lndens")
16+
endowment2 <- c("cafe" = "cafetera", "carbon" = "carbon", "ganad" = "ganadera_neuva",
17+
"mkta" = "mktaccess", "manuf" = "manuf", "nivel" = "nivel_de_vida", "dens" = "lndens")
18+
both <- c(conflict, endowment)
19+
both2 <- c(conflict2, endowment2)
20+
diseases <- c("helminth_nh", "hookworm", "leishmaniasis", "yelfev")
21+
diseases2 <- c("helmnh" = "helminth_nh", "hook" = "hookworm", "leish" = "leishmaniasis", "yelfev" = "yelfev")
22+
allthree <- c("helm", "hook", "leish", "yel", "land", "vio", "cafetera", "carbon", "ganad", "mkta", "manuf", "nivel")
23+
24+
# Helper function for scatter plots
25+
scatter_plot <- function(data, x_vars, y_var, title) {
26+
data %>%
27+
pivot_longer(cols = all_of(x_vars), names_to = "variable", values_to = "value") %>%
28+
ggplot(aes(x = value, y = .data[[y_var]])) +
29+
geom_point() +
30+
geom_smooth(method = "lm", se = FALSE) +
31+
facet_wrap(~ variable, scales = "free_x") +
32+
labs(title = title, y = y_var)
33+
}
34+
35+
run_regression <- function(data, mal_measure, controls = NULL) {
36+
dep_vars <- c("dlit", "dsch", "dscore")
37+
results <- list()
38+
39+
for (dv in dep_vars) {
40+
formula <- as.formula(paste(dv, "~", mal_measure, "+ bplregcol", ifelse(!is.null(controls), paste("+", paste(controls, collapse = "+")), "")))
41+
model <- feols(formula, data = data, weights = ~ sqrt(wtbpl), vcov = "hetero")
42+
results[[dv]] <- tidy(model) %>%
43+
filter(term == mal_measure) %>%
44+
select(estimate, std.error)
45+
}
46+
47+
bind_rows(results, .id = "dep_var")
48+
}
49+
50+
# Run regressions and create table
51+
table_data <- bind_rows(
52+
run_regression(longdiff_col, "poveda") %>% mutate(row = "None (basic specification)", measure = "Poveda"),
53+
run_regression(longdiff_col, "poveda", c("vioearly", "violate")) %>% mutate(row = "Conflict", measure = "Poveda"),
54+
run_regression(longdiff_col, "poveda", c("cafetera", "carbon", "ganadera_neuva", "mktaccess", "manuf", "nivel_de_vida", "lndens")) %>% mutate(row = "Economic activity", measure = "Poveda"),
55+
run_regression(longdiff_col, "poveda", c("helminth_nh", "hookworm", "leishmaniasis", "yelfev")) %>% mutate(row = "Other diseases", measure = "Poveda"),
56+
run_regression(longdiff_col, "mell") %>% mutate(row = "None (basic specification)", measure = "Mellinger"),
57+
run_regression(longdiff_col, "mell", c("vioearly", "violate")) %>% mutate(row = "Conflict", measure = "Mellinger"),
58+
run_regression(longdiff_col, "mell", c("cafetera", "carbon", "ganadera_neuva", "mktaccess", "manuf", "nivel_de_vida", "lndens")) %>% mutate(row = "Economic activity", measure = "Mellinger"),
59+
run_regression(longdiff_col, "mell", c("helminth_nh", "hookworm", "leishmaniasis", "yelfev")) %>% mutate(row = "Other diseases", measure = "Mellinger")
60+
)
61+
62+
# Format table
63+
formatted_table <- table_data %>%
64+
pivot_wider(
65+
id_cols = c(row, measure),
66+
names_from = dep_var,
67+
values_from = c(estimate, std.error),
68+
names_glue = "{dep_var}_{.value}"
69+
) %>%
70+
select(row, measure,
71+
dlit_estimate, dlit_std.error,
72+
dsch_estimate, dsch_std.error,
73+
dscore_estimate, dscore_std.error)
74+
75+
# Print formatted table
76+
print(formatted_table, n = Inf)
77+
78+
basic_lit_model <- feols(dlit ~ poveda + bplregcol,
79+
data = longdiff_col,
80+
weights = ~ sqrt(wtbpl),
81+
vcov = "hetero")
82+
83+
# Print the summary of the model
84+
summary(basic_lit_model)
85+
86+
library(ggplot2)
87+
88+
# Define the cont_twfe_weights function
89+
cont_twfe_weights <- function(l, D) {
90+
wt <- ( ( mean(D[D>=l]) - mean(D) ) * mean(1*(D>=l)) ) / var(D)
91+
wt
92+
}
93+
94+
# Prepare the data
95+
dose <- longdiff_col$poveda
96+
dy <- longdiff_col$dlit # Using literacy as the outcome
97+
98+
dL <- min(dose[dose>0])
99+
dU <- max(dose)
100+
101+
# Create dose grid
102+
dose_grid <- seq(dL, dU, length.out=100)
103+
104+
# Density plot of the dose
105+
dose_density_plot <- ggplot(data.frame(dose=dose[dose>0]), aes(x=dose)) +
106+
geom_density(colour = "darkblue", linewidth = 1.2) +
107+
xlim(c(min(dose_grid), max(dose_grid))) +
108+
ylab("Density") +
109+
xlab("Dose (Poveda)") +
110+
ylim(c(0,3)) +
111+
labs(title="Density of Malaria Ecology (Poveda)")
112+
113+
print(dose_density_plot)
114+
115+
# Calculate TWFE weights
116+
twfe_weights <- sapply(dose_grid, cont_twfe_weights, D=dose)
117+
118+
# Create dataframe for plotting
119+
plot_df <- data.frame(dose_grid = dose_grid, twfe_weights = twfe_weights)
120+
121+
# TWFE weights plot
122+
twfe_weights_plot <- ggplot(data=plot_df, aes(x = dose_grid, y = twfe_weights)) +
123+
geom_line(colour = "darkblue", linewidth = 1.2) +
124+
xlim(c(min(dose_grid), max(dose_grid))) +
125+
ylab("TWFE weights") +
126+
xlab("Dose (Poveda)") +
127+
geom_vline(xintercept = mean(dose), colour="black", linewidth = 0.5, linetype = "dotted") +
128+
ylim(c(0,3)) +
129+
labs(title="TWFE weights for Malaria Ecology (Poveda)")
130+
131+
print(twfe_weights_plot)
132+
133+
library(gridExtra)
134+
135+
grid.arrange(dose_density_plot, twfe_weights_plot, ncol=2)
136+
137+
library(contdid)
138+
longdiff_col <- longdiff_col %>% filter(poveda < 1) %>% filter(is.na(poveda & dlit) == FALSE)
139+
140+
res <- npiv_regression(treatment_col = "poveda", outcome_col = "dlit", data = longdiff_col)
141+
142+
# Create data frame for ATT plot
143+
att_df <- data.frame(
144+
dose = res[["Xx"]],
145+
att = res[["hhat"]],
146+
upper = res[["ATT_upper_UCB"]],
147+
lower = res[["ATT_lower_UCB"]],
148+
se = res[["sigh"]]
149+
)
150+
# Calculate 95% CI
151+
att_df$ci_lower <- att_df$att - 1.96 * att_df$se
152+
att_df$ci_upper <- att_df$att + 1.96 * att_df$se
153+
154+
155+
# ATT(d|d) plot
156+
att_plot <- ggplot(att_df, aes(x = dose)) +
157+
# UCB (wider, lighter ribbon)
158+
geom_ribbon(aes(ymin = lower, ymax = upper), fill = "lightblue", alpha = 0.2) +
159+
# 95% CI (narrower, darker ribbon)
160+
geom_ribbon(aes(ymin = ci_lower, ymax = ci_upper), fill = "blue", alpha = 0.2) +
161+
# ATT line
162+
geom_line(aes(y = att), color = "blue", size = 1) +
163+
# Zero reference line
164+
geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
165+
labs(title = "Nonparametric Estimates of ATT(d|d)",
166+
subtitle = "With 95% CI (dark blue) and Uniform Confidence Bands (light blue)",
167+
x = "Malaria Ecology (Poveda)",
168+
y = "Average Treatment Effect on the Treated") +
169+
scale_x_continuous(labels = scales::number_format(accuracy = 0.01),
170+
breaks = scales::pretty_breaks(n = 10)) +
171+
scale_y_continuous(labels = scales::number_format(accuracy = 0.01),
172+
breaks = scales::pretty_breaks(n = 10)) +
173+
theme_minimal(base_size = 12) +
174+
theme(
175+
plot.title = element_text(hjust = 0.5, face = "bold", size = 16),
176+
plot.subtitle = element_text(hjust = 0.5, size = 12),
177+
axis.title = element_text(face = "bold", size = 14),
178+
axis.text = element_text(size = 12),
179+
panel.grid.minor = element_blank(),
180+
panel.grid.major = element_line(color = "gray90"),
181+
plot.margin = margin(t = 20, r = 20, b = 20, l = 20, unit = "pt"),
182+
legend.position = "none"
183+
)
184+
185+
print(att_plot)
186+
187+
print(att_plot)
188+
library(binsreg)
189+
binscatter_plot <- binsreg(y = att_df$att, x = att_df$dose,
190+
dots = c(0, 0), # Degree 0 polynomial (means within bins)
191+
line = c(3, 0), # Cubic fit
192+
ci = c(3, 0), # Confidence intervals for cubic fit
193+
cb = c(3, 0), # Confidence band for cubic fi
194+
polyreg = 3,
195+
nsims=2000, # Add a global cubic fit
196+
simsgrid=50,
197+
legendoff = TRUE) # Turn off legend
198+
199+
binscatter_plot <- binsreg(y = att_df$att, x = att_df$dose,
200+
dots = c(0, 0), # Degree 0 polynomial (means within bins)
201+
line = NULL, # No line connecting the dots
202+
ci = NULL, # No confidence intervals
203+
cb = NULL, # No confidence ban
204+
legendoff = TRUE)
205+
206+
# Create data frame for ACR plot
207+
acr_df <- data.frame(
208+
dose = res[["Xx"]],
209+
acr = res[["dhat"]],
210+
upper = res[["ACR_upper_UCB"]],
211+
lower = res[["ACR_lower_UCB"]],
212+
se = res[["sigd"]]
213+
)
214+
215+
# ACR plot (derivative of ATT)
216+
acr_plot <- ggplot(acr_df, aes(x = dose)) +
217+
geom_ribbon(aes(ymin = lower, ymax = upper), fill = "lightgreen", alpha = 0.3) +
218+
geom_line(aes(y = acr), color = "darkgreen", size = 1) +
219+
geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
220+
labs(title = "Derivative of ATT(d|d): Average Causal Response",
221+
x = "Malaria Ecology (Poveda)",
222+
y = "Average Causal Response") +
223+
theme_minimal() +
224+
theme(
225+
plot.title = element_text(hjust = 0.5, face = "bold"),
226+
axis.title = element_text(face = "bold"),
227+
panel.grid.minor = element_blank()
228+
)
229+
230+
# Print the plots
231+
print(att_plot)
232+
print(acr_plot)
233+

application/mal-dummy.R

+45
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# Clear the workspace
2+
rm(list = ls())
3+
4+
# Load necessary libraries
5+
library(fixest)
6+
library(dplyr)
7+
library(broom)
8+
library(lmtest)
9+
library(tidyverse)
10+
library(haven)
11+
library(knitr)
12+
library(sandwich)
13+
14+
# Load the dataset
15+
data <- read_dta("application/113746-V1/longdiff/co/longdiff_col.dta")
16+
17+
# Convert bplregcol to a factor variable
18+
data <- data %>% mutate(bplregcol = as.factor(bplregcol))
19+
20+
# Remove rows with missing bplregcol values
21+
data <- data %>% filter(!is.na(bplregcol))
22+
23+
# Create dummy variables for bplregcol
24+
dummies <- model.matrix(~ bplregcol, data)
25+
dummies <- dummies[, -1] # Remove the first column to avoid multicollinearity
26+
27+
# Calculate log of population density
28+
data$log_lndens <- log(data$lndens)
29+
30+
# Combine dummy variables with the original data
31+
data_with_dummies <- cbind(data, dummies)
32+
33+
# Ensure weights are included in the data (assuming wtbpl exists in the data)
34+
weights <- data_with_dummies$wtbpl
35+
36+
# Run the regression with weights
37+
model_A <- lm(dlit ~ poveda + log_lndens + nivel_de_vida + .,
38+
data = data_with_dummies[, c("dlit", "poveda", "log_lndens", "nivel_de_vida", colnames(dummies))],
39+
weights = weights)
40+
41+
# Calculate robust standard errors
42+
robust_se_A <- coeftest(model_A, vcov = vcovHC(model_A, type = "HC1"))
43+
44+
# Display summary with robust standard errors
45+
robust_se_A

0 commit comments

Comments
 (0)