oddish3
diff --git a/‎.Rhistory
+413-413 b/‎.Rhistory
+413-413
diff --git a/‎R/npiv_regression.R
+40-2 b/‎R/npiv_regression.R
+40-2
diff --git a/‎application/binscatter-lost.R
+233 b/‎application/binscatter-lost.R
+233
diff --git a/‎application/mal-dummy.R
+45 b/‎application/mal-dummy.R
+45
@@ -175,7 +175,7 @@ npiv_regression <- function(data,
   data_full$binary <- as.integer(data_full[[treatment_col]] > 0)
   binarised <- tryCatch({
     # Fit the model using fixest
-    model <- fixest::feols(as.formula(paste(outcome_col, "~ binary")), data = data_full)
+    model <- fixest::feols(as.formula(paste(outcome_col, "~ binary")), data = data_full, vcov = "HC1")
 
     # Check if the "binary" coefficient is present
     if (!"binary" %in% names(coef(model))) {
@@ -362,6 +362,43 @@ npiv_regression <- function(data,
     stop(e)
   })
 
+  # browser()
+  # Build one-line text summaries of the ATT^o and ACR^o estimates.
+  #
+  # Arguments:
+  #   binarised    list read through `estimate`, `std_error` and
+  #                `model$coeftable` (result of the binarised-treatment fit).
+  #   ACR_estimate ACR^o point estimate.
+  #   se_ACR       standard error of the ACR^o estimate.
+  #   p_value_ACR  p-value of the ACR^o estimate.
+  #
+  # Returns a list with `ATT_summary` and `ACR_summary`, each formatted as
+  # "<label> estimate : <estimate>(<std. error>)<significance marker>".
+  create_summary <- function(binarised, ACR_estimate, se_ACR, p_value_ACR) {
+    # Map p-values to significance markers:
+    #   p < 0.001 "***", p < 0.01 "**", p < 0.05 "*", p < 0.1 ".", else "".
+    # Missing p-values get no marker, so a literal "NA" never ends up in the
+    # formatted summary string.
+    significance_stars <- function(p) {
+      markers <- c("***", "**", "*", ".", "")
+      stars <- markers[findInterval(p, c(0.001, 0.01, 0.05, 0.1)) + 1L]
+      stars[is.na(stars)] <- ""
+      stars
+    }
+
+    # One summary line: estimate, standard error in parentheses, marker.
+    format_estimate <- function(label, estimate, std_error, p_value) {
+      sprintf("%s estimate : %.3f(%.3f)%s",
+              label, estimate, std_error, significance_stars(p_value))
+    }
+
+    # Row 2 of the coefficient table is the `binary` term of the
+    # `outcome ~ binary` fit (row 1 is the intercept); column 4 is read as
+    # its p-value.
+    att_p <- binarised[["model"]][["coeftable"]][2, 4]
+
+    # Return the summaries
+    list(
+      ATT_summary = format_estimate("ATT^o", binarised[["estimate"]],
+                                    binarised[["std_error"]], att_p),
+      ACR_summary = format_estimate("ACR^o", ACR_estimate, se_ACR, p_value_ACR)
+    )
+  }
+
+  # Calculate summaries using the create_summary function
+  summaries <- create_summary(binarised, ACR_estimate, se_ACR, p_value_ACR)
+
   # Return results
   # browser()
   list(
@@ -388,6 +425,7 @@ npiv_regression <- function(data,
     ACR_upper_UCB = ACR_upper_UCB,
     ACR_lower_UCB = ACR_lower_UCB,
     ci_lower_ACR = ci_lower_ACR,
-    ci_upper_ACR = ci_upper_ACR
+    ci_upper_ACR = ci_upper_ACR,
+    summary = summaries
   )
 }
@@ -0,0 +1,233 @@
+rm(list=ls())
+
+# Load necessary libraries
+library(tidyverse)
+library(broom)
+library(fixest)
+library(haven)
+
+# Long-difference data (Stata file)
+longdiff_col <- read_dta("application/113746-V1/longdiff/co/longdiff_col.dta")
+
+# Groups of variable names. Each *2 vector holds the same column names as
+# its counterpart, labelled with short aliases.
+holdridge <- c(
+  "ecozone_stdry", "ecozone_stwet", "ecozone_trdry", "ecozone_trwet", "ecozone_warm"
+)
+
+conflict <- c("vioearly", "violate")
+conflict2 <- setNames(conflict, c("vioe", "viol"))
+
+endowment <- c(
+  "cafetera", "carbon", "ganadera_neuva", "mktaccess",
+  "manuf", "nivel_de_vida", "lndens"
+)
+endowment2 <- setNames(
+  endowment,
+  c("cafe", "carbon", "ganad", "mkta", "manuf", "nivel", "dens")
+)
+
+# Conflict and endowment controls combined
+both <- c(conflict, endowment)
+both2 <- c(conflict2, endowment2)
+
+diseases <- c("helminth_nh", "hookworm", "leishmaniasis", "yelfev")
+diseases2 <- setNames(diseases, c("helmnh", "hook", "leish", "yelfev"))
+
+allthree <- c(
+  "helm", "hook", "leish", "yel", "land", "vio",
+  "cafetera", "carbon", "ganad", "mkta", "manuf", "nivel"
+)
+
+# Faceted scatter plots of one outcome against several predictors.
+#
+# data:   data frame holding the columns named in `x_vars` and `y_var`.
+# x_vars: character vector of predictor columns; one facet per column.
+# y_var:  name of the outcome column, drawn on the y axis.
+# title:  plot title.
+#
+# Returns a ggplot object with points and a linear fit (no standard-error
+# band) in every facet; x scales are free across facets.
+scatter_plot <- function(data, x_vars, y_var, title) {
+  # Stack the predictors so that each one becomes a facet
+  long_data <- pivot_longer(
+    data,
+    cols = all_of(x_vars),
+    names_to = "variable",
+    values_to = "value"
+  )
+
+  ggplot(long_data, aes(x = value, y = .data[[y_var]])) +
+    geom_point() +
+    geom_smooth(method = "lm", se = FALSE) +
+    facet_wrap(~ variable, scales = "free_x") +
+    labs(title = title, y = y_var)
+}
+
+# Regress each outcome (dlit, dsch, dscore) on a malaria measure.
+#
+# data:        data frame with the outcomes, `mal_measure`, `bplregcol`,
+#              `wtbpl` and any control columns.
+# mal_measure: name of the malaria-measure column.
+# controls:    optional character vector of additional control columns.
+#
+# Every model is `outcome ~ mal_measure + bplregcol [+ controls]`, weighted
+# by sqrt(wtbpl) with "hetero" standard errors. Returns one row per outcome
+# with columns `dep_var`, `estimate` and `std.error` for the `mal_measure`
+# term.
+run_regression <- function(data, mal_measure, controls = NULL) {
+  outcomes <- c("dlit", "dsch", "dscore")
+
+  # Extra right-hand-side terms, shared by all outcomes
+  if (is.null(controls)) {
+    control_terms <- ""
+  } else {
+    control_terms <- paste("+", paste(controls, collapse = "+"))
+  }
+
+  # Fit one outcome and keep only the malaria-measure coefficient
+  fit_outcome <- function(outcome) {
+    spec <- as.formula(paste(outcome, "~", mal_measure, "+ bplregcol", control_terms))
+    fit <- feols(spec, data = data, weights = ~ sqrt(wtbpl), vcov = "hetero")
+    tidy(fit) %>%
+      filter(term == mal_measure) %>%
+      select(estimate, std.error)
+  }
+
+  per_outcome <- lapply(setNames(outcomes, outcomes), fit_outcome)
+  bind_rows(per_outcome, .id = "dep_var")
+}
+
+# Control sets, one per table row; NULL is the basic specification
+control_sets <- list(
+  "None (basic specification)" = NULL,
+  "Conflict" = c("vioearly", "violate"),
+  "Economic activity" = c("cafetera", "carbon", "ganadera_neuva", "mktaccess",
+                          "manuf", "nivel_de_vida", "lndens"),
+  "Other diseases" = c("helminth_nh", "hookworm", "leishmaniasis", "yelfev")
+)
+
+# Malaria measures: display label -> column name
+malaria_measures <- c("Poveda" = "poveda", "Mellinger" = "mell")
+
+# Run every measure x control-set combination (all Poveda rows first, then
+# all Mellinger rows) and stack the results
+table_data <- bind_rows(lapply(names(malaria_measures), function(measure_label) {
+  bind_rows(lapply(names(control_sets), function(row_label) {
+    run_regression(longdiff_col,
+                   malaria_measures[[measure_label]],
+                   control_sets[[row_label]]) %>%
+      mutate(row = row_label, measure = measure_label)
+  }))
+}))
+
+# Format table: one row per specification and measure, with an estimate and
+# standard-error column for each outcome
+outcome_columns <- c("dlit_estimate", "dlit_std.error",
+                     "dsch_estimate", "dsch_std.error",
+                     "dscore_estimate", "dscore_std.error")
+
+wide_table <- pivot_wider(
+  table_data,
+  id_cols = c(row, measure),
+  names_from = dep_var,
+  values_from = c(estimate, std.error),
+  names_glue = "{dep_var}_{.value}"
+)
+formatted_table <- select(wide_table, row, measure, all_of(outcome_columns))
+
+# Print every row of the formatted table
+print(formatted_table, n = Inf)
+
+# Basic literacy specification: dlit on poveda and bplregcol, weighted by
+# sqrt(wtbpl) with "hetero" standard errors
+lit_spec <- dlit ~ poveda + bplregcol
+basic_lit_model <- feols(
+  lit_spec,
+  data = longdiff_col,
+  weights = ~ sqrt(wtbpl),
+  vcov = "hetero"
+)
+
+# Show the model summary
+summary(basic_lit_model)
+
+library(ggplot2)
+
+# TWFE weight at dose level `l` for the dose vector `D`:
+#   (mean(D | D >= l) - mean(D)) * share(D >= l) / var(D)
+#
+# l: a single dose level.
+# D: numeric vector of doses.
+#
+# Returns a single number; NaN when no element of `D` is at or above `l`.
+cont_twfe_weights <- function(l, D) {
+  at_or_above <- D >= l
+  mean_gap <- mean(D[at_or_above]) - mean(D)
+  share_at_or_above <- mean(at_or_above)
+  (mean_gap * share_at_or_above) / var(D)
+}
+
+# Dose (Poveda malaria measure) and outcome (change in literacy)
+dose <- longdiff_col[["poveda"]]
+dy <- longdiff_col[["dlit"]]
+
+# Support of the strictly positive doses
+positive_dose <- dose[dose > 0]
+dL <- min(positive_dose)
+dU <- max(dose)
+
+# Evenly spaced grid of 100 dose values over [dL, dU]
+dose_grid <- seq(dL, dU, length.out = 100)
+
+# Density of the positive doses, drawn over the grid range
+dose_density_plot <- ggplot(data.frame(dose = positive_dose), aes(x = dose)) +
+  geom_density(colour = "darkblue", linewidth = 1.2) +
+  xlim(range(dose_grid)) +
+  ylim(0, 3) +
+  labs(
+    title = "Density of Malaria Ecology (Poveda)",
+    x = "Dose (Poveda)",
+    y = "Density"
+  )
+
+print(dose_density_plot)
+
+# TWFE weight at every grid point
+twfe_weights <- vapply(dose_grid, cont_twfe_weights, numeric(1), D = dose)
+
+# Grid and weights side by side for plotting
+plot_df <- data.frame(dose_grid = dose_grid, twfe_weights = twfe_weights)
+
+# Weights along the dose grid; the dotted vertical line marks the mean dose
+twfe_weights_plot <- ggplot(plot_df, aes(x = dose_grid, y = twfe_weights)) +
+  geom_line(colour = "darkblue", linewidth = 1.2) +
+  geom_vline(xintercept = mean(dose), colour = "black", linewidth = 0.5,
+             linetype = "dotted") +
+  xlim(range(dose_grid)) +
+  ylim(0, 3) +
+  labs(
+    title = "TWFE weights for Malaria Ecology (Poveda)",
+    x = "Dose (Poveda)",
+    y = "TWFE weights"
+  )
+
+print(twfe_weights_plot)
+
+library(gridExtra)
+
+# Density and weights next to each other
+grid.arrange(dose_density_plot, twfe_weights_plot, ncol = 2)
+
+library(contdid)
+# Estimation sample: doses below 1 with an observed outcome.
+# filter() drops rows whose condition evaluates to NA, so `poveda < 1`
+# already removes missing doses. Missing outcomes need their own test:
+# `is.na(poveda & dlit)` is FALSE whenever either value is exactly 0
+# (`FALSE & NA` is FALSE), which let zero-dose rows with a missing `dlit`
+# through.
+longdiff_col <- longdiff_col %>%
+  filter(poveda < 1, !is.na(dlit))
+
+# Dose-response estimation; the returned list is unpacked below
+res <- npiv_regression(treatment_col = "poveda", outcome_col = "dlit", data = longdiff_col)
+
+# Collect the ATT estimates returned by npiv_regression: evaluation points,
+# point estimates, uniform confidence band limits and standard errors
+att_df <- data.frame(
+  dose = res[["Xx"]],
+  att = res[["hhat"]],
+  upper = res[["ATT_upper_UCB"]],
+  lower = res[["ATT_lower_UCB"]],
+  se = res[["sigh"]]
+)
+
+# Pointwise 95% interval: estimate +/- 1.96 standard errors
+half_width <- 1.96 * att_df[["se"]]
+att_df[["ci_lower"]] <- att_df[["att"]] - half_width
+att_df[["ci_upper"]] <- att_df[["att"]] + half_width
+
+
+# ATT(d|d) plot: point estimates with the pointwise 95% interval and the
+# uniform confidence band from att_df
+att_plot <- ggplot(att_df, aes(x = dose)) +
+  # UCB (wider, lighter ribbon)
+  geom_ribbon(aes(ymin = lower, ymax = upper), fill = "lightblue", alpha = 0.2) +
+  # 95% CI (narrower, darker ribbon)
+  geom_ribbon(aes(ymin = ci_lower, ymax = ci_upper), fill = "blue", alpha = 0.2) +
+  # ATT line; `linewidth` replaces the line `size` aesthetic deprecated in
+  # ggplot2 3.4.0 and matches the other plots in this script
+  geom_line(aes(y = att), color = "blue", linewidth = 1) +
+  # Zero reference line
+  geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
+  labs(title = "Nonparametric Estimates of ATT(d|d)",
+       subtitle = "With 95% CI (dark blue) and Uniform Confidence Bands (light blue)",
+       x = "Malaria Ecology (Poveda)",
+       y = "Average Treatment Effect on the Treated") +
+  # Two-decimal axis labels with roughly ten breaks per axis
+  scale_x_continuous(labels = scales::number_format(accuracy = 0.01),
+                     breaks = scales::pretty_breaks(n = 10)) +
+  scale_y_continuous(labels = scales::number_format(accuracy = 0.01),
+                     breaks = scales::pretty_breaks(n = 10)) +
+  theme_minimal(base_size = 12) +
+  theme(
+    plot.title = element_text(hjust = 0.5, face = "bold", size = 16),
+    plot.subtitle = element_text(hjust = 0.5, size = 12),
+    axis.title = element_text(face = "bold", size = 14),
+    axis.text = element_text(size = 12),
+    panel.grid.minor = element_blank(),
+    panel.grid.major = element_line(color = "gray90"),
+    plot.margin = margin(t = 20, r = 20, b = 20, l = 20, unit = "pt"),
+    legend.position = "none"
+  )
+
+print(att_plot)
+
+print(att_plot)
+library(binsreg)
+# Binned scatter of the ATT estimates against dose, with a cubic fit, its
+# confidence intervals and its confidence band (see the inline notes).
+binscatter_plot <- binsreg(y = att_df$att, x = att_df$dose,
+                           dots = c(0, 0),  # Degree 0 polynomial (means within bins)
+                           line = c(3, 0),  # Cubic fit
+                           ci = c(3, 0),    # Confidence intervals for cubic fit
+                           cb = c(3, 0),    # Confidence band for cubic fit
+                           polyreg = 3,     # Add a global cubic fit
+                           nsims=2000, # presumably the number of simulation draws for the band; confirm in the binsreg docs
+                           simsgrid=50, # presumably the evaluation-grid size for the band; confirm in the binsreg docs
+                           legendoff = TRUE) # Turn off legend
+
+# Dots-only version. This assignment overwrites the `binscatter_plot`
+# object created by the call above.
+binscatter_plot <- binsreg(y = att_df$att, x = att_df$dose,
+                           dots = c(0, 0),  # Degree 0 polynomial (means within bins)
+                           line = NULL,     # No line connecting the dots
+                           ci = NULL,       # No confidence intervals
+                           cb = NULL,       # No confidence band
+                           legendoff = TRUE)
+
+# Collect the ACR estimates returned by npiv_regression: evaluation points,
+# point estimates, uniform confidence band limits and standard errors
+acr_df <- data.frame(
+  dose =  res[["Xx"]],
+  acr = res[["dhat"]],
+  upper = res[["ACR_upper_UCB"]],
+  lower = res[["ACR_lower_UCB"]],
+  se =  res[["sigd"]]
+)
+
+# ACR plot (derivative of ATT): point estimates with the uniform
+# confidence band
+acr_plot <- ggplot(acr_df, aes(x = dose)) +
+  geom_ribbon(aes(ymin = lower, ymax = upper), fill = "lightgreen", alpha = 0.3) +
+  # `linewidth` replaces the line `size` aesthetic deprecated in
+  # ggplot2 3.4.0 and matches the other plots in this script
+  geom_line(aes(y = acr), color = "darkgreen", linewidth = 1) +
+  # Zero reference line
+  geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
+  labs(title = "Derivative of ATT(d|d): Average Causal Response",
+       x = "Malaria Ecology (Poveda)",
+       y = "Average Causal Response") +
+  theme_minimal() +
+  theme(
+    plot.title = element_text(hjust = 0.5, face = "bold"),
+    axis.title = element_text(face = "bold"),
+    panel.grid.minor = element_blank()
+  )
+
+# Print the plots
+print(att_plot)
+print(acr_plot)
+
@@ -0,0 +1,45 @@
+# Clear the workspace
+rm(list = ls())
+
+# Load necessary libraries
+library(fixest)
+library(dplyr)
+library(broom)
+library(lmtest)
+library(tidyverse)
+library(haven)
+library(knitr)
+library(sandwich)
+
+# Load the dataset
+data <- read_dta("application/113746-V1/longdiff/co/longdiff_col.dta")
+
+# Treat bplregcol as a factor and drop rows where it is missing
+data <- data %>%
+  mutate(bplregcol = as.factor(bplregcol)) %>%
+  filter(!is.na(bplregcol))
+
+# Indicator columns for bplregcol. With default contrasts model.matrix()
+# already leaves out the reference level; its first column is the
+# intercept, which lm() supplies itself, so that column is removed.
+# drop = FALSE keeps a matrix (and its column names) even when a single
+# indicator column remains, so colnames(dummies) stays usable below.
+dummies <- model.matrix(~ bplregcol, data)
+dummies <- dummies[, -1, drop = FALSE]
+
+# Log of the lndens column.
+# NOTE(review): the name `lndens` suggests it may already be in logs;
+# confirm that taking log() again is intended.
+data$log_lndens <- log(data$lndens)
+
+# Combine dummy variables with the original data
+data_with_dummies <- cbind(data, dummies)
+
+# Observation weights, taken from the wtbpl column
+weights <- data_with_dummies$wtbpl
+
+# Columns entering the model: outcome, malaria measure, controls and the
+# bplregcol indicator columns
+model_columns <- c("dlit", "poveda", "log_lndens", "nivel_de_vida", colnames(dummies))
+model_data <- data_with_dummies[, model_columns]
+
+# Weighted regression; `.` brings in the remaining columns of model_data,
+# i.e. the indicator columns
+model_A <- lm(
+  dlit ~ poveda + log_lndens + nivel_de_vida + .,
+  data = model_data,
+  weights = weights
+)
+
+# HC1 covariance matrix and the coefficient tests based on it
+hc1_vcov <- vcovHC(model_A, type = "HC1")
+robust_se_A <- coeftest(model_A, vcov = hc1_vcov)
+
+# Display the coefficient table with robust standard errors
+robust_se_A