From 7389658c2ece67d02883fccc8f26c1e7b4ddcaf0 Mon Sep 17 00:00:00 2001 From: oddish3 Date: Wed, 21 Aug 2024 21:46:34 +0100 Subject: [PATCH] [long-run] --- .Rhistory | 942 ++++++++++++++++++------------------- simulation/cred-cont-sim.R | 66 +-- simulation/working-sim.R | 4 +- 3 files changed, 494 insertions(+), 518 deletions(-) diff --git a/.Rhistory b/.Rhistory index a9e5415..5ff73f3 100644 --- a/.Rhistory +++ b/.Rhistory @@ -1,512 +1,512 @@ -h0 = d0, -hhat = npiv_result$dhat, -sigh = npiv_result$sigd, -zast = npiv_result$dzast[["95%"]], -theta = npiv_result$thet, -A = A -) -npiv_result$dhat -npiv_result$sigd -npiv_result$dzast[["95%"]] -npiv_result$thet -rm(list=ls()) -library(tictoc) -library(parallel) -library(doParallel) -library(foreach) -library(doRNG) -library(tidyverse) -library(fixest) -library(splines2) -library(triangle) -# remove.packages("contdid") -# devtools::build() -# devtools::install() -# devtools::load_all("~/Documents/uni/master-dissertation/contdid") -library(contdid) -# Source necessary functions -source("~/Documents/uni/master-dissertation/contdid/simulation/DGP1.R") -source("~/Documents/uni/master-dissertation/contdid/simulation/run_simulation.R") -# Set seed -seed1 <- 1234 -set.seed(seed1) -n <- c(100, 500, 1000) -nrep <- 1 -# Create cluster -cl <- makeCluster(detectCores() - 1) -registerDoParallel(cl) -# Export necessary functions to the cluster -clusterExport(cl, c("dgp_function", "gdata", "run_twfe", "run_feols_bspline", "calculate_point_metrics", "ucb_cvge", -"write_debug_info", "log_dimensions")) -# Run simulations for all DGPs -results_list <- list() -tic() -for (dgp in 2:2) { -# cat("Processing DGP:", dgp, "\n") -dgp_results <- list() -for (sample_size in n) { -# cat(" Sample size:", sample_size, "\n") +next +} tryCatch({ -dgp_results[[as.character(sample_size)]] <- run_simulation(n = sample_size, dgp = dgp, nrep = nrep) -# cat(" Completed successfully\n") +npiv_result <- npiv(PP1, BB1, y) +u1 <- npiv_result$u +Q1 <- npiv_result$Q +Bu1 <- sweep(BB1, 1, u1, `*`) +cat("Dimensions of Bu1:", dim(Bu1), "\n") +OL1 <- Q1 %*% (t(Bu1) %*% Bu1 / n) %*% t(Q1) +tden <- numeric(nrow(Px1)) +for (x in 1:nrow(Px1)) { +s1 <- Px1[x, ] %*% OL1 %*% Px1[x, ] +tden[x] <- sqrt(s1) +} +for (b in 1:nb) { +Buw1 <- t(Bu1) %*% omega[1:nrow(Bu1), b] / sqrt(n) +tnum <- Px1 %*% Q1 %*% Buw1 +ZZ[i, b] <- switch(as.character(type), +"0" = max(abs(tnum / tden)), +"-1" = max(tnum / tden), +"1" = min(tnum / tden)) +} }, error = function(e) { -cat(" Error occurred:", conditionMessage(e), "\n") +warning("Error in iteration ", i, ": ", e$message) }) } -results_list[[dgp]] <- do.call(rbind, dgp_results) -# cat("DGP", dgp, "completed\n\n") +if (all(ZZ == 0)) stop("All iterations failed or produced zero values") +z <- if (type %in% c(0, -1)) apply(ZZ, 2, max) else apply(ZZ, 2, min) +cv <- if (type %in% c(0, -1)) quantile(z, 1 - alpha) else -quantile(z, alpha) +return(cv) +} +ucb_cv(Ltil, Lhat, Px, PP, PP, CJ, CJ, y, n, 1000, 0, alpha) +ucb_cv(Ltil, Lhat, Px, PP, PP, CJ, CJ, y, n, 1000, 0) +ucb_cv <- function(Ltil, Lhat, Px, PP, BB, CJ, CK, y, nb, type, alpha = 0.05) { +# Input validation +if (length(CJ) < 2) stop("CJ must have at least 2 elements") +if (!is.numeric(Ltil) || !is.numeric(Lhat)) stop("Ltil and Lhat must be numeric") +if (!is.numeric(alpha) || alpha <= 0 || alpha >= 1) stop("alpha must be a number between 0 and 1") +cat("Ltil:", Ltil, "Lhat:", Lhat, "\n") +cat("Length of CJ:", length(CJ), "First few elements:", head(CJ), "\n") +cat("Dimensions of Px:", dim(Px), "\n") +cat("Dimensions of PP:", dim(PP), "\n") +cat("Dimensions of BB:", dim(BB), "\n") +n_Px <- nrow(Px) +n_PP <- nrow(PP) +if (n_Px != n_PP) { +warning("Mismatch in number of rows between Px and PP. Using the smaller dimension.") +n <- min(n_Px, n_PP) +Px <- Px[1:n, , drop = FALSE] +PP <- PP[1:n, , drop = FALSE] +BB <- BB[1:n, , drop = FALSE] +y <- y[1:n] +} else { +n <- n_Px +} +Lmax <- Lhat + 1 +omega <- matrix(rnorm(n * nb), n, nb) +num_iterations <- min(max(Ltil, Lmax - 2, 1), length(CJ) - 1) +if (num_iterations < 1) stop("No valid iterations possible with given parameters") +ZZ <- matrix(0, num_iterations, nb) +cat("Number of iterations:", num_iterations, "\n") +cat("Dimensions of ZZ:", dim(ZZ), "\n") +cat("Dimensions of omega:", dim(omega), "\n") +for (i in 1:num_iterations) { +end_idx <- min(CJ[i + 1], ncol(Px)) +Px1 <- Px[, (CJ[i] + 1):end_idx, drop = FALSE] +PP1 <- PP[, (CJ[i] + 1):end_idx, drop = FALSE] +end_idx_BB <- min(CK[i + 1], ncol(BB)) +BB1 <- BB[, (CK[i] + 1):end_idx_BB, drop = FALSE] +cat("Iteration", i, "\n") +cat("Dimensions of Px1:", dim(Px1), "\n") +cat("Dimensions of PP1:", dim(PP1), "\n") +cat("Dimensions of BB1:", dim(BB1), "\n") +if (ncol(BB1) == 0) { +warning("BB1 is empty in iteration ", i, ". Skipping this iteration.") +next } -# Stop cluster -stopCluster(cl) -toc() -View(results_list) -devtools::test() -devtools::load_all() -devtools::test() -devtools::load_all() -devtools::test() -devtools::load_all() -devtools::test() -debugonce(npiv_regression) -devtools::load_all() -devtools::test() -library(testthat) -library(readr) -library(dplyr) -library(purrr) -library(contdid) -set.seed(1234567) -# Helper function to load CSV files -load_csv_files <- function(directory) { tryCatch({ -csv_files <- list.files(directory, pattern = "\\.csv$", full.names = TRUE) -if (length(csv_files) == 0) { -stop("No CSV files found in the specified directory") +npiv_result <- npiv(PP1, BB1, y) +u1 <- npiv_result$u +Q1 <- npiv_result$Q +Bu1 <- sweep(BB1, 1, u1, `*`) +cat("Dimensions of Bu1:", dim(Bu1), "\n") +OL1 <- Q1 %*% (t(Bu1) %*% Bu1 / n) %*% t(Q1) +tden <- numeric(nrow(Px1)) +for (x in 1:nrow(Px1)) { +s1 <- Px1[x, ] %*% OL1 %*% Px1[x, ] +tden[x] <- sqrt(s1) } -message(paste("Found", length(csv_files), "CSV files")) -data_list <- map(csv_files, ~{ -message(paste("Processing file:", .x)) -var_name <- tools::file_path_sans_ext(basename(.x)) -var_name <- sub("^123_", "", var_name) # Remove "123_" prefix -data <- read_csv(.x, col_types = cols(), col_names = FALSE) -if (ncol(data) == 1) { -setNames(list(as.numeric(data[[1]])), var_name) -} else { -setNames(list(data), var_name) +for (b in 1:nb) { +Buw1 <- t(Bu1) %*% omega[1:nrow(Bu1), b] / sqrt(n) +tnum <- Px1 %*% Q1 %*% Buw1 +ZZ[i, b] <- switch(as.character(type), +"0" = max(abs(tnum / tden)), +"-1" = max(tnum / tden), +"1" = min(tnum / tden)) } -}) %>% flatten() -message("CSV files loaded successfully") -return(data_list) }, error = function(e) { -message(paste("Error in load_csv_files:", e$message)) -return(NULL) +warning("Error in iteration ", i, ": ", e$message) }) } -# Test function -test_matlab_equality <- function(r_result, matlab_data, tolerance = 1e-2) { -tryCatch({ -common_vars <- intersect(names(r_result), names(matlab_data)) -message(paste("Testing", length(common_vars), "common variables")) -for (var in common_vars) { -message(paste("Testing variable:", var)) -r_value <- r_result[[var]] -matlab_value <- matlab_data[[var]] -# Convert to vector if it's a single-column data frame -restructure_variable <- function(var) { -if (is.data.frame(var) && ncol(var) == 1) { -# If the variable is a data frame with 1 column, extract the vector -var <- var[[1]] +if (all(ZZ == 0)) stop("All iterations failed or produced zero values") +z <- if (type %in% c(0, -1)) apply(ZZ, 2, max) else apply(ZZ, 2, min) +cv <- if (type %in% c(0, -1)) quantile(z, 1 - alpha) else -quantile(z, alpha) +return(cv) } -if (is.matrix(var) && ncol(var) == 1) { -# If the variable is a matrix with 1 column, convert to a vector -var <- as.numeric(var) +ucb_cv(Ltil, Lhat, Px, PP, PP, CJ, CJ, y, n, 1000, 0) +ucb_cv(Ltil, Lhat, Px, PP, PP, CJ, CJ, y, n, 1000) +ucb_cv <- function(Ltil, Lhat, Px, PP, BB, CJ, CK, y, nb, type, alpha = 0.05) { +# Input validation +if (length(CJ) < 2) stop("CJ must have at least 2 elements") +if (!is.numeric(Ltil) || !is.numeric(Lhat)) stop("Ltil and Lhat must be numeric") +if (!is.numeric(alpha) || alpha <= 0 || alpha >= 1) stop("alpha must be a number between 0 and 1") +cat("Ltil:", Ltil, "Lhat:", Lhat, "\n") +cat("Length of CJ:", length(CJ), "First few elements:", head(CJ), "\n") +cat("Dimensions of Px:", dim(Px), "\n") +cat("Dimensions of PP:", dim(PP), "\n") +cat("Dimensions of BB:", dim(BB), "\n") +cat("CJ:", CJ "\n") +ucb_cv <- function(Ltil, Lhat, Px, PP, BB, CJ, CK, y, nb, type, alpha = 0.05) { +# Input validation +if (length(CJ) < 2) stop("CJ must have at least 2 elements") +if (!is.numeric(Ltil) || !is.numeric(Lhat)) stop("Ltil and Lhat must be numeric") +if (!is.numeric(alpha) || alpha <= 0 || alpha >= 1) stop("alpha must be a number between 0 and 1") +cat("Ltil:", Ltil, "Lhat:", Lhat, "\n") +cat("Dimensions of Px:", dim(Px), "\n") +cat("Dimensions of PP:", dim(PP), "\n") +cat("Dimensions of BB:", dim(BB), "\n") +cat("CJ:", CJ "\n") +ucb_cv <- function(Ltil, Lhat, Px, PP, BB, CJ, CK, y, nb, type, alpha = 0.05) { +# Input validation +if (length(CJ) < 2) stop("CJ must have at least 2 elements") +if (!is.numeric(Ltil) || !is.numeric(Lhat)) stop("Ltil and Lhat must be numeric") +if (!is.numeric(alpha) || alpha <= 0 || alpha >= 1) stop("alpha must be a number between 0 and 1") +cat("Ltil:", Ltil, "Lhat:", Lhat, "\n") +cat("Dimensions of Px:", dim(Px), "\n") +cat("Dimensions of PP:", dim(PP), "\n") +cat("Dimensions of BB:", dim(BB), "\n") +cat("CJ:", CJ, "\n") +cat("y:", head(y), "\n") +cat("type:", type, "\n") +cat("alpha:", a, "\n") +n_Px <- nrow(Px) +n_PP <- nrow(PP) +if (n_Px != n_PP) { +warning("Mismatch in number of rows between Px and PP. Using the smaller dimension.") +n <- min(n_Px, n_PP) +Px <- Px[1:n, , drop = FALSE] +PP <- PP[1:n, , drop = FALSE] +BB <- BB[1:n, , drop = FALSE] +y <- y[1:n] +} else { +n <- n_Px } -if (is.array(var) && length(dim(var)) == 2 && dim(var)[2] == 1) { -# If the variable is a 2D array with 1 column, convert to a vector -var <- as.numeric(var) +Lmax <- Lhat + 1 +omega <- matrix(rnorm(n * nb), n, nb) +num_iterations <- min(max(Ltil, Lmax - 2, 1), length(CJ) - 1) +if (num_iterations < 1) stop("No valid iterations possible with given parameters") +ZZ <- matrix(0, num_iterations, nb) +cat("Number of iterations:", num_iterations, "\n") +cat("Dimensions of ZZ:", dim(ZZ), "\n") +cat("Dimensions of omega:", dim(omega), "\n") +for (i in 1:num_iterations) { +end_idx <- min(CJ[i + 1], ncol(Px)) +Px1 <- Px[, (CJ[i] + 1):end_idx, drop = FALSE] +PP1 <- PP[, (CJ[i] + 1):end_idx, drop = FALSE] +end_idx_BB <- min(CK[i + 1], ncol(BB)) +BB1 <- BB[, (CK[i] + 1):end_idx_BB, drop = FALSE] +cat("Iteration", i, "\n") +cat("Dimensions of Px1:", dim(Px1), "\n") +cat("Dimensions of PP1:", dim(PP1), "\n") +cat("Dimensions of BB1:", dim(BB1), "\n") +if (ncol(BB1) == 0) { +warning("BB1 is empty in iteration ", i, ". Skipping this iteration.") +next } -return(var) +tryCatch({ +npiv_result <- npiv(PP1, BB1, y) +u1 <- npiv_result$u +Q1 <- npiv_result$Q +Bu1 <- sweep(BB1, 1, u1, `*`) +cat("Dimensions of Bu1:", dim(Bu1), "\n") +OL1 <- Q1 %*% (t(Bu1) %*% Bu1 / n) %*% t(Q1) +tden <- numeric(nrow(Px1)) +for (x in 1:nrow(Px1)) { +s1 <- Px1[x, ] %*% OL1 %*% Px1[x, ] +tden[x] <- sqrt(s1) } -# Restructure r_value and matlab_value -r_value <- restructure_variable(r_value) -matlab_value <- restructure_variable(matlab_value) -# Check if the lengths are the same -expect_equal(length(r_value), length(matlab_value), -info = paste("Length mismatch for variable:", var)) -# Check if the values are approximately equal -expect_equal(r_value, matlab_value, tolerance = tolerance, -info = paste("Value mismatch for variable:", var)) +for (b in 1:nb) { +Buw1 <- t(Bu1) %*% omega[1:nrow(Bu1), b] / sqrt(n) +tnum <- Px1 %*% Q1 %*% Buw1 +ZZ[i, b] <- switch(as.character(type), +"0" = max(abs(tnum / tden)), +"-1" = max(tnum / tden), +"1" = min(tnum / tden)) } -message("All variables tested successfully") }, error = function(e) { -message(paste("Error in test_matlab_equality:", e$message)) +warning("Error in iteration ", i, ": ", e$message) }) } +if (all(ZZ == 0)) stop("All iterations failed or produced zero values") +z <- if (type %in% c(0, -1)) apply(ZZ, 2, max) else apply(ZZ, 2, min) +cv <- if (type %in% c(0, -1)) quantile(z, 1 - alpha) else -quantile(z, alpha) +return(cv) +} +ucb_cv(Ltil, Lhat, Px, PP, PP, CJ, CJ, y, n, 1000, 0, alpha) +ucb_cv(Ltil, Lhat, Px, PP, PP, CJ, CJ, y, nb = 1000, type = 0, alpha = alpha) +ucb_cv <- function(Ltil, Lhat, Px, PP, BB, CJ, CK, y, nb, type, alpha = 0.05) { +# Input validation +if (length(CJ) < 2) stop("CJ must have at least 2 elements") +if (!is.numeric(Ltil) || !is.numeric(Lhat)) stop("Ltil and Lhat must be numeric") +if (!is.numeric(alpha) || alpha <= 0 || alpha >= 1) stop("alpha must be a number between 0 and 1") +cat("Ltil:", Ltil, "Lhat:", Lhat, "\n") +cat("Dimensions of Px:", dim(Px), "\n") +cat("Dimensions of PP:", dim(PP), "\n") +cat("Dimensions of BB:", dim(BB), "\n") +cat("CJ:", CJ, "\n") +cat("y:", head(y), "\n") +cat("type:", type, "\n") +cat("alpha:", alpha, "\n") +n_Px <- nrow(Px) +n_PP <- nrow(PP) +if (n_Px != n_PP) { +warning("Mismatch in number of rows between Px and PP. Using the smaller dimension.") +n <- min(n_Px, n_PP) +Px <- Px[1:n, , drop = FALSE] +PP <- PP[1:n, , drop = FALSE] +BB <- BB[1:n, , drop = FALSE] +y <- y[1:n] +} else { +n <- n_Px +} +Lmax <- Lhat + 1 +omega <- matrix(rnorm(n * nb), n, nb) +num_iterations <- min(max(Ltil, Lmax - 2, 1), length(CJ) - 1) +if (num_iterations < 1) stop("No valid iterations possible with given parameters") +ZZ <- matrix(0, num_iterations, nb) +cat("Number of iterations:", num_iterations, "\n") +cat("Dimensions of ZZ:", dim(ZZ), "\n") +cat("Dimensions of omega:", dim(omega), "\n") +for (i in 1:num_iterations) { +end_idx <- min(CJ[i + 1], ncol(Px)) +Px1 <- Px[, (CJ[i] + 1):end_idx, drop = FALSE] +PP1 <- PP[, (CJ[i] + 1):end_idx, drop = FALSE] +end_idx_BB <- min(CK[i + 1], ncol(BB)) +BB1 <- BB[, (CK[i] + 1):end_idx_BB, drop = FALSE] +cat("Iteration", i, "\n") +cat("Dimensions of Px1:", dim(Px1), "\n") +cat("Dimensions of PP1:", dim(PP1), "\n") +cat("Dimensions of BB1:", dim(BB1), "\n") +if (ncol(BB1) == 0) { +warning("BB1 is empty in iteration ", i, ". Skipping this iteration.") +next +} tryCatch({ -skip_if_not(dir.exists(test_path("data", "matlab_results")), -"MATLAB data directory not found") -skip_if_not(file.exists(test_path("data", "medicare1.csv")), -"Medicare data file not found") -message("Loading MATLAB CSV files") -matlab_data <- load_csv_files(test_path("data", "matlab_results")) -if (is.null(matlab_data)) { -skip("Failed to load MATLAB data") +npiv_result <- npiv(PP1, BB1, y) +u1 <- npiv_result$u +Q1 <- npiv_result$Q +Bu1 <- sweep(BB1, 1, u1, `*`) +cat("Dimensions of Bu1:", dim(Bu1), "\n") +OL1 <- Q1 %*% (t(Bu1) %*% Bu1 / n) %*% t(Q1) +tden <- numeric(nrow(Px1)) +for (x in 1:nrow(Px1)) { +s1 <- Px1[x, ] %*% OL1 %*% Px1[x, ] +tden[x] <- sqrt(s1) +} +for (b in 1:nb) { +Buw1 <- t(Bu1) %*% omega[1:nrow(Bu1), b] / sqrt(n) +tnum <- Px1 %*% Q1 %*% Buw1 +ZZ[i, b] <- switch(as.character(type), +"0" = max(abs(tnum / tden)), +"-1" = max(tnum / tden), +"1" = min(tnum / tden)) } -message("Loading R data") -data <- read.csv(test_path("data", "medicare1.csv")) -message(paste("R data dimensions:", nrow(data), "x", ncol(data))) -message("Running npiv_regression") -r_result <- npiv_regression(data, "medicare_share_1983", "d_capital_labor_ratio") -message("Comparing results") -test_matlab_equality(r_result, matlab_data) -message("Test completed successfully") }, error = function(e) { -message(paste("Error in main test:", e$message)) -stop(e) +warning("Error in iteration ", i, ": ", e$message) }) -skip_if_not(dir.exists(test_path("data", "matlab_results")), -"MATLAB data directory not found") -skip_if_not(file.exists(test_path("data", "medicare1.csv")), -"Medicare data file not found") -message("Loading MATLAB CSV files") -matlab_data <- load_csv_files(test_path("data", "matlab_results")) -if (is.null(matlab_data)) { -skip("Failed to load MATLAB data") } -message("Loading R data") -data <- read.csv(test_path("data", "medicare1.csv")) -message(paste("R data dimensions:", nrow(data), "x", ncol(data))) -message("Running npiv_regression") -r_result <- npiv_regression(data, "medicare_share_1983", "d_capital_labor_ratio") -debugonce(npiv_regression) -r_result <- npiv_regression(data, "medicare_share_1983", "d_capital_labor_ratio") -devtools::load_all() -devtools::test() -results_list <- readRDS("~/Documents/uni/master-dissertation/contdid/simulation/results_list.rds") -# saveRDS(results_list, file = "results_list.rds") -# Combine all results into a single data frame -all_results <- do.call(rbind, results_list) -# Calculate mean results -mean_results <- all_results %>% -group_by(n, dgp) %>% -summarise(across(where(is.numeric), ~ mean(.x, na.rm = TRUE)), .groups = "drop") -library(tictoc) -library(parallel) -library(doParallel) -library(foreach) -library(doRNG) -library(tidyverse) -library(fixest) -library(splines2) -library(triangle) -# saveRDS(results_list, file = "results_list.rds") -# Combine all results into a single data frame -all_results <- do.call(rbind, results_list) -# Calculate mean results -mean_results <- all_results %>% -group_by(n, dgp) %>% -summarise(across(where(is.numeric), ~ mean(.x, na.rm = TRUE)), .groups = "drop") -View(mean_results) -library(truncnorm) -devtools::load_all("~/Documents/uni/master-dissertation/contdid") -gdata <- function(n, prop_treated = 0.8) { -# Generate treatment assignment -treatment <- rbinom(n, 1, prop_treated) -# Generate dose for treated units -dose <- rep(0, n) -dose[treatment == 1] <- rtruncnorm(sum(treatment == 1), a = 0.011, b = 0.99, 0.5, 0.16) -dy <- rep(0, n) -dy[treatment == 1] <- -4*(dose[treatment == 1] - 0.5)^2 + 1 -# Create dataframe -df <- data.frame( -dy = dy, -treatment = treatment, -dose = dose -) -return(df) +if (all(ZZ == 0)) stop("All iterations failed or produced zero values") +z <- if (type %in% c(0, -1)) apply(ZZ, 2, max) else apply(ZZ, 2, min) +cv <- if (type %in% c(0, -1)) quantile(z, 1 - alpha) else -quantile(z, alpha) +return(cv) } -data <- gdata(1000) -mean(data$dy[data$treatment == 1]) - mean(data$dy[data$treatment == 0]) -result <- npiv_regression( -data = data, -treatment_col = "dose", -outcome_col = "dy", -) -Q -result <- npiv_regression( -data = data, -treatment_col = "dose", -outcome_col = "dy", -) +ucb_cv(Ltil, Lhat, Px, PP, PP, CJ, CJ, y, nb = 1000, type = 0, alpha = alpha) +ucb_cv(Ltil, Lhat, Px, PP, PP, CJ, CJ, y, nb = 1000, type = 0, alpha = alpha) +res <- ucb_cv(Ltil, Lhat, Px, PP, PP, CJ, CJ, y, nb = 1000, type = 0, alpha = alpha) +res +hzast <- ucb_cv(Ltil, Lhat, Px, PP, PP, CJ, CJ, y, n, 1000, 0, alpha) +res1 <- ucb_cv(Ltil, Lhat, Dx, PP, PP, CJ, CJ, y, nb = 1000, type = 0, alpha = alpha) rm(list=ls()) -library(tictoc) -library(parallel) -library(doParallel) -library(foreach) -library(doRNG) -library(tidyverse) -library(fixest) -library(splines2) -library(triangle) -# remove.packages("contdid") -# devtools::build() -# devtools::install() -# devtools::load_all("~/Documents/uni/master-dissertation/contdid") library(contdid) -# Source necessary functions source("~/Documents/uni/master-dissertation/contdid/simulation/DGP1.R") -source("~/Documents/uni/master-dissertation/contdid/simulation/run_simulation.R") -# Set seed -seed1 <- 1234 -set.seed(seed1) -n <- c(100, 500, 1000) -nrep <- 1 -# Create cluster -cl <- makeCluster(detectCores() - 1) -registerDoParallel(cl) -# Export necessary functions to the cluster -clusterExport(cl, c("dgp_function", "gdata", "run_twfe", "run_feols_bspline", "calculate_point_metrics", "ucb_cvge", -"write_debug_info", "log_dimensions")) -# Run simulations for all DGPs -results_list <- list() -tic() -for (dgp in 2:2) { -# cat("Processing DGP:", dgp, "\n") -dgp_results <- list() -for (sample_size in n) { -# cat(" Sample size:", sample_size, "\n") -tryCatch({ -dgp_results[[as.character(sample_size)]] <- run_simulation(n = sample_size, dgp = dgp, nrep = nrep) -# cat(" Completed successfully\n") -}, error = function(e) { -cat(" Error occurred:", conditionMessage(e), "\n") -}) -} -results_list[[dgp]] <- do.call(rbind, dgp_results) -# cat("DGP", dgp, "completed\n\n") -} -# Stop cluster -stopCluster(cl) -toc() -View(results_list) +set.seed(123) # for reproducibility +results <- gdata(100, dgp = 4) +data <- results$data +info <- results$info +data <- data %>% +filter((dose > 0.25 & dose < 0.75) | dose == 0) %>% +mutate(dose = (dose - min(dose)) / (max(dose) - min(dose))) +# debugonce(npiv_regression) +npiv_result <- npiv_regression(data, "dose", "dy") rm(list=ls()) -library(tictoc) -library(parallel) -library(doParallel) -library(foreach) -library(doRNG) -library(tidyverse) -library(fixest) -library(splines2) -library(triangle) -# remove.packages("contdid") -# devtools::build() -# devtools::install() -# devtools::load_all("~/Documents/uni/master-dissertation/contdid") library(contdid) -# Source necessary functions source("~/Documents/uni/master-dissertation/contdid/simulation/DGP1.R") -source("~/Documents/uni/master-dissertation/contdid/simulation/run_simulation.R") -# Set seed -seed1 <- 1234 -set.seed(seed1) -n <- c(100, 500, 1000) -nrep <- 1 -# Create cluster -cl <- makeCluster(detectCores() - 1) -registerDoParallel(cl) -# Export necessary functions to the cluster -clusterExport(cl, c("dgp_function", "gdata", "run_twfe", "run_feols_bspline", "calculate_point_metrics", "ucb_cvge", -"write_debug_info", "log_dimensions")) -# Run simulations for all DGPs -results_list <- list() -tic() -for (dgp in 1:4) { -# cat("Processing DGP:", dgp, "\n") -dgp_results <- list() -for (sample_size in n) { -# cat(" Sample size:", sample_size, "\n") -tryCatch({ -dgp_results[[as.character(sample_size)]] <- run_simulation(n = sample_size, dgp = dgp, nrep = nrep) -# cat(" Completed successfully\n") -}, error = function(e) { -cat(" Error occurred:", conditionMessage(e), "\n") -}) -} -results_list[[dgp]] <- do.call(rbind, dgp_results) -# cat("DGP", dgp, "completed\n\n") -} -# Stop cluster -stopCluster(cl) -toc() -View(results_list) -saveRDS(results_list, file = "simulation/results_list.rds") +set.seed(123) # for reproducibility +results <- gdata(100, dgp = 4) +data <- results$data +info <- results$info +data <- data %>% +filter((dose > 0.25 & dose < 0.75) | dose == 0) %>% +mutate(dose = (dose - min(dose)) / (max(dose) - min(dose))) +# debugonce(npiv_regression) +npiv_result <- npiv_regression(data, "dose", "dy") rm(list=ls()) -library(tictoc) -library(parallel) -library(doParallel) -library(foreach) -library(doRNG) -library(tidyverse) -library(fixest) -library(splines2) -library(triangle) -# remove.packages("contdid") -# devtools::build() -# devtools::install() -# devtools::load_all("~/Documents/uni/master-dissertation/contdid") library(contdid) -# Source necessary functions source("~/Documents/uni/master-dissertation/contdid/simulation/DGP1.R") -source("~/Documents/uni/master-dissertation/contdid/simulation/run_simulation.R") -# Set seed -seed1 <- 1234 -set.seed(seed1) -n <- c(100, 500, 1000) -nrep <- 1 -# Create cluster -cl <- makeCluster(detectCores() - 1) -registerDoParallel(cl) -# Export necessary functions to the cluster -clusterExport(cl, c("dgp_function", "gdata", "run_twfe", "run_feols_bspline", "calculate_point_metrics", "ucb_cvge", -"write_debug_info", "log_dimensions")) -# Run simulations for all DGPs -results_list <- list() -tic() -for (dgp in 1:4) { -# cat("Processing DGP:", dgp, "\n") -dgp_results <- list() -for (sample_size in n) { -# cat(" Sample size:", sample_size, "\n") -tryCatch({ -dgp_results[[as.character(sample_size)]] <- run_simulation(n = sample_size, dgp = dgp, nrep = nrep) -# cat(" Completed successfully\n") -}, error = function(e) { -cat(" Error occurred:", conditionMessage(e), "\n") -}) -} -results_list[[dgp]] <- do.call(rbind, dgp_results) -# cat("DGP", dgp, "completed\n\n") -} -# Stop cluster -stopCluster(cl) -toc() -saveRDS(results_list, file = "simulation/results_list.rds") +set.seed(123) # for reproducibility +results <- gdata(100, dgp = 4) +data <- results$data +info <- results$info +# data <- data %>% +# filter((dose > 0.25 & dose < 0.75) | dose == 0) %>% +# mutate(dose = (dose - min(dose)) / (max(dose) - min(dose))) +# debugonce(npiv_regression) +npiv_result <- npiv_regression(data, "dose", "dy") +View(npiv_result) +npiv_result[["hzast"]] +devtools::load_all() rm(list=ls()) -library(tictoc) -library(parallel) -library(doParallel) -library(foreach) -library(doRNG) -library(tidyverse) -library(fixest) -library(splines2) -library(triangle) -# remove.packages("contdid") -# devtools::build() -# devtools::install() -# devtools::load_all("~/Documents/uni/master-dissertation/contdid") library(contdid) -# Source necessary functions source("~/Documents/uni/master-dissertation/contdid/simulation/DGP1.R") -source("~/Documents/uni/master-dissertation/contdid/simulation/run_simulation.R") -# Set seed -seed1 <- 1234 -set.seed(seed1) -n <- c(100, 500, 1000,2500) -nrep <- 1000 -# Create cluster -cl <- makeCluster(detectCores() - 1) -registerDoParallel(cl) -# Export necessary functions to the cluster -clusterExport(cl, c("dgp_function", "gdata", "run_twfe", "run_feols_bspline", "calculate_point_metrics", "ucb_cvge", -"write_debug_info", "log_dimensions")) -# Run simulations for all DGPs -results_list <- list() -tic() -for (dgp in 1:4) { -# cat("Processing DGP:", dgp, "\n") -dgp_results <- list() -for (sample_size in n) { -# cat(" Sample size:", sample_size, "\n") -tryCatch({ -dgp_results[[as.character(sample_size)]] <- run_simulation(n = sample_size, dgp = dgp, nrep = nrep) -# cat(" Completed successfully\n") -}, error = function(e) { -cat(" Error occurred:", conditionMessage(e), "\n") -}) -} -results_list[[dgp]] <- do.call(rbind, dgp_results) -# cat("DGP", dgp, "completed\n\n") -} -# Stop cluster -stopCluster(cl) -toc() -saveRDS(results_list, file = "simulation/results_list.rds") -# Combine all results into a single data frame -all_results <- do.call(rbind, results_list) -# Calculate mean results -mean_results <- all_results %>% -group_by(n, dgp) %>% -summarise(across(where(is.numeric), ~ mean(.x, na.rm = TRUE)), .groups = "drop") -View(mean_results) -View(results_list) +set.seed(123) # for reproducibility +results <- gdata(100, dgp = 4) +data <- results$data +info <- results$info +data <- data %>% +filter((dose > 0.25 & dose < 0.75) | dose == 0) %>% +mutate(dose = (dose - min(dose)) / (max(dose) - min(dose))) +# debugonce(npiv_regression) +npiv_result <- npiv_regression(data, "dose", "dy") +devtools::load_all() +rm(list=ls()) +library(contdid) +source("~/Documents/uni/master-dissertation/contdid/simulation/DGP1.R") +set.seed(123) # for reproducibility +results <- gdata(100, dgp = 4) +data <- results$data +info <- results$info +data <- data %>% +filter((dose > 0.25 & dose < 0.75) | dose == 0) %>% +mutate(dose = (dose - min(dose)) / (max(dose) - min(dose))) +# debugonce(npiv_regression) +npiv_result <- npiv_regression(data, "dose", "dy") +rm(list=ls()) +library(contdid) +source("~/Documents/uni/master-dissertation/contdid/simulation/DGP1.R") +set.seed(123) # for reproducibility +results <- gdata(100, dgp = 4) +data <- results$data +info <- results$info +data <- data %>% +filter((dose > 0.25 & dose < 0.75) | dose == 0) %>% +mutate(dose = (dose - min(dose)) / (max(dose) - min(dose))) +# debugonce(npiv_regression) +npiv_result <- npiv_regression(data, "dose", "dy") +devtools::load_all() +rm(list=ls()) +library(contdid) +source("~/Documents/uni/master-dissertation/contdid/simulation/DGP1.R") +set.seed(123) # for reproducibility +results <- gdata(100, dgp = 4) +data <- results$data +info <- results$info +data <- data %>% +filter((dose > 0.25 & dose < 0.75) | dose == 0) %>% +mutate(dose = (dose - min(dose)) / (max(dose) - min(dose))) +# debugonce(npiv_regression) +npiv_result <- npiv_regression(data, "dose", "dy") +devtools::load_all() +rm(list=ls()) +library(contdid) +source("~/Documents/uni/master-dissertation/contdid/simulation/DGP1.R") +set.seed(123) # for reproducibility +results <- gdata(100, dgp = 4) +data <- results$data +info <- results$info +data <- data %>% +filter((dose > 0.25 & dose < 0.75) | dose == 0) %>% +mutate(dose = (dose - min(dose)) / (max(dose) - min(dose))) +# debugonce(npiv_regression) +npiv_result <- npiv_regression(data, "dose", "dy") +npiv_result[["hzast"]] +npiv_result[["dzast"]] +source("~/Documents/uni/master-dissertation/contdid/R/ucb_cv.R") +devtools::load_all() +rm(list=ls()) +library(contdid) +source("~/Documents/uni/master-dissertation/contdid/simulation/DGP1.R") +set.seed(123) # for reproducibility +results <- gdata(100, dgp = 4) +data <- results$data +info <- results$info +# data <- data %>% +# filter((dose > 0.25 & dose < 0.75) | dose == 0) %>% +# mutate(dose = (dose - min(dose)) / (max(dose) - min(dose))) +# debugonce(npiv_regression) +npiv_result <- npiv_regression(data, "dose", "dy") +npiv_result[["hzast"]] +npiv_result[["dzast"]] +rm(list=ls()) +library(contdid) +source("~/Documents/uni/master-dissertation/contdid/simulation/DGP1.R") +set.seed(123) # for reproducibility +results <- gdata(100, dgp = 4) +data <- results$data +info <- results$info +data <- data %>% +filter((dose > 0.25 & dose < 0.75) | dose == 0) +# %>% +# mutate(dose = (dose - min(dose)) / (max(dose) - min(dose))) +# debugonce(npiv_regression) +npiv_result <- npiv_regression(data, "dose", "dy") +rm(list=ls()) +devtools::load_all() +library(contdid) +source("~/Documents/uni/master-dissertation/contdid/simulation/DGP1.R") +set.seed(123) # for reproducibility +results <- gdata(100, dgp = 4) +data <- results$data +info <- results$info +data <- data %>% +filter((dose > 0.25 & dose < 0.75) | dose == 0) +# debugonce(npiv_regression) +npiv_result <- npiv_regression(data, "dose", "dy") +npiv_result[["hzast"]] +npiv_result[["dzast"]] rm(list=ls()) -library(tictoc) -library(parallel) -library(doParallel) -library(foreach) -library(doRNG) +devtools::load_all() +library(contdid) +source("~/Documents/uni/master-dissertation/contdid/simulation/DGP1.R") +set.seed(123) # for reproducibility +results <- gdata(100, dgp = 4) +data <- results$data +info <- results$info +data <- data %>% +filter((dose > 0.25 & dose < 0.75) | dose == 0) %>% +mutate(dose = (dose - min(dose)) / (max(dose) - min(dose))) +# debugonce(npiv_regression) +npiv_result <- npiv_regression(data, "dose", "dy") +npiv_result[["hzast"]] +npiv_result[["dzast"]] +rm(list=ls()) +devtools::load_all() +library(contdid) +source("~/Documents/uni/master-dissertation/contdid/simulation/DGP1.R") +set.seed(123) # for reproducibility +results <- gdata(100, dgp = 4) +data <- results$data +info <- results$info +data <- data %>% +filter((dose > 0.25 & dose < 0.75) | dose == 0) %>% +mutate(dose = (dose - min(dose)) / (max(dose) - min(dose))) +rm(list=ls()) +devtools::load_all() +library(contdid) library(tidyverse) -library(fixest) -library(splines2) -library(triangle) -# remove.packages("contdid") -# devtools::build() -# devtools::install() -# devtools::load_all("~/Documents/uni/master-dissertation/contdid") +source("~/Documents/uni/master-dissertation/contdid/simulation/DGP1.R") +set.seed(123) # for reproducibility +results <- gdata(100, dgp = 4) +data <- results$data +info <- results$info +data <- data %>% +filter((dose > 0.25 & dose < 0.75) | dose == 0) %>% +mutate(dose = (dose - min(dose)) / (max(dose) - min(dose))) +# debugonce(npiv_regression) +npiv_result <- npiv_regression(data, "dose", "dy") +npiv_result[["hzast"]] +npiv_result[["dzast"]] +View(results) +View(npiv_result) +View(npiv_result) +rm(list=ls()) +devtools::load_all() library(contdid) -# Source necessary functions +library(tidyverse) source("~/Documents/uni/master-dissertation/contdid/simulation/DGP1.R") -source("~/Documents/uni/master-dissertation/contdid/simulation/run_simulation.R") -# Set seed -seed1 <- 1234 -set.seed(seed1) -n <- c(100, 500, 1000,2500) -nrep <- 1 -# Create cluster -cl <- makeCluster(detectCores() - 1) -registerDoParallel(cl) -# Export necessary functions to the cluster -clusterExport(cl, c("dgp_function", "gdata", "run_twfe", "run_feols_bspline", "calculate_point_metrics", "ucb_cvge", -"write_debug_info", "log_dimensions")) -# Run simulations for all DGPs -results_list <- list() -tic() -for (dgp in 1:4) { -# cat("Processing DGP:", dgp, "\n") -dgp_results <- list() -for (sample_size in n) { -# cat(" Sample size:", sample_size, "\n") -tryCatch({ -dgp_results[[as.character(sample_size)]] <- run_simulation(n = sample_size, dgp = dgp, nrep = nrep) -# cat(" Completed successfully\n") -}, error = function(e) { -cat(" Error occurred:", conditionMessage(e), "\n") -}) -} -results_list[[dgp]] <- do.call(rbind, dgp_results) -# cat("DGP", dgp, "completed\n\n") -} -# Stop cluster -stopCluster(cl) -toc() -View(results_list) -# saveRDS(results_list, file = "simulation/results_list.rds") -# Combine all results into a single data frame -all_results <- do.call(rbind, results_list) -# Calculate mean results -mean_results <- all_results %>% -group_by(n, dgp) %>% -summarise(across(where(is.numeric), ~ mean(.x, na.rm = TRUE)), .groups = "drop") -View(mean_results) +set.seed(123) # for reproducibility +results <- gdata(100, dgp = 4) +data <- results$data +info <- results$info +# data <- data %>% +# filter((dose > 0.25 & dose < 0.75) | dose == 0) %>% +# mutate(dose = (dose - min(dose)) / (max(dose) - min(dose))) +# debugonce(npiv_regression) +npiv_result <- npiv_regression(data, "dose", "dy") +rm(list=ls()) +devtools::load_all() +library(contdid) +library(tidyverse) +source("~/Documents/uni/master-dissertation/contdid/simulation/DGP1.R") +set.seed(123) # for reproducibility +results <- gdata(100, dgp = 4) +data <- results$data +info <- results$info +# data <- data %>% +# filter((dose > 0.25 & dose < 0.75) | dose == 0) %>% +# mutate(dose = (dose - min(dose)) / (max(dose) - min(dose))) +# debugonce(npiv_regression) +npiv_result <- npiv_regression(data, "dose", "dy") +npiv_result[["hzast"]] +npiv_result[["dzast"]] +View(npiv_result) diff --git a/simulation/cred-cont-sim.R b/simulation/cred-cont-sim.R index 111f522..d05e99b 100644 --- a/simulation/cred-cont-sim.R +++ b/simulation/cred-cont-sim.R @@ -1,9 +1,12 @@ rm(list=ls()) - +library(generics) +library(tidyverse) +library(tidyr) library(dplyr) set.seed(123) # Load the data (assuming you've already done this) data <- readRDS("~/Downloads/combined_data.rds") +unique(data$year) # Step 1: Assign pre/post periods data <- data %>% @@ -46,7 +49,7 @@ data <- data %>% avg_death_rate <- mean(data$deaths_per_100k) treatment_effect <- function(d, avg_rate) { - return(0.5 * d * avg_rate / 100000) # Scale to match deaths per person + return(2.5 * d * avg_rate / 100000) # Scale to match deaths per person } data <- data %>% @@ -76,52 +79,25 @@ e_delta_y_d_positive <- data %>% print(paste("E[ΔY|D > 0] =", e_delta_y_d_positive)) # Calculate the difference -att_difference <- e_delta_y_d0 - e_delta_y_d_positive +att_difference <- e_delta_y_d_positive - e_delta_y_d0 print(paste("ATT (E[ΔY|D = 0] - E[ΔY|D > 0]) =", att_difference)) +# Calculate change in outcome between periods +data <- data %>% + group_by(la_name) %>% + mutate( + deaths_per_100k_change = deaths_per_100k_sim - first(deaths_per_100k_sim), + dose_indicator = if_else(dose > 0, 1, 0) + ) %>% + ungroup() +# Filter for the second period (post-treatment) +data_post <- data %>% + filter(period == "post") +# Run regression +regression_model <- lm(deaths_per_100k_change ~ dose_indicator, data = data_post) +# Print regression summary +summary(regression_model) - - - -calculate_att_d <- function(data, d, bandwidth = 0.1) { - # browser() - treated <- data %>% - filter(period == "post" & abs(dose - d) < bandwidth & treated == 1) - - control <- data %>% - filter(period == "post" & treated == 0) - - att_d <- mean(treated$baseline_rate - treated$deaths_per_100k_sim) - - mean(control$baseline_rate - control$deaths_per_100k) - - return(att_d) -} - -dose_points <- seq(0.1, 1, by = 0.1) -att_d_estimates <- sapply(dose_points, calculate_att_d, data = data) - -atto_estimate <- mean(att_d_estimates) -print(paste("Estimated ATTo:", atto_estimate)) - -# Calculate E[ΔY|D = 0] -e_delta_y_d0 <- data %>% - filter(treated == 0) %>% - summarize(mean_change = mean(deaths_per_100k[period == "post"] - baseline_rate)) %>% - pull(mean_change) - -print(paste("E[ΔY|D = 0] =", e_delta_y_d0)) - -# Calculate E[ΔY|D > 0] -e_delta_y_d_positive <- data %>% - filter(treated == 1) %>% - summarize(mean_change = mean(deaths_per_100k_sim[period == "post"] - baseline_rate)) %>% - pull(mean_change) - -print(paste("E[ΔY|D > 0] =", e_delta_y_d_positive)) - -# Calculate the difference -att_difference <- e_delta_y_d0 - e_delta_y_d_positive -print(paste("ATT (E[ΔY|D = 0] - E[ΔY|D > 0]) =", att_difference)) diff --git a/simulation/working-sim.R b/simulation/working-sim.R index c6c53bb..81d2e13 100644 --- a/simulation/working-sim.R +++ b/simulation/working-sim.R @@ -22,7 +22,7 @@ source("~/Documents/uni/master-dissertation/contdid/simulation/run_simulation.R" seed1 <- 1234 set.seed(seed1) n <- c(100, 500, 1000,2500) -nrep <- 1000 +nrep <- 1 # Create cluster cl <- makeCluster(detectCores() - 1) @@ -51,7 +51,7 @@ for (dgp in 1:4) { # Stop cluster stopCluster(cl) toc() -# saveRDS(results_list, file = "simulation/results_list1.rds") +# saveRDS(results_list, file = "simulation/results_list2.rds") # Combine all results into a single data frame all_results <- do.call(rbind, results_list)