Skip to content

Commit

Permalink
commit DGP1
Browse files Browse the repository at this point in the history
  • Loading branch information
oddish3 committed Aug 15, 2024
1 parent 470cbb9 commit 0a7663e
Show file tree
Hide file tree
Showing 4 changed files with 161 additions and 154 deletions.
292 changes: 146 additions & 146 deletions .Rhistory
Original file line number Diff line number Diff line change
@@ -1,149 +1,3 @@
y2 <- y1 + treat_noise
# Calculate change in outcomes
dy <- y2 - y1
# Calculate true ATT
pop_att <- dgp_params$E_func
samp_att <- mean(treatment_effect[treatment == 1])
# Calculate observed ATE (difference-in-differences)
diff_treated <- mean(dy[treatment == 1])
diff_control <- mean(dy[treatment == 0])
observed_ate <- diff_treated - diff_control
# Calculate true ACR
samp_acr <- mean(dgp_params$func_deriv(dose[dose > 0]))
pop_acr <- dgp_params$E_deriv
browser()
x_grid <- seq(0, 1, length.out = 1000)
func_values <- dgp_params$func(x_grid)
func_deriv_values <- dgp_params$func_deriv(x_grid)
df <- data.frame(
y1 = y1,
y2 = y2,
dy = dy,
treatment = treatment,
dose = dose
)
return(list(
data = df,
info = list(
pop_att = pop_att,
obs_ate = observed_ate,
pop_acr = pop_acr,
func_values = func_values,
func_deriv_values = func_deriv_values,
noisy_treatment_effect = noisy_treatment_effect
)
))
}
# Generate data
# set.seed(1234567)
dat <- gdata(n = n, dgp = dgp)
func_values <- dgp_params$func(x_grid)
func_deriv_values <- dgp_params$func_deriv(x_grid)
<-
x_grid <- if (length(x) == 1) rep(x, 1000) else seq(0, 1, length.out = 1000)
x_grid <- if (length(x) == 1) rep(x, 1000) else seq(0, 1, length.out = 1000)
if (length(x) == 1) rep(x, 1000) else seq(0, 1, length.out = 1000)
func_values <- dgp_params$func(x_grid)
func_deriv_values <- dgp_params$func_deriv(x_grid)
func_values <- dgp_params$func(x_grid)
if (length(x) == 1) {
x_grid <- c(x_grid, rep(x, length(x_grid)))
}
func_deriv_values <- if (is.numeric(dgp_params$func_deriv) && length(dgp_params$func_deriv) == 1) {
rep(dgp_params$func_deriv, length(x_grid))
} else if (is.function(dgp_params$func_deriv)) {
dgp_params$func_deriv(x_grid)
} else {
stop("dgp_params$func_deriv must be either a scalar or a function")
}
is.numeric(dgp_params$func_deriv)
length(dgp_params$func_deriv) == 1
func_deriv_values <- if (length(dgp_params$func_deriv) == 1) {
rep(dgp_params$func_deriv, length(x_grid))
} else if (is.function(dgp_params$func_deriv)) {
dgp_params$func_deriv(x_grid)
} else {
stop("dgp_params$func_deriv must be either a scalar or a function")
}
# Generate data
# set.seed(1234567)
dat <- gdata(n = n, dgp = dgp)
# browser()
tryCatch({
if (any(data$dose < 0 | data$dose > 1, na.rm = TRUE)) {
knots <- c(0.4, 0.5, 0.6)
} else {
quantiles <- quantile(data$dose, probs = c(0.25, 0.5, 0.75), na.rm = TRUE)
if (length(unique(quantiles)) == 3) {
margin <- 0.05
knots <- pmax(margin, pmin(1 - margin, quantiles))
} else {
knots <- c(0.4, 0.5, 0.6)
}
}
formula <- bquote(
dy ~ splines2::bSpline(dose, knots = .(knots), degree = 3, intercept = TRUE) - 1
)
model <- suppressWarnings(
fixest::feols(
eval(formula),
data = data,
# cluster = ~ individual
)
)
att_SR <- predict(model)
mean_att_SR <- mean(att_SR)
spline_dosage_SR <- bSpline(data$dose,
knots = knots,
degree = 3,
intercept = TRUE)
n_treated_SR <- sum(data$dose > 0)
infl_reg_SR <- model$residuals * spline_dosage_SR %*%
(MASS::ginv(t(spline_dosage_SR) %*% spline_dosage_SR / n_treated_SR))
infl_att_SR <- infl_reg_SR %*% t(spline_dosage_SR)
se_att_SR <- sqrt(mean(colMeans(infl_att_SR^2)) / n_treated_SR)
derivative_spline_dosage_SR <- dbs(data$dose,
knots = knots,
degree = 3,
intercept = TRUE)
coefficients_SR <- coef(model)
acrt_SR <- as.vector(derivative_spline_dosage_SR %*% coefficients_SR)
acrt_SR_filtered <- acrt_SR[data$dose > 0]
ACR_hat_0 <- mean(acrt_SR_filtered)
list(
att_estimate = mean_att_SR,
att_se = se_att_SR,
acr_estimate = ACR_hat_0
)
}, error = function(e) {
return(list(
att_estimate = NA_real_,
att_se = NA_real_,
acr_estimate = NA_real_,
))
})
source("~/Documents/uni/master-dissertation/contdid/simulation/DGP1.R")
source("~/Documents/uni/master-dissertation/contdid/R/ucb_cvge.R")
devtools::document()
# # Run estimators and calculate metrics
# NPIV
# debugonce(npiv_regression)
npiv_result <- tryCatch({
result <- npiv_regression(
data = data,
treatment_col = "dose",
outcome_col = "dy",
)
# write_debug_info("npiv_regression completed successfully")
result
}, error = function(e) {
# write_debug_info(paste("Error in npiv_regression:", e$message))
return(NULL)
})
npiv_result <- tryCatch({
result <- npiv_regression(
data = data,
treatment_col = "dose",
outcome_col = "dy",
)
# write_debug_info("npiv_regression completed successfully")
Expand Down Expand Up @@ -510,3 +364,149 @@ stopCluster(cl)
toc()
View(results_list)
devtools::test()
devtools::load_all()
devtools::test()
devtools::load_all()
devtools::test()
devtools::load_all()
devtools::test()
debugonce(npiv_regression)
devtools::load_all()
devtools::test()
library(testthat)
library(readr)
library(dplyr)
library(purrr)
library(contdid)
set.seed(1234567)
# Helper function to load CSV files
load_csv_files <- function(directory) {
tryCatch({
csv_files <- list.files(directory, pattern = "\\.csv$", full.names = TRUE)
if (length(csv_files) == 0) {
stop("No CSV files found in the specified directory")
}
message(paste("Found", length(csv_files), "CSV files"))
data_list <- map(csv_files, ~{
message(paste("Processing file:", .x))
var_name <- tools::file_path_sans_ext(basename(.x))
var_name <- sub("^123_", "", var_name) # Remove "123_" prefix
data <- read_csv(.x, col_types = cols(), col_names = FALSE)
if (ncol(data) == 1) {
setNames(list(as.numeric(data[[1]])), var_name)
} else {
setNames(list(data), var_name)
}
}) %>% flatten()
message("CSV files loaded successfully")
return(data_list)
}, error = function(e) {
message(paste("Error in load_csv_files:", e$message))
return(NULL)
})
}
# Test function
test_matlab_equality <- function(r_result, matlab_data, tolerance = 1e-2) {
tryCatch({
common_vars <- intersect(names(r_result), names(matlab_data))
message(paste("Testing", length(common_vars), "common variables"))
for (var in common_vars) {
message(paste("Testing variable:", var))
r_value <- r_result[[var]]
matlab_value <- matlab_data[[var]]
# Convert to vector if it's a single-column data frame
restructure_variable <- function(var) {
if (is.data.frame(var) && ncol(var) == 1) {
# If the variable is a data frame with 1 column, extract the vector
var <- var[[1]]
}
if (is.matrix(var) && ncol(var) == 1) {
# If the variable is a matrix with 1 column, convert to a vector
var <- as.numeric(var)
}
if (is.array(var) && length(dim(var)) == 2 && dim(var)[2] == 1) {
# If the variable is a 2D array with 1 column, convert to a vector
var <- as.numeric(var)
}
return(var)
}
# Restructure r_value and matlab_value
r_value <- restructure_variable(r_value)
matlab_value <- restructure_variable(matlab_value)
# Check if the lengths are the same
expect_equal(length(r_value), length(matlab_value),
info = paste("Length mismatch for variable:", var))
# Check if the values are approximately equal
expect_equal(r_value, matlab_value, tolerance = tolerance,
info = paste("Value mismatch for variable:", var))
}
message("All variables tested successfully")
}, error = function(e) {
message(paste("Error in test_matlab_equality:", e$message))
})
}
tryCatch({
skip_if_not(dir.exists(test_path("data", "matlab_results")),
"MATLAB data directory not found")
skip_if_not(file.exists(test_path("data", "medicare1.csv")),
"Medicare data file not found")
message("Loading MATLAB CSV files")
matlab_data <- load_csv_files(test_path("data", "matlab_results"))
if (is.null(matlab_data)) {
skip("Failed to load MATLAB data")
}
message("Loading R data")
data <- read.csv(test_path("data", "medicare1.csv"))
message(paste("R data dimensions:", nrow(data), "x", ncol(data)))
message("Running npiv_regression")
r_result <- npiv_regression(data, "medicare_share_1983", "d_capital_labor_ratio")
message("Comparing results")
test_matlab_equality(r_result, matlab_data)
message("Test completed successfully")
}, error = function(e) {
message(paste("Error in main test:", e$message))
stop(e)
})
skip_if_not(dir.exists(test_path("data", "matlab_results")),
"MATLAB data directory not found")
skip_if_not(file.exists(test_path("data", "medicare1.csv")),
"Medicare data file not found")
message("Loading MATLAB CSV files")
matlab_data <- load_csv_files(test_path("data", "matlab_results"))
if (is.null(matlab_data)) {
skip("Failed to load MATLAB data")
}
message("Loading R data")
data <- read.csv(test_path("data", "medicare1.csv"))
message(paste("R data dimensions:", nrow(data), "x", ncol(data)))
message("Running npiv_regression")
r_result <- npiv_regression(data, "medicare_share_1983", "d_capital_labor_ratio")
debugonce(npiv_regression)
r_result <- npiv_regression(data, "medicare_share_1983", "d_capital_labor_ratio")
devtools::load_all()
devtools::test()
results_list <- readRDS("~/Documents/uni/master-dissertation/contdid/simulation/results_list.rds")
# saveRDS(results_list, file = "results_list.rds")
# Combine all results into a single data frame
all_results <- do.call(rbind, results_list)
# Calculate mean results
mean_results <- all_results %>%
group_by(n, dgp) %>%
summarise(across(where(is.numeric), ~ mean(.x, na.rm = TRUE)), .groups = "drop")
library(tictoc)
library(parallel)
library(doParallel)
library(foreach)
library(doRNG)
library(tidyverse)
library(fixest)
library(splines2)
library(triangle)
# saveRDS(results_list, file = "results_list.rds")
# Combine all results into a single data frame
all_results <- do.call(rbind, results_list)
# Calculate mean results
mean_results <- all_results %>%
group_by(n, dgp) %>%
summarise(across(where(is.numeric), ~ mean(.x, na.rm = TRUE)), .groups = "drop")
View(mean_results)
12 changes: 9 additions & 3 deletions R/npiv_regression.R
Original file line number Diff line number Diff line change
Expand Up @@ -62,13 +62,19 @@ npiv_regression <- function(data,
CJ <- c(0, cumsum(TJ))
# log_dimensions("TJ", TJ)
# log_dimensions("CJ", CJ)
# Create grid for x
# Xx <- seq(0, 1, length.out = nx)
# Xx_sub <- Xx[Xx>0.01 & Xx<=0.99]

# Create grid for x
# x_min <- min(x)
# x_max <- max(x)
# Xx <- seq(x_min, x_max, length.out = nx + 1)
# buffer <- 0.01 * (x_max - x_min)
# Xx_sub <- Xx[Xx > x_min + buffer & Xx <= x_max - buffer]

Xx <- seq(0, 1, length.out = nx + 1) # +0 because MATLAB's 0:1/nx:1 includes both endpoints removed + 1
Xx_sub <- Xx[Xx > 0.01 & Xx <= 0.99]
# log_dimensions("Xx", Xx)
# log_dimensions("Xx_sub", Xx_sub)

# Compute basis functions
compute_basis_functions <- function(x, nL, r, CJ, derivative = FALSE) {
result <- matrix(0, nrow = length(x), ncol = CJ[length(CJ)])
Expand Down
10 changes: 5 additions & 5 deletions simulation/working-sim.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ clusterExport(cl, c("dgp_function", "gdata", "run_twfe", "run_feols_bspline", "c

results_list <- list()
tic()
for (dgp in 2:2) {
for (dgp in 1:4) {
# cat("Processing DGP:", dgp, "\n")
dgp_results <- list()
for (sample_size in n) {
Expand All @@ -57,11 +57,11 @@ stopCluster(cl)
toc()
# saveRDS(results_list, file = "results_list.rds")
# Combine all results into a single data frame
# all_results <- do.call(rbind, results_list)
all_results <- do.call(rbind, results_list)

# Calculate mean results
# mean_results <- all_results %>%
# group_by(n, dgp) %>%
# summarise(across(where(is.numeric), ~ mean(.x, na.rm = TRUE)), .groups = "drop")
mean_results <- all_results %>%
group_by(n, dgp) %>%
summarise(across(where(is.numeric), ~ mean(.x, na.rm = TRUE)), .groups = "drop")


1 change: 1 addition & 0 deletions tests/testthat/test-matlab-equality.R
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ test_that("Short Test: R results match MATLAB results", {
message(paste("R data dimensions:", nrow(data), "x", ncol(data)))

message("Running npiv_regression")
# debugonce(npiv_regression)
r_result <- npiv_regression(data, "medicare_share_1983", "d_capital_labor_ratio")

message("Comparing results")
Expand Down

0 comments on commit 0a7663e

Please sign in to comment.