From 886ce0edb8cab2807f630e6d85a9736b645d751b Mon Sep 17 00:00:00 2001
From: Adriaan van der Graaf <adriaan.vd.graaf@gmail.com>
Date: Mon, 7 Oct 2024 10:48:54 +0200
Subject: [PATCH 1/6] Building an R version of MR link 2 and testing for
 equivalence of likelihood function

---
 R/mr_link_2_functions.R                    |  72 +++++
 tests/test_for_r_and_python_concordance.py | 314 +++++++++++++++++++++
 2 files changed, 386 insertions(+)
 create mode 100644 R/mr_link_2_functions.R
 create mode 100644 tests/test_for_r_and_python_concordance.py

diff --git a/R/mr_link_2_functions.R b/R/mr_link_2_functions.R
new file mode 100644
index 0000000..b0dfb43
--- /dev/null
+++ b/R/mr_link_2_functions.R
@@ -0,0 +1,72 @@
+mr_link2_loglik_reference_v2 <- function(th, lam, c_x, c_y, n_x, n_y) {
+  # Convert n_x and n_y to float
+  n_x <- as.numeric(n_x)
+  n_y <- as.numeric(n_y)
+  a <- th[1]
+  tX <- abs(th[2])
+  tY <- abs(th[3])
+
+  Dyy <- 1 / (n_y * lam + tY)
+
+  if (a != 0) {
+    Dxx <- 1 / (exp(log(a^2 * n_y + n_x) + log(lam)) + tX -
+                exp(log(a^2 * n_y^2 * lam^2) - log(n_y * lam + tY)))
+    Dxy <- -Dxx * a * exp(log(n_y * lam) - log(n_y * lam + tY))
+    Dyy <- Dyy + exp(log(Dxx * (a^2 * n_y^2 * lam^2)) - (2 * log(n_y * lam + tY)))
+    asq_ny_sq_lam_sq_div_ny_lam_ty <- exp(log(a^2 * n_y^2 * lam^2) - log(n_y * lam + tY))
+  } else {
+    Dxx <- 1 / (exp(log(n_x) + log(lam)) + tX)
+    Dxy <- -Dxx * a * exp(log(n_y * lam) - log(n_y * lam + tY))
+    asq_ny_sq_lam_sq_div_ny_lam_ty <- 0 * lam
+  }
+
+  dX <- n_x * c_x + a * n_y * c_y
+  dY <- n_y * c_y
+  m <- length(c_x)
+
+  loglik <- -m * log(2 * pi) -
+            (1 / 2) * sum(log((a^2 * n_y + n_x) * lam + tX - asq_ny_sq_lam_sq_div_ny_lam_ty)) -
+            (1 / 2) * sum(log(n_y * lam + tY)) +
+            (1 / 2) * (sum(dX^2 * Dxx) + 2 * sum(dX * dY * Dxy) + sum(dY^2 * Dyy)) -
+            (n_x / 2) * sum((c_x^2) / lam) -
+            (n_y / 2) * sum((c_y^2) / lam) +
+            (m / 2) * (log(n_x) + log(n_y)) - sum(log(lam)) + (m / 2) * (log(tX) + log(tY))
+
+  return(-loglik)
+}
+
+mr_link2_loglik_alpha_h0 <- function(th, lam, cX, cY, nX, nY) {
+  return(mr_link2_loglik_reference_v2(c(0, th[1], th[2]), lam, cX, cY, nX, nY))
+}
+
+mr_link2_loglik_sigma_y_h0 <- function(th, lam, c_x, c_y, n_x, n_y) {
+  n_x <- as.numeric(n_x)
+  n_y <- as.numeric(n_y)
+  a <- th[1]
+  tX <- abs(th[2])
+
+  Dyy <- rep(0, length(lam))
+
+  if (a != 0) {
+    Dxx <- 1 / (exp(log(a^2 * n_y + n_x) + log(lam)) + tX)
+    Dxy <- rep(0, length(lam))
+    asq_ny_sq_lam_sq_div_ny_lam_ty <- rep(0, length(lam))
+  } else {
+    Dxx <- 1 / (exp(log(n_x) + log(lam)) + tX)
+    Dxy <- rep(0, length(lam))
+    asq_ny_sq_lam_sq_div_ny_lam_ty <- rep(0, length(lam))
+  }
+
+  dX <- n_x * c_x + a * n_y * c_y
+  dY <- n_y * c_y
+  m <- length(c_x)
+
+  loglik <- -m * log(2 * pi) -
+            (1 / 2) * sum(log((a^2 * n_y + n_x) * lam + tX - asq_ny_sq_lam_sq_div_ny_lam_ty)) +
+            (1 / 2) * (sum(dX^2 * Dxx) + 2 * sum(dX * dY * Dxy) + sum(dY^2 * Dyy)) -
+            (n_x / 2) * sum((c_x^2) / lam) -
+            (n_y / 2) * sum((c_y^2) / lam) +
+            (m / 2) * (log(n_x) + log(n_y)) - sum(log(lam)) + (m / 2) * log(tX)
+
+  return(-loglik)
+}
\ No newline at end of file
diff --git a/tests/test_for_r_and_python_concordance.py b/tests/test_for_r_and_python_concordance.py
new file mode 100644
index 0000000..083f1fb
--- /dev/null
+++ b/tests/test_for_r_and_python_concordance.py
@@ -0,0 +1,314 @@
+import pytest
+import sys
+import os
+
+## For the python part
+
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+from mr_link_2_standalone import *
+
+
+## for the R part
+
+from rpy2.robjects import r, numpy2ri
+from rpy2.robjects.packages import importr
+
+# Activate automatic conversion between numpy and R objects
+numpy2ri.activate()
+
+# Load the base R package
+base = importr('base')
+# Source the R file with the functions
+r['source']('../R/mr_link_2_functions.R')
+
+"""
+Now, we perform unit tests on the MR-link-2 functions. to ensure that there is no creep of the 
+First function v0
+"""
+def test_mr_link2_loglik_reference_v0_basic_r_concordance():
+    th = np.array([0.1, 0.01, 0.01])
+    lam = np.array([1, 2, 3])
+    c_x = np.array([1, 1, 1])
+    c_y = np.array([2, 2, 2])
+    n_x = 100
+    n_y = 100
+
+    expected_result = 22.944216071347796
+    python_result = mr_link2_loglik_reference_v0(th, lam, c_x, c_y, n_x, n_y)
+
+    assert np.isclose(python_result, expected_result)
+    assert np.isclose(python_result, mr_link2_loglik_reference_v2(th, lam, c_x, c_y, n_x, n_y))
+
+    r_result = r['mr_link2_loglik_reference_v2'](th, lam, c_x, c_y, n_x, n_y)
+    assert np.isclose(r_result[0], expected_result)
+
+
+def test_mr_link2_loglik_reference_v0_zero_lam_r_concordance():
+    th = np.array([0.1, 0.01, 0.01])
+    lam = np.array([0, 0, 0])
+    c_x = np.array([1, 1, 1])
+    c_y = np.array([2, 2, 2])
+    n_x = 100
+    n_y = 100
+
+    python_result = mr_link2_loglik_reference_v0(th, lam, c_x, c_y, n_x, n_y)
+    assert np.isnan(python_result)
+
+    r_result = r['mr_link2_loglik_reference_v2'](th, lam, c_x, c_y, n_x, n_y)
+    assert np.isnan(r_result[0])
+
+def test_mr_link2_loglik_reference_v0_large_values_r_concordance():
+    th = np.array([0.5, 1e10, 1e10])
+    lam = np.array([1e5, 1e5, 1e5])
+    c_x = np.array([1e2, 1e2, 1e2])
+    c_y = np.array([1e2, 1e2, 1e2])
+    n_x = 1e6
+    n_y = 1e6
+
+    expected_result = 17617.166270043643
+    python_result = mr_link2_loglik_reference_v0(th, lam, c_x, c_y, n_x, n_y)
+    assert np.isclose(python_result, expected_result)
+
+    r_result = r['mr_link2_loglik_reference_v2'](th, lam, c_x, c_y, n_x, n_y)
+    assert np.isclose(r_result[0], expected_result)
+
+def test_mr_link2_loglik_reference_v0_negative_values_r_concordance():
+    th = np.array([0.1, -0.01, 0.01])
+    lam = np.array([1, 2, 3])
+    c_x = np.array([1, 1, 1])
+    c_y = np.array([2, 2, 2])
+    n_x = 100
+    n_y = 100
+
+    expected_result = 22.944216071347796
+    python_result = mr_link2_loglik_reference_v0(th, lam, c_x, c_y, n_x, n_y)
+    assert np.isclose(python_result, expected_result)
+
+    r_result = r['mr_link2_loglik_reference_v2'](th, lam, c_x, c_y, n_x, n_y)
+    assert np.isclose(r_result[0], expected_result)
+
+
+def test_mr_link2_loglik_reference_v2_basic_r_concordance():
+    th = np.array([0.1, 0.01, 0.01])
+    lam = np.array([1, 2, 3])
+    c_x = np.array([1, 1, 1])
+    c_y = np.array([2, 2, 2])
+    n_x = 100
+    n_y = 100
+
+    expected_result = 22.944216071347796
+    python_result = mr_link2_loglik_reference_v2(th, lam, c_x, c_y, n_x, n_y)
+    assert np.isclose(python_result, expected_result)
+
+    r_result = r['mr_link2_loglik_reference_v2'](th, lam, c_x, c_y, n_x, n_y)
+    assert np.isclose(r_result[0], expected_result)
+
+
+
+def test_mr_link2_loglik_reference_v2_zero_lam_r_concordance():
+    th = np.array([0.1, 0.01, 0.01])
+    lam = np.array([0, 0, 0])
+    c_x = np.array([1, 1, 1])
+    c_y = np.array([2, 2, 2])
+    n_x = 100
+    n_y = 100
+
+    python_result = mr_link2_loglik_reference_v2(th, lam, c_x, c_y, n_x, n_y)
+    assert np.isnan(python_result)
+
+    r_result = r['mr_link2_loglik_reference_v2'](th, lam, c_x, c_y, n_x, n_y)
+    assert np.isnan(r_result[0])
+
+
+def test_mr_link2_loglik_reference_v2_large_values_r_concordance():
+    th = np.array([0.5, 1e10, 1e10])
+    lam = np.array([1e5, 1e5, 1e5])
+    c_x = np.array([1e2, 1e2, 1e2])
+    c_y = np.array([1e2, 1e2, 1e2])
+    n_x = 1e6
+    n_y = 1e6
+
+    expected_result = 17617.166270043643
+    python_result = mr_link2_loglik_reference_v2(th, lam, c_x, c_y, n_x, n_y)
+    assert np.isclose(python_result, expected_result)
+
+    r_result = r['mr_link2_loglik_reference_v2'](th, lam, c_x, c_y, n_x, n_y)
+    assert np.isclose(r_result[0], expected_result)
+
+
+def test_mr_link2_loglik_reference_v2_negative_values_r_concordance():
+    th = np.array([0.1, -0.01, 0.01])
+    lam = np.array([1, 2, 3])
+    c_x = np.array([1, 1, 1])
+    c_y = np.array([2, 2, 2])
+    n_x = 100
+    n_y = 100
+
+    expected_result = 22.944216071347796
+    python_result = mr_link2_loglik_reference_v2(th, lam, c_x, c_y, n_x, n_y)
+
+    # Check the Python result against the expected value
+    assert np.isclose(python_result, expected_result)
+
+    # Call the R function and check if it matches the expected value
+    r_result = r['mr_link2_loglik_reference_v2'](th, lam, c_x, c_y, n_x, n_y)
+    assert np.isclose(r_result[0], expected_result)
+
+    # Also check against the mr_link2_loglik_reference_v0 function
+    reference_result_v0 = mr_link2_loglik_reference_v0(th, lam, c_x, c_y, n_x, n_y)
+    assert np.isclose(python_result, reference_result_v0)
+
+
+"""
+As we do use them as functions, we want to make sure that the h0 functions also work correctly.
+"""
+def test_mr_link2_loglik_alpha_h0_basic_r_concordance():
+    th = np.array([0.01, 0.01])
+    lam = np.array([1, 2, 3])
+    cX = np.array([1, 1, 1])
+    cY = np.array([2, 2, 2])
+    nX = 100
+    nY = 100
+
+    python_result = mr_link2_loglik_alpha_h0(th, lam, cX, cY, nX, nY)
+    reference_result = mr_link2_loglik_reference_v2([0.0] + list(th), lam, cX, cY, nX, nY)
+
+    assert np.isclose(python_result, reference_result)
+
+    r_result = r['mr_link2_loglik_alpha_h0'](th, lam, cX, cY, nX, nY)
+    assert np.isclose(r_result[0], reference_result)
+
+
+def test_mr_link2_loglik_alpha_h0_zero_lam_r_concordance():
+    th = np.array([0.01, 0.01])
+    lam = np.array([0, 0, 0])
+    cX = np.array([1, 1, 1])
+    cY = np.array([2, 2, 2])
+    nX = 100
+    nY = 100
+    result = mr_link2_loglik_alpha_h0(th, lam, cX, cY, nX, nY)
+    assert np.isnan(result)
+    assert np.isnan(mr_link2_loglik_reference_v2([0.0] + list(th), lam, cX, cY, nX, nY))
+def test_mr_link2_loglik_alpha_h0_large_value_r_concordances():
+    th = np.array([1e10, 1e10])
+    lam = np.array([1e5, 1e5, 1e5])
+    cX = np.array([1e2, 1e2, 1e2])
+    cY = np.array([1e2, 1e2, 1e2])
+    nX = 1e6
+    nY = 1e6
+
+    # Call the Python function for alpha_h0
+    python_result = mr_link2_loglik_alpha_h0(th, lam, cX, cY, nX, nY)
+
+    # Call the Python reference function for comparison
+    reference_result = mr_link2_loglik_reference_v2([0.0] + list(th), lam, cX, cY, nX, nY)
+
+    # Validate Python result
+    assert isinstance(python_result, float)
+    assert np.isclose(python_result, reference_result)
+
+    # Call the R function for alpha_h0 and compare with the Python result
+    r_result = r['mr_link2_loglik_alpha_h0'](th, lam, cX, cY, nX, nY)
+    assert np.isclose(r_result[0], reference_result)
+
+
+def test_mr_link2_loglik_alpha_h0_negative_values_r_concordance():
+    th = np.array([0.01, -0.01])
+    lam = np.array([1, 2, 3])
+    cX = np.array([1, 1, 1])
+    cY = np.array([2, 2, 2])
+    nX = 100
+    nY = 100
+
+    # Call the Python function for alpha_h0
+    python_result = mr_link2_loglik_alpha_h0(th, lam, cX, cY, nX, nY)
+
+    # Call the Python reference function for comparison
+    reference_result = mr_link2_loglik_reference_v2([0.0] + list(th), lam, cX, cY, nX, nY)
+
+    # Validate Python result
+    assert isinstance(python_result, float)
+    assert np.isclose(python_result, reference_result)
+
+    # Call the R function for alpha_h0 and compare with the Python result
+    r_result = r['mr_link2_loglik_alpha_h0'](th, lam, cX, cY, nX, nY)
+    assert np.isclose(r_result[0], reference_result)
+
+
+def test_mr_link2_loglik_alpha_h0_large_population_r_concordance():
+    th = np.array([0.01, 0.01])
+    lam = np.array([1, 2, 3])
+    cX = np.array([1, 1, 1])
+    cY = np.array([2, 2, 2])
+    nX = 1e12
+    nY = 1e12
+
+    # Call the Python function for alpha_h0
+    python_result = mr_link2_loglik_alpha_h0(th, lam, cX, cY, nX, nY)
+
+    # Call the Python reference function for comparison
+    reference_result = mr_link2_loglik_reference_v2([0.0] + list(th), lam, cX, cY, nX, nY)
+
+    # Validate Python result
+    assert isinstance(python_result, float)
+    assert np.isclose(python_result, reference_result)
+
+    # Call the R function for alpha_h0 and compare with the Python result
+    r_result = r['mr_link2_loglik_alpha_h0'](th, lam, cX, cY, nX, nY)
+    assert np.isclose(r_result[0], reference_result)
+
+
+"""
+FUZZ TESTING many many many times.
+"""
+FUZZ_ITERATIONS = 10000
+
+
+def random_input_generator():
+    """Generate random inputs for the test functions."""
+    for _ in range(FUZZ_ITERATIONS):
+        th = np.random.uniform(-1e3, 1e3, size=3)
+        lam = np.random.uniform(1e-5, 1e3, size=500)
+        cX = np.random.uniform(-1e3, 1e3, size=500)
+        cY = np.random.uniform(-1e3, 1e3, size=500)
+        nX = np.random.uniform(50, 1e7)
+        nY = np.random.uniform(50, 1e7)
+
+        yield th, lam, cX, cY, nX, nY
+
+
+@pytest.mark.parametrize("th, lam, cX, cY, nX, nY", random_input_generator())
+def test_fuzz_mr_link2_loglik_reference_v2(th, lam, cX, cY, nX, nY):
+    """Fuzz testing for mr_link2_loglik_reference_v2 between Python and R implementations."""
+    # Python function result
+    python_result = mr_link2_loglik_reference_v2(th, lam, cX, cY, nX, nY)
+
+    # Call R function
+    r_result = r['mr_link2_loglik_reference_v2'](th, lam, cX, cY, nX, nY)
+
+    # Convert R result to Python float for comparison
+    r_result_py = float(r_result[0])
+
+    # Compare both results using np.isclose()
+    assert np.isclose(python_result, r_result_py, rtol=1e-5,
+                      atol=1e-8), f"Mismatch for th={th}, lam={lam}, cX={cX}, cY={cY}, nX={nX}, nY={nY}"
+
+
+@pytest.mark.parametrize("th, lam, cX, cY, nX, nY", random_input_generator())
+def test_fuzz_mr_link2_loglik_alpha_h0(th, lam, cX, cY, nX, nY):
+    """Fuzz testing for mr_link2_loglik_alpha_h0 between Python and R implementations."""
+    # Use only the first two elements of 'th' for alpha_h0
+    th_h0 = th[:2]
+
+    # Python function result
+    python_result = mr_link2_loglik_alpha_h0(th_h0, lam, cX, cY, nX, nY)
+
+    # Call R function
+    r_result = r['mr_link2_loglik_alpha_h0'](th_h0, lam, cX, cY, nX, nY)
+
+    # Convert R result to Python float for comparison
+    r_result_py = float(r_result[0])
+
+    # Compare both results using np.isclose()
+    assert np.isclose(python_result, r_result_py, rtol=1e-5,
+                      atol=1e-8), f"Mismatch for th={th}, lam={lam}, cX={cX}, cY={cY}, nX={nX}, nY={nY}"
\ No newline at end of file

From 4d0421fcff82078879f498ed98ee4f810db03329 Mon Sep 17 00:00:00 2001
From: Adriaan van der Graaf <adriaan.vd.graaf@gmail.com>
Date: Mon, 7 Oct 2024 10:54:18 +0200
Subject: [PATCH 2/6] Building an R version of MR link 2 and testing for
 equivalence of likelihood function

---
 tests/test_for_r_and_python_concordance.py | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/tests/test_for_r_and_python_concordance.py b/tests/test_for_r_and_python_concordance.py
index 083f1fb..67e32f0 100644
--- a/tests/test_for_r_and_python_concordance.py
+++ b/tests/test_for_r_and_python_concordance.py
@@ -311,4 +311,20 @@ def test_fuzz_mr_link2_loglik_alpha_h0(th, lam, cX, cY, nX, nY):
 
     # Compare both results using np.isclose()
     assert np.isclose(python_result, r_result_py, rtol=1e-5,
-                      atol=1e-8), f"Mismatch for th={th}, lam={lam}, cX={cX}, cY={cY}, nX={nX}, nY={nY}"
\ No newline at end of file
+                      atol=1e-8), f"Mismatch for th={th}, lam={lam}, cX={cX}, cY={cY}, nX={nX}, nY={nY}"
+
+
+
+@pytest.mark.parametrize("th, lam, cX, cY, nX, nY", [
+    (np.array([1e-10, 1e-10, 1e-10]), np.array([1e-10, 1e-10, 1e-10]), np.array([1e-10, 1e-10, 1e-10]), np.array([1e-10, 1e-10, 1e-10]), 1e10, 1e10),
+    (np.array([1e10, 1e10, 1e10]), np.array([1e5, 1e5, 1e5]), np.array([1e2, 1e2, 1e2]), np.array([1e2, 1e2, 1e2]), 1e10, 1e5),
+    (np.array([0, 0, 0]), np.array([1, 2, 3]), np.array([0, 0, 0]), np.array([0, 0, 0]), 100, 100),
+    # Add other extreme cases here...
+])
+def test_edge_cases_mr_link2_loglik_reference_v2(th, lam, cX, cY, nX, nY):
+    """Test edge cases for equivalence between Python and R implementations."""
+    python_result = mr_link2_loglik_reference_v2(th, lam, cX, cY, nX, nY)
+    r_result = r['mr_link2_loglik_reference_v2'](th, lam, cX, cY, nX, nY)
+    r_result_py = float(r_result[0])
+
+    assert np.isclose(python_result, r_result_py, rtol=1e-5, atol=1e-8)
\ No newline at end of file

From 73fb48ded69ceb4b898978a100de46ca04ec7e8c Mon Sep 17 00:00:00 2001
From: Adriaan van der Graaf <adriaan.vd.graaf@gmail.com>
Date: Mon, 7 Oct 2024 13:50:16 +0200
Subject: [PATCH 3/6] Added integration tests of MR-link-2 in R up to 2.5%
 tolerance

---
 .github/workflows/test.yml                 |  22 +++-
 R/mr_link_2_functions.R                    | 138 +++++++++++++++++++++
 tests/test_for_r_and_python_concordance.py | 107 +++++++++++++++-
 3 files changed, 258 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 51781e3..4eb9177 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -1,18 +1,34 @@
 name: Test
 on: [push, pull_request]
+
 jobs:
   test:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v2
+
       - name: Set up Python
         uses: actions/setup-python@v2
         with:
           python-version: '3.x'
-      - name: Install dependencies
+
+      - name: Install R
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y r-base r-base-dev
+
+      - name: Cache pip dependencies
+        uses: actions/cache@v2
+        with:
+          path: ~/.cache/pip
+          key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
+          restore-keys: |
+            ${{ runner.os }}-pip-
+
+      - name: Install Python dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install numpy scipy pandas pytest bitarray duckdb pyarrow
+          pip install numpy scipy pandas pytest bitarray duckdb pyarrow rpy2
 
       - name: Download and install plink
         run: |
@@ -26,4 +42,4 @@ jobs:
         run: pytest tests/unit_tests_mr_link_2.py
 
       - name: Run integration tests
-        run: pytest tests/integration_tests_mr_link_2.py
+        run: pytest tests/integration_tests_mr_link_2.py
\ No newline at end of file
diff --git a/R/mr_link_2_functions.R b/R/mr_link_2_functions.R
index b0dfb43..09a6b12 100644
--- a/R/mr_link_2_functions.R
+++ b/R/mr_link_2_functions.R
@@ -69,4 +69,142 @@ mr_link2_loglik_sigma_y_h0 <- function(th, lam, c_x, c_y, n_x, n_y) {
             (m / 2) * (log(n_x) + log(n_y)) - sum(log(lam)) + (m / 2) * log(tX)
 
   return(-loglik)
+}
+
+
+
+mr_link2 <- function(selected_eigenvalues, selected_eigenvectors,
+                     exposure_betas, outcome_betas,
+                     n_exp, n_out, sigma_exp_guess, sigma_out_guess) {
+
+  start_time <- Sys.time()
+
+  # Define optimization options
+  method <- "Nelder-Mead"
+  control <- list(maxit = 100000)
+
+  # Calculate c_x and c_y
+  c_x <- t(selected_eigenvectors) %*% exposure_betas
+  c_y <- t(selected_eigenvectors) %*% outcome_betas
+
+  max_sigma <- sqrt(.Machine$double.xmax)
+
+  # Alpha h0 estimation
+  alpha_h0_guesses <- list(c(sigma_exp_guess, sigma_out_guess),
+                           c(max_sigma, max_sigma),
+                           c(1, max_sigma),
+                           c(1e3, 1e3))
+
+  alpha_h0_results <- optim(par = alpha_h0_guesses[[1]],
+                            fn = mr_link2_loglik_alpha_h0,
+                            gr = NULL,
+                            selected_eigenvalues, c_x, c_y, n_exp, n_out,
+                            method = method, control = control)
+
+  # Loop over other guesses
+  for (alpha_h0_guess in alpha_h0_guesses[-1]) {
+    if (alpha_h0_results$convergence == 0) break
+
+    new_alpha_h0_results <- optim(par = alpha_h0_guess,
+                                  fn = mr_link2_loglik_alpha_h0,
+                                  gr = NULL,
+                                  selected_eigenvalues, c_x, c_y, n_exp, n_out,
+                                  method = method, control = control)
+
+    if (alpha_h0_results$value >= new_alpha_h0_results$value) {
+      alpha_h0_results <- new_alpha_h0_results
+    }
+  }
+
+  # Sigma_y estimation
+  sigma_y_guesses <- list(c(0.0, sigma_exp_guess),
+                          c(1.0, sigma_exp_guess),
+                          c(0.0, alpha_h0_results$par[1]),
+                          c(1.0, alpha_h0_results$par[1]),
+                          c(0.0, max_sigma),
+                          c(1e-10, max_sigma))
+
+  sigma_y_h0_results <- optim(par = sigma_y_guesses[[1]],
+                              fn = mr_link2_loglik_sigma_y_h0,
+                              gr = NULL,
+                              selected_eigenvalues, c_x, c_y, n_exp, n_out,
+                              method = method, control = control)
+
+  for (sigma_y_guess in sigma_y_guesses[-1]) {
+    if (sigma_y_h0_results$convergence == 0) break
+
+    new_sigma_y_h0_results <- optim(par = sigma_y_guess,
+                                    fn = mr_link2_loglik_sigma_y_h0,
+                                    gr = NULL,
+                                    selected_eigenvalues, c_x, c_y, n_exp, n_out,
+                                    method = method, control = control)
+
+    if (new_sigma_y_h0_results$value < sigma_y_h0_results$value) {
+      sigma_y_h0_results <- new_sigma_y_h0_results
+    }
+  }
+
+  # Ha estimation
+  ha_guesses <- list(c(0.0, alpha_h0_results$par[1], alpha_h0_results$par[2]),
+                     c(sigma_y_h0_results$par[1], sigma_y_h0_results$par[2], sqrt(.Machine$double.xmax)),
+                     c(1.0, alpha_h0_results$par[1], alpha_h0_results$par[2]),
+                     c(1e-10, max_sigma, max_sigma))
+
+  ha_results <- optim(par = ha_guesses[[1]],
+                      fn = mr_link2_loglik_reference_v2,
+                      gr = NULL,
+                      selected_eigenvalues, c_x, c_y, n_exp, n_out,
+                      method = method, control = control)
+
+  for (ha_guess in ha_guesses[-1]) {
+    if (ha_results$convergence == 0) break
+
+    new_ha_result <- optim(par = ha_guess,
+                           fn = mr_link2_loglik_reference_v2,
+                           gr = NULL,
+                           selected_eigenvalues, c_x, c_y, n_exp, n_out,
+                           method = method, control = control)
+
+    if (new_ha_result$value < ha_results$value) {
+      ha_results <- new_ha_result
+    }
+  }
+
+  # Likelihood Ratio Test and Estimation
+  alpha <- ha_results$par[1]
+  alpha_chi_sq <- 2 * (alpha_h0_results$value - ha_results$value)
+  alpha_p_val <- pchisq(alpha_chi_sq, df = 1, lower.tail = FALSE)
+  z_alpha <- ifelse(alpha_chi_sq <= 0, 0.0, sign(alpha) * sqrt(alpha_chi_sq))
+  se_alpha <- ifelse(z_alpha != 0, alpha / z_alpha, NA)
+
+  sigma_y <- 1 / abs(ha_results$par[3])
+  sigma_y_chi_sq <- 2 * (sigma_y_h0_results$value - ha_results$value)
+  sigma_y_p_val <- pchisq(sigma_y_chi_sq, df = 1, lower.tail = FALSE)
+  z_sigma_y <- ifelse(sigma_y_chi_sq <= 0, 0.0, sqrt(sigma_y_chi_sq))
+  se_sigma_y <- ifelse(z_sigma_y != 0, sigma_y / z_sigma_y, NA)
+
+  # Return results as a list (equivalent to Python's dictionary)
+  list(
+    alpha = alpha,
+    `se(alpha)` = se_alpha,
+    `p(alpha)` = alpha_p_val,
+    sigma_y = sigma_y,
+    `se(sigma_y)` = se_sigma_y,
+    `p(sigma_y)` = sigma_y_p_val,
+    sigma_x = 1 / abs(ha_results$par[2]),
+    alpha_h0_sigma_x = 1 / abs(alpha_h0_results$par[1]),
+    alpha_h0_sigma_y = 1 / abs(alpha_h0_results$par[2]),
+    alpha_h0_loglik = alpha_h0_results$value,
+    sigma_y_h0_alpha = sigma_y_h0_results$par[1],
+    sigma_y_h0_sigma_x = 1 / abs(sigma_y_h0_results$par[2]),
+    sigma_y_h0_loglik = sigma_y_h0_results$value,
+    ha_loglik = ha_results$value,
+    optim_alpha_h0_success = alpha_h0_results$convergence == 0,
+    optim_alpha_h0_nit = alpha_h0_results$counts,
+    optim_sigma_y_h0_success = sigma_y_h0_results$convergence == 0,
+    optim_sigma_y_h0_nit = sigma_y_h0_results$counts,
+    optim_ha_success = ha_results$convergence == 0,
+    optim_ha_nit = ha_results$counts,
+    function_time = as.numeric(difftime(Sys.time(), start_time, units = "secs"))
+  )
 }
\ No newline at end of file
diff --git a/tests/test_for_r_and_python_concordance.py b/tests/test_for_r_and_python_concordance.py
index 67e32f0..be640b9 100644
--- a/tests/test_for_r_and_python_concordance.py
+++ b/tests/test_for_r_and_python_concordance.py
@@ -10,8 +10,11 @@
 
 ## for the R part
 
-from rpy2.robjects import r, numpy2ri
+from rpy2.robjects import r, numpy2ri, default_converter
+from rpy2.robjects.vectors import FloatVector
 from rpy2.robjects.packages import importr
+from rpy2.robjects.conversion import localconverter
+
 
 # Activate automatic conversion between numpy and R objects
 numpy2ri.activate()
@@ -261,10 +264,10 @@ def test_mr_link2_loglik_alpha_h0_large_population_r_concordance():
 """
 FUZZ TESTING many many many times.
 """
-FUZZ_ITERATIONS = 10000
+FUZZ_ITERATIONS = 1_000
 
 
-def random_input_generator():
+def random_input_generator_likelihood_functions():
     """Generate random inputs for the test functions."""
     for _ in range(FUZZ_ITERATIONS):
         th = np.random.uniform(-1e3, 1e3, size=3)
@@ -277,7 +280,7 @@ def random_input_generator():
         yield th, lam, cX, cY, nX, nY
 
 
-@pytest.mark.parametrize("th, lam, cX, cY, nX, nY", random_input_generator())
+@pytest.mark.parametrize("th, lam, cX, cY, nX, nY", random_input_generator_likelihood_functions())
 def test_fuzz_mr_link2_loglik_reference_v2(th, lam, cX, cY, nX, nY):
     """Fuzz testing for mr_link2_loglik_reference_v2 between Python and R implementations."""
     # Python function result
@@ -294,7 +297,7 @@ def test_fuzz_mr_link2_loglik_reference_v2(th, lam, cX, cY, nX, nY):
                       atol=1e-8), f"Mismatch for th={th}, lam={lam}, cX={cX}, cY={cY}, nX={nX}, nY={nY}"
 
 
-@pytest.mark.parametrize("th, lam, cX, cY, nX, nY", random_input_generator())
+@pytest.mark.parametrize("th, lam, cX, cY, nX, nY", random_input_generator_likelihood_functions())
 def test_fuzz_mr_link2_loglik_alpha_h0(th, lam, cX, cY, nX, nY):
     """Fuzz testing for mr_link2_loglik_alpha_h0 between Python and R implementations."""
     # Use only the first two elements of 'th' for alpha_h0
@@ -327,4 +330,96 @@ def test_edge_cases_mr_link2_loglik_reference_v2(th, lam, cX, cY, nX, nY):
     r_result = r['mr_link2_loglik_reference_v2'](th, lam, cX, cY, nX, nY)
     r_result_py = float(r_result[0])
 
-    assert np.isclose(python_result, r_result_py, rtol=1e-5, atol=1e-8)
\ No newline at end of file
+    assert np.isclose(python_result, r_result_py, rtol=1e-5, atol=1e-8)
+
+
+"""
+Integration tests of the R functions
+"""
+
+FUZZ_TEST_ITERATIONS = 100
+def random_input_generator_mr_link_2_function():
+    np.random.seed(42)
+    """Generate random inputs for fuzz testing."""
+    for _ in range(FUZZ_TEST_ITERATIONS):
+        # Generate random 'selected_eigenvalues', 'selected_eigenvectors', 'exposure_betas', and 'outcome_betas'
+        selected_eigenvalues = np.random.uniform(0.1, 2.0, size=100)
+        selected_eigenvectors = np.random.normal(size=(100, 100))
+        exposure_betas = np.random.normal(size=100)
+        outcome_betas = np.random.normal(size=100)
+        n_exp = np.random.randint(500, 5000)  # Randomize number of individuals
+        n_out = np.random.randint(500, 5000)  # Randomize number of individuals
+        sigma_exp_guess = np.random.uniform(0.1, 5.0)  # Randomize sigma guesses
+        sigma_out_guess = np.random.uniform(0.1, 5.0)
+
+        yield selected_eigenvalues, selected_eigenvectors, exposure_betas, outcome_betas, n_exp, n_out, sigma_exp_guess, sigma_out_guess
+
+
+@pytest.mark.parametrize(
+    "selected_eigenvalues, selected_eigenvectors, exposure_betas, outcome_betas, n_exp, n_out, sigma_exp_guess, sigma_out_guess",
+    random_input_generator_mr_link_2_function())
+def test_mr_link2_equivalence_fuzz(selected_eigenvalues, selected_eigenvectors, exposure_betas, outcome_betas, n_exp,
+                                   n_out, sigma_exp_guess, sigma_out_guess):
+    """
+
+    Fuzz testing for MR-Link2 equivalence between Python and R implementations.
+
+    Currently only testing until a threshold of 2.5% difference. If testing more samples, this may be exceeded.
+
+    It seems that there are differences between the optimizers which I cannot resolve easily for now.
+    That being said, the p values will be withing 5% of each other, so a python p value of 0.1 will be found within
+    the (0.095 - 0.105) interval.
+
+    """
+
+    # Run Python version of mr_link2
+    python_result = mr_link2(
+        selected_eigenvalues=selected_eigenvalues,
+        selected_eigenvectors=selected_eigenvectors,
+        exposure_betas=exposure_betas,
+        outcome_betas=outcome_betas,
+        n_exp=n_exp,
+        n_out=n_out,
+        sigma_exp_guess=sigma_exp_guess,
+        sigma_out_guess=sigma_out_guess
+    )
+
+    # Convert inputs to R-compatible types and run R version of mr_link2
+    with localconverter(default_converter + numpy2ri.converter):
+        r_selected_eigenvalues = r.matrix(selected_eigenvalues, nrow=100, ncol=1)
+        r_selected_eigenvectors = r.matrix(selected_eigenvectors, nrow=100, ncol=100)
+        r_exposure_betas = r.matrix(exposure_betas, nrow=100, ncol=1)
+        r_outcome_betas = r.matrix(outcome_betas, nrow=100, ncol=1)
+        r_n_exp = n_exp
+        r_n_out = n_out
+        r_sigma_exp_guess = sigma_exp_guess
+        r_sigma_out_guess = sigma_out_guess
+
+        # Call the R function
+        r_result = r['mr_link2'](
+            r_selected_eigenvalues, r_selected_eigenvectors,
+            r_exposure_betas, r_outcome_betas,
+            r_n_exp, r_n_out, r_sigma_exp_guess, r_sigma_out_guess
+        )
+
+    # Convert R result back to Python dictionary format
+    r_result_dict = {str(key): value[0] for key, value in r_result.items()}
+
+    optimization_indicators = ['optim_alpha_h0_success', 'optim_sigma_y_h0_success', 'optim_ha_success']
+    for result in [python_result, r_result_dict]:
+        for indicator in optimization_indicators:
+            if not result[indicator]:
+                print('No correct optimization.')
+                return
+
+    # Define which results to compare with high precision
+    high_precision_keys = ['alpha_h0_loglik', 'ha_loglik', ]
+    for key in high_precision_keys:
+        assert np.isclose(python_result[key], r_result_dict[key], rtol=1e-4,
+                          atol=1e-8), f"Mismatch for {key}: Python={python_result[key]}, R={r_result_dict[key]}"
+
+    # Define which results to compare with low precision
+    low_precision_keys = ['alpha', 'sigma_y', 'sigma_x','p(alpha)', 'p(sigma_y)']
+    for key in low_precision_keys:
+        assert np.isclose(python_result[key], r_result_dict[key], rtol=2.5e-2, ## this is pretty suboptimal, but as this also includes P values, there will not be a crazy amount of difference here.
+                          atol=1e-8), f"Mismatch for {key}: Python={python_result[key]}, R={r_result_dict[key]}"

From ae3eb0b9811bb3de3708eeb96d58230640335767 Mon Sep 17 00:00:00 2001
From: Adriaan van der Graaf <adriaan.vd.graaf@gmail.com>
Date: Mon, 7 Oct 2024 15:26:31 +0200
Subject: [PATCH 4/6] MR-link-2 for R in working condition. Not as featureful
 as the python version, but will produce the same results

---
 R/mr_link_2_functions.R                    | 81 +++++++++++++++++++-
 tests/integration_tests_mr_link_2.py       |  5 --
 tests/test_for_r_and_python_concordance.py | 86 +++++++++++++++++++++-
 3 files changed, 164 insertions(+), 8 deletions(-)

diff --git a/R/mr_link_2_functions.R b/R/mr_link_2_functions.R
index 09a6b12..a150837 100644
--- a/R/mr_link_2_functions.R
+++ b/R/mr_link_2_functions.R
@@ -81,7 +81,7 @@ mr_link2 <- function(selected_eigenvalues, selected_eigenvectors,
 
   # Define optimization options
   method <- "Nelder-Mead"
-  control <- list(maxit = 100000)
+  control <- list(maxit = 300)
 
   # Calculate c_x and c_y
   c_x <- t(selected_eigenvectors) %*% exposure_betas
@@ -207,4 +207,83 @@ mr_link2 <- function(selected_eigenvalues, selected_eigenvectors,
     optim_ha_nit = ha_results$counts,
     function_time = as.numeric(difftime(Sys.time(), start_time, units = "secs"))
   )
+}
+
+remove_highly_correlated <- function(ld_matrix, snp_ordering, max_correlation) {
+  # Remove NAs from the correlation matrix
+  ld_matrix[is.na(ld_matrix)] <- 0
+
+  # Initialize a logical vector to mark rows/columns to remove
+  idxs_to_remove <- rep(FALSE, nrow(ld_matrix))
+
+  # Find pairs of SNPs that are highly correlated
+  correlated_indices <- which(abs(ld_matrix) >= max_correlation, arr.ind = TRUE)
+
+  to_remove <- vector("logical", length = nrow(ld_matrix))
+
+  # Loop through correlated pairs and mark SNPs to remove
+  for (i in seq_len(nrow(correlated_indices))) {
+    a <- correlated_indices[i, 1]
+    b <- correlated_indices[i, 2]
+
+    if ((!to_remove[a] && !to_remove[b] ) && (a != b)) {
+      to_remove[b] <- TRUE
+    }
+  }
+
+  # Remove highly correlated SNPs from both the matrix and the SNP list
+  ld_matrix <- ld_matrix[!to_remove, !to_remove]
+  pruned_snps <- snp_ordering[!to_remove]
+
+  return(list(ld_matrix = ld_matrix, pruned_snps = pruned_snps))
+}
+
+mr_link2_analysis <- function(exposure_betas, outcome_betas, ld_matrix, n_exp, n_out, max_correlation = 0.99) {
+  # Prune highly correlated SNPs
+  snp_ordering <- seq_len(nrow(ld_matrix))  # Simple numbering for SNPs
+  pruned_data <- remove_highly_correlated(ld_matrix, snp_ordering, max_correlation)
+  pruned_ld_matrix <- pruned_data$ld_matrix
+  pruned_snps <- pruned_data$pruned_snps
+
+  # Eigenvalue decomposition
+  eigen_decomp <- eigen(pruned_ld_matrix)
+  eigenvalues <- eigen_decomp$values
+  eigenvectors <- eigen_decomp$vectors
+
+  # Calculate cumulative variance explained
+  variance_explained <- cumsum(eigenvalues) / sum(eigenvalues)
+
+  # Select the eigenvectors that explain at least 99% of the variance
+  threshold_index <- min(which(variance_explained >= 0.99))
+  selected_eigenvectors <- eigenvectors[, 1:threshold_index]
+  selected_eigenvalues <- eigenvalues[1:threshold_index]
+
+  # Call the existing MR-link-2 function
+  # Assuming `mr_link2` is already defined in R and takes the following arguments:
+  # - selected_eigenvalues
+  # - selected_eigenvectors
+  # - exposure_betas
+  # - outcome_betas
+  # - n_exp (number of individuals in exposure dataset)
+  # - n_out (number of individuals in outcome dataset)
+  sigma_exp_guess <- 0.01  # You can adjust or estimate this as needed
+  sigma_out_guess <- 0.001  # You can adjust or estimate this as needed
+
+  # Subset the betas based on pruned SNPs
+  exposure_betas_pruned <- exposure_betas[pruned_snps]
+  outcome_betas_pruned <- outcome_betas[pruned_snps]
+
+  # Perform the MR-link-2 estimation
+  mr_result <- mr_link2(
+    selected_eigenvalues = selected_eigenvalues,
+    selected_eigenvectors = selected_eigenvectors,
+    exposure_betas = exposure_betas_pruned,
+    outcome_betas = outcome_betas_pruned,
+    n_exp = n_exp,
+    n_out = n_out,
+    sigma_exp_guess = sigma_exp_guess,
+    sigma_out_guess = sigma_out_guess
+  )
+
+  return(mr_result)
 }
\ No newline at end of file
diff --git a/tests/integration_tests_mr_link_2.py b/tests/integration_tests_mr_link_2.py
index d8f3157..0f0945b 100644
--- a/tests/integration_tests_mr_link_2.py
+++ b/tests/integration_tests_mr_link_2.py
@@ -13,11 +13,6 @@
 from mr_link_2_standalone import *
 
 
-""""
-NB. These tests require us to have plink in your path. 
-So unfortunately I will not test them in the github workflow.
-"""
-
 def test_plink_version():
     result = subprocess.run(['plink', '--version'], capture_output=True, text=True)
     assert result.returncode == 0, "PLINK is not installed or not in the PATH"
diff --git a/tests/test_for_r_and_python_concordance.py b/tests/test_for_r_and_python_concordance.py
index be640b9..00e2154 100644
--- a/tests/test_for_r_and_python_concordance.py
+++ b/tests/test_for_r_and_python_concordance.py
@@ -11,7 +11,8 @@
 ## for the R part
 
 from rpy2.robjects import r, numpy2ri, default_converter
-from rpy2.robjects.vectors import FloatVector
+from rpy2.robjects.vectors import FloatVector, ListVector, StrVector
+
 from rpy2.robjects.packages import importr
 from rpy2.robjects.conversion import localconverter
 
@@ -22,7 +23,7 @@
 # Load the base R package
 base = importr('base')
 # Source the R file with the functions
-r['source']('../R/mr_link_2_functions.R')
+r['source'](os.path.abspath(os.path.join(os.path.dirname(__file__), '..',  'R', 'mr_link_2_functions.R')))
 
 """
 Now, we perform unit tests on the MR-link-2 functions. to ensure that there is no creep of the 
@@ -423,3 +424,84 @@ def test_mr_link2_equivalence_fuzz(selected_eigenvalues, selected_eigenvectors,
     for key in low_precision_keys:
         assert np.isclose(python_result[key], r_result_dict[key], rtol=2.5e-2, ## this is pretty suboptimal, but as this also includes P values, there will not be a crazy amount of difference here.
                           atol=1e-8), f"Mismatch for {key}: Python={python_result[key]}, R={r_result_dict[key]}"
+
+
+def test_remove_highly_correlated():
+    """Test that the highly correlated SNPs are correctly removed."""
+
+    # Example LD matrix with correlation values
+    ld_matrix = np.array([[1.0, 0.9, 0.1],
+                          [0.9, 1.0, 0.05],
+                          [0.1, 0.05, 1.0]])
+
+    snp_ordering = ["SNP1", "SNP2", "SNP3"]
+    max_correlation = 0.8  # Set the maximum correlation threshold
+
+    with localconverter(numpy2ri.converter):
+        r_ld_matrix = r.matrix(ld_matrix, nrow=3)
+        # Wrap the snp_ordering list into a dictionary format for ListVector
+        r_snp_ordering = StrVector(snp_ordering)
+
+        # Call the R function `remove_highly_correlated`
+        r_pruned_result = r['remove_highly_correlated'](r_ld_matrix, r_snp_ordering, max_correlation)
+
+    # Extract results from R output
+    pruned_ld_matrix = np.array(r_pruned_result['ld_matrix'])
+    pruned_snps = list(r_pruned_result['pruned_snps'])
+
+    # Expected pruned results (SNP2 is removed)
+    expected_pruned_ld_matrix = np.array([[1.0, 0.05], [0.05, 1.0]])
+    expected_pruned_snps = ["SNP2", "SNP3"]
+
+    # Assertions
+    assert np.allclose(pruned_ld_matrix, expected_pruned_ld_matrix), "LD matrix was not pruned correctly."
+    assert pruned_snps == expected_pruned_snps, f"SNP pruning failed. Expected {expected_pruned_snps}, got {pruned_snps}"
+
+# Test function for eigenvalue decomposition and MR-link-2 analysis
+def test_mr_link2_analysis():
+    """Test the MR-link-2 analysis on synthetic data."""
+
+    # Synthetic data
+    np.random.seed(42)
+    exposure_betas = np.random.normal(size=100)
+    outcome_betas = np.random.normal(size=100)
+    ld_matrix = np.random.normal(0, 1, (100, 100))
+    ld_matrix = (ld_matrix + ld_matrix.T) / 2  # Make it symmetric
+    np.fill_diagonal(ld_matrix, 1)  # Ensure diagonals are 1s (perfect correlation with self)
+
+    n_exp = 1000
+    n_out = 1000
+    max_correlation = 0.9
+
+    # Convert inputs to R-compatible types
+    with localconverter(numpy2ri.converter):
+        r_exposure_betas = FloatVector(exposure_betas)
+        r_outcome_betas = FloatVector(outcome_betas)
+        r_ld_matrix = r.matrix(ld_matrix, nrow=100)
+        r_n_exp = n_exp
+        r_n_out = n_out
+        r_max_correlation = max_correlation
+
+        # Call the R function `mr_link2_analysis`
+        r_mr_result = r['mr_link2_analysis'](r_exposure_betas, r_outcome_betas, r_ld_matrix, r_n_exp, r_n_out,
+                                             r_max_correlation)
+
+    # Convert the result back to a Python-friendly format
+    mr_result_dict = dict(r_mr_result)
+
+    # Check the output has expected keys and ranges
+    assert 'alpha' in mr_result_dict, "Key 'alpha' missing in MR result."
+    assert 'p(alpha)' in mr_result_dict, "Key 'p(alpha)' missing in MR result."
+    assert 'sigma_x' in mr_result_dict, "Key 'sigma_x' missing in MR result."
+
+    # Example value checks (can be replaced with actual expected ranges based on real data)
+    assert np.isfinite(mr_result_dict['alpha']), "Alpha value is not finite."
+    assert 0 <= mr_result_dict['p(alpha)'] <= 1, "P-value for alpha is out of bounds."
+    assert mr_result_dict['sigma_x'] > 0, "Sigma_x should be positive."
+
+    print("MR-link-2 result passed all checks.")
+
+
+# Run the tests
+if __name__ == '__main__':
+    pytest.main()

From 22efce8ed3e14bdc804962e89cff982b65109dfe Mon Sep 17 00:00:00 2001
From: Adriaan van der Graaf <adriaan.vd.graaf@gmail.com>
Date: Mon, 7 Oct 2024 15:29:28 +0200
Subject: [PATCH 5/6] Add R tests to the workflow

---
 .github/workflows/test.yml | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 4eb9177..f231d93 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -42,4 +42,7 @@ jobs:
         run: pytest tests/unit_tests_mr_link_2.py
 
       - name: Run integration tests
-        run: pytest tests/integration_tests_mr_link_2.py
\ No newline at end of file
+        run: pytest tests/integration_tests_mr_link_2.py
+
+      - name: Run comparison_tests_with_r_imlementation
+        run: pytest tests/test_for_r_and_python_concordance.py
\ No newline at end of file

From 46abc501758f74d4c2111a35b872993a3e4bb7d4 Mon Sep 17 00:00:00 2001
From: Adriaan van der Graaf <adriaan.vd.graaf@gmail.com>
Date: Tue, 29 Oct 2024 14:02:16 +0100
Subject: [PATCH 6/6] Explicitly casting something to a list to solve a bug
 caused by a version difference. the 2.9 version (list-like) of bitarray has a
 different API as the 3.0 version (iterator)

---
 mr_link_2_standalone.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mr_link_2_standalone.py b/mr_link_2_standalone.py
index 70b78f9..4b10a3a 100644
--- a/mr_link_2_standalone.py
+++ b/mr_link_2_standalone.py
@@ -98,7 +98,7 @@ def read_list_of_snps_into_geno_array(self, snps_to_load: set, missing_encoding=
         for i in range(n_variants):
             bits = bitarray.bitarray(endian="little")
             bits.frombytes(byte_array[offset:(offset+bytes_per_variant)])
-            array = bits.decode(self._decoder)[:self.n_individuals]
+            array = list(bits.decode(self._decoder))[:self.n_individuals]
             genotypes[:,i] = array
             offset+=bytes_per_variant