Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow missing data in low frequency data #17

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: 3.7
python-version: 3.11
- name: Give PyPI a chance to update the index
run: sleep 240
- name: Install from PyPI
Expand Down
80 changes: 67 additions & 13 deletions tests/test_disaggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

from tsdisagg import disaggregate_series
from tsdisagg.time_conversion import FREQ_CONVERSION_FACTORS, MONTHS, get_frequency_name
from tsdisagg.ts_disagg import build_conversion_matrix
from tsdisagg.ts_disagg import METHOD, build_conversion_matrix


def generate_random_index_pair(
Expand Down Expand Up @@ -93,6 +93,39 @@ def frequencies(draw: Callable[[SearchStrategy[int]], int]) -> tuple[str, str]:
return low_freq, high_freq


@pytest.fixture()
def exports_m():
    """Monthly export series loaded from the test data directory.

    The CSV's original index is replaced by a month-start (``MS``)
    DatetimeIndex beginning 1972-01-01, and the single data column is
    renamed ``exports``.
    """
    frame = pd.read_csv("tests/data/exports_m.csv", index_col=0)
    n_periods = frame.shape[0]
    frame.index = pd.date_range(start="1972-01-01", freq="MS", periods=n_periods)
    frame.columns = ["exports"]
    return frame


@pytest.fixture()
def sales_a():
    """Annual sales series loaded from the test data directory.

    The CSV's original index is replaced by a year-start (``YS``)
    DatetimeIndex beginning 1975-01-01, and the single data column is
    renamed ``sales``.
    """
    frame = pd.read_csv("tests/data/sales_a.csv", index_col=0)
    n_periods = frame.shape[0]
    frame.index = pd.date_range(start="1975-01-01", freq="YS", periods=n_periods)
    frame.columns = ["sales"]
    return frame


@pytest.fixture()
def exports_q():
    """Quarterly export series loaded from the test data directory.

    The CSV's original index is replaced by a quarter-start DatetimeIndex
    (``QS-OCT`` anchoring) beginning 1972-01-01, and the single data
    column is renamed ``exports``.
    """
    frame = pd.read_csv("tests/data/exports_q.csv", index_col=0)
    n_periods = frame.shape[0]
    frame.index = pd.date_range(start="1972-01-01", freq="QS-OCT", periods=n_periods)
    frame.columns = ["exports"]
    return frame


@pytest.fixture()
def imports_q():
    """Quarterly import series loaded from the test data directory.

    The CSV's original index is replaced by a quarter-start DatetimeIndex
    (``QS-OCT`` anchoring) beginning 1972-01-01, and the single data
    column is renamed ``imports``.
    """
    frame = pd.read_csv("tests/data/imports_q.csv", index_col=0)
    n_periods = frame.shape[0]
    frame.index = pd.date_range(start="1972-01-01", freq="QS-OCT", periods=n_periods)
    frame.columns = ["imports"]
    return frame


@given(frequencies())
@pytest.mark.parametrize("agg_func", ["sum", "mean", "first", "last"])
def test_build_C_matrix(agg_func, frequencies):
Expand Down Expand Up @@ -388,18 +421,6 @@ def test_invalid_dataframe_warnings():
agg_func="sum",
)

with pytest.raises(ValueError, match="low_freq_df has missing values"):
disaggregate_series(
pd.DataFrame(
{"data": [1, np.nan, 3]}, index=pd.date_range("2020-01-01", periods=3, freq="D")
),
pd.DataFrame(
{"data": [1, 2, 3]}, index=pd.date_range("2020-01-01", periods=3, freq="D")
),
method="denton",
agg_func="sum",
)

with pytest.raises(ValueError, match="high_freq_df has missing values"):
disaggregate_series(
pd.DataFrame(
Expand Down Expand Up @@ -475,5 +496,38 @@ def test_invalid_dataframe_warnings():
)


@pytest.mark.parametrize("method", ["denton", "chow-lin", "litterman"])
@pytest.mark.parametrize("missing_in_center", [True, False])
def test_disagg_with_internal_low_freq_missing(
    sales_a, exports_q, exports_m, method: METHOD, missing_in_center
):
    """Disaggregation tolerates NaNs in the low-frequency target series.

    A single annual observation is replaced with NaN — either in the
    interior of the series or at its first position — and the result is
    required to be NaN-free with the expected monthly index.
    """
    sales_a = sales_a.copy()

    # Knock out one annual value: interior vs. boundary position.
    nan_position = 10 if missing_in_center else 0
    sales_a.iloc[nan_position] = np.nan

    # Denton-family methods are run without an indicator dataframe;
    # the regression-based methods get the monthly indicator + constant.
    indicator_df = None if "denton" in method else exports_m.assign(Constant=1)

    result = disaggregate_series(
        sales_a,
        high_freq_df=indicator_df,
        method=method,
        agg_func="sum",
        target_freq="MS",
        optimizer_kwargs={"method": "nelder-mead"},
        verbose=False,
    )

    assert result.isna().sum() == 0

    if "denton" in method:
        # Without an indicator, the output spans 12 months per annual row.
        expected_index = pd.date_range(
            start=sales_a.index[0], periods=12 * sales_a.shape[0], freq="MS"
        )
        assert np.all(result.index == expected_index)
    else:
        # With an indicator, the output inherits the indicator's index.
        assert np.all(result.index == exports_m.index)


if __name__ == "__main__":
    # The tests in this module are pytest-style functions (fixtures,
    # parametrize, hypothesis @given), which unittest.main() does not
    # collect — running this file directly would report 0 tests and exit
    # successfully. Delegate to pytest, which collects both pytest-style
    # functions and any unittest TestCases, and propagate its exit code.
    raise SystemExit(pytest.main([__file__]))
20 changes: 15 additions & 5 deletions tsdisagg/ts_disagg.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,8 +218,8 @@
"No datetime index found on the dataframe passed as argument to low_freq_df."
)

if low_freq_df.isna().any().any():
raise ValueError("low_freq_df has missing values.")
# if low_freq_df.isna().any().any():
# raise ValueError("low_freq_df has missing values.")

if high_freq_df is not None:
if not isinstance(high_freq_df.index, pd.core.indexes.datetimes.DatetimeIndex):
Expand Down Expand Up @@ -395,15 +395,16 @@
)

C = build_conversion_matrix(low_freq_df, high_freq_df, time_conversion_factor, agg_func)
drop_rows = np.all(C == 0, axis=1)
drop_rows = np.all(C == 0, axis=1) | low_freq_df.isna().values.ravel()

if any(drop_rows):
dropped = low_freq_df.index.strftime("%Y-%m-%d")[drop_rows]
warnings.warn(
f'Insufficent high-frequency data to decompose the following dates: {", ".join(dropped)}',
UserWarning,
)

y = df.iloc[:, target_idx].dropna().loc[~drop_rows]
y = low_freq_df.loc[~drop_rows].squeeze()
C = C[~drop_rows, :]
X = df.drop(columns=df.columns[target_idx])

Expand Down Expand Up @@ -463,7 +464,16 @@
ul = y - C @ p
y_hat = p + D @ ul

output = pd.Series(y_hat, index=df.index, name=target_column)
if not isinstance(y_hat, pd.Series | pd.DataFrame):
output = pd.Series(y_hat, index=df.index, name=target_column)
elif isinstance(y_hat, pd.Series):
output = y_hat
output.name = target_column

Check warning on line 471 in tsdisagg/ts_disagg.py

View check run for this annotation

Codecov / codecov/patch

tsdisagg/ts_disagg.py#L469-L471

Added lines #L469 - L471 were not covered by tests
else:
output = y_hat.iloc[:, 0]
output.name = target_column

Check warning on line 474 in tsdisagg/ts_disagg.py

View check run for this annotation

Codecov / codecov/patch

tsdisagg/ts_disagg.py#L473-L474

Added lines #L473 - L474 were not covered by tests

output.index = df.index
output.index.freq = output.index.inferred_freq

if return_optim_res and result is not None:
Expand Down
Loading