xarray-contrib · raybellwaves · Jan 19, 2021 · Jan 16, 2021 · Jan 17, 2021 · Jan 18, 2021
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -40,6 +40,7 @@ Internal Changes
   (:issue:`160`, :pr:`230`) `Ray Bell`_
 - Pin ``numba`` to ``>=0.52`` to fix CI (:issue:`233`, :pr:`234`) `Ray Bell`_
 - Refactor ``asv`` benchmarks. (:pr:`231`) `Aaron Spring`_
+- Added tests for nans in correlation metrics (:issue:`246`, :pr:`XXX`) `Ray Bell`_
 
 
 xskillscore v0.0.18 (2020-09-23)

diff --git a/xskillscore/tests/conftest.py b/xskillscore/tests/conftest.py
@@ -8,11 +8,28 @@
 
 
 @pytest.fixture
-def o():
+def times():
+    return xr.cftime_range(start="2000", periods=PERIODS, freq="D")
+
+
+@pytest.fixture
+def lats():
+    return np.arange(4)
+
+
+@pytest.fixture
+def lons():
+    return np.arange(5)
+
+
+@pytest.fixture
+def members():
+    return np.arange(3)
+
+
+@pytest.fixture
+def o(times, lats, lons):
     """Observation."""
-    times = xr.cftime_range(start="2000", periods=PERIODS, freq="D")
-    lats = np.arange(4)
-    lons = np.arange(5)
     data = np.random.rand(len(times), len(lats), len(lons))
     return xr.DataArray(
         data,
@@ -23,12 +40,8 @@ def o():
 
 
 @pytest.fixture
-def f_prob():
+def f_prob(times, lats, lons, members):
     """Probabilistic forecast containing also a member dimension."""
-    times = xr.cftime_range(start="2000", periods=PERIODS, freq="D")
-    members = np.arange(3)
-    lats = np.arange(4)
-    lons = np.arange(5)
     data = np.random.rand(len(members), len(times), len(lats), len(lons))
     return xr.DataArray(
         data,
@@ -59,20 +72,34 @@ def b(f):
 @pytest.fixture
 def a_nan(a):
     """Masked"""
-    return a.copy().where(a < 0.5)
+    return a.where(a < 0.5)
 
 
 @pytest.fixture
 def b_nan(b):
     """Masked"""
-    return b.copy().where(b < 0.5)
+    return b.where(b < 0.5)
+
+
+@pytest.fixture
+def a_nan_land(a):
+    """Masked block"""
+    a.data[:, 1:3, 1:3] = np.nan
+    return a
+
+
+@pytest.fixture
+def b_nan_land(b):
+    """Masked block"""
+    b.data[:, 1:3, 1:3] = np.nan
+    return b
 
 
 # with zeros
 @pytest.fixture
 def a_with_zeros(a):
     """Zeros"""
-    return a.copy().where(a < 0.5, 0)
+    return a.where(a < 0.5, 0)
 
 
 # dask
@@ -84,9 +111,22 @@ def a_dask(a):
 
 @pytest.fixture
 def b_dask(b):
+    """Chunked"""
     return b.chunk()
 
 
+@pytest.fixture
+def a_dask_nan(a_nan):
+    """Chunked"""
+    return a_nan.chunk()
+
+
+@pytest.fixture
+def b_dask_nan(b_nan):
+    """Chunked"""
+    return b_nan.chunk()
+
+
 @pytest.fixture
 def o_dask(o):
     return o.chunk()

diff --git a/xskillscore/tests/test_deterministic.py b/xskillscore/tests/test_deterministic.py
@@ -85,10 +85,16 @@ def adjust_weights(dim, weight_bool, weights):
 @pytest.mark.parametrize("metrics", correlation_metrics)
 @pytest.mark.parametrize("dim", AXES)
 @pytest.mark.parametrize("weight_bool", [True, False])
-def test_correlation_metrics_xr(a, b, dim, weight_bool, weights, metrics):
+@pytest.mark.parametrize("skipna", [True, False])
+@pytest.mark.parametrize("has_nan", [True, False])
+def test_correlation_metrics_ufunc_same_np(
+    a, b, dim, weight_bool, weights, metrics, skipna, has_nan
+):
     """Test whether correlation metric for xarray functions (from
-    deterministic.py) give save numerical results as for numpy functions from
+    deterministic.py) give save numerical results as for numpy functions (from
     np_deterministic.py)."""
+    if has_nan:
+        a[0] = np.nan
     # unpack metrics
     metric, _metric = metrics
     # Only apply over time dimension for effective p value.
@@ -98,9 +104,9 @@ def test_correlation_metrics_xr(a, b, dim, weight_bool, weights, metrics):
     # the numpy testing.
     _weights = adjust_weights(dim, weight_bool, weights)
     if metric in temporal_only_metrics:
-        actual = metric(a, b, dim)
+        actual = metric(a, b, dim, skipna=skipna)
     else:
-        actual = metric(a, b, dim, weights=_weights)
+        actual = metric(a, b, dim, weights=_weights, skipna=skipna)
     # check that no chunks for no chunk inputs
     assert actual.chunks is None
 
@@ -122,28 +128,69 @@ def test_correlation_metrics_xr(a, b, dim, weight_bool, weights, metrics):
 
     axis = _a.dims.index(new_dim)
     if metric in temporal_only_metrics:
-        res = _metric(_a.values, _b.values, axis, skipna=False)
+        res = _metric(_a.values, _b.values, axis, skipna=skipna)
     else:
-        res = _metric(_a.values, _b.values, _weights, axis, skipna=False)
+        res = _metric(_a.values, _b.values, _weights, axis, skipna=skipna)
     expected = actual.copy()
     expected.values = res
     assert_allclose(actual, expected)
 
 
+@pytest.mark.parametrize("metrics", correlation_metrics)
+@pytest.mark.parametrize("dim", AXES)
+@pytest.mark.parametrize("weight_bool", [True, False])
+@pytest.mark.parametrize("skipna", [True, False])
+@pytest.mark.parametrize("has_nan", [True, False])
+def test_correlation_metrics_ufunc_dask_same_np(
+    a_dask, b_dask, dim, weight_bool, weights_dask, metrics, skipna, has_nan
+):
+    """Test whether correlation metric for xarray functions can be lazy when
+    chunked by using dask and give same results as np array."""
+    a = a_dask.copy()
+    b = b_dask.copy()
+    weights = weights_dask.copy()
+    if has_nan:
+        a = a.load()
+        a[0] = np.nan
+        a = a.chunk()
+    # unpack metrics
+    metric, _metric = metrics
+    # Only apply over time dimension for effective p value.
+    if (dim != "time") and (metric in temporal_only_metrics):
+        dim = "time"
+    # Generates subsetted weights to pass in as arg to main function and for
+    # the numpy testing.
+    _weights = adjust_weights(dim, weight_bool, weights)
+    if metric in temporal_only_metrics:
+        actual = metric(a, b, dim, skipna=skipna)
+    else:
+        actual = metric(a, b, dim, weights=_weights, skipna=skipna)
+    # check that chunks for chunk inputs
+    assert actual.chunks is not None
+    if _weights is not None:
+        _weights = _weights.load()
+    if metric in temporal_only_metrics:
+        expected = metric(a.load(), b.load(), dim, skipna=skipna)
+    else:
+        expected = metric(a.load(), b.load(), dim, _weights, skipna=skipna)
+    assert expected.chunks is None
+    assert_allclose(actual, expected)
+
+
 @pytest.mark.parametrize("metrics", distance_metrics)
 @pytest.mark.parametrize("dim", AXES)
 @pytest.mark.parametrize("weight_bool", [True, False])
 @pytest.mark.parametrize("skipna", [True, False])
 @pytest.mark.parametrize("has_nan", [True, False])
-def test_distance_metrics_xr(a, b, dim, weight_bool, weights, metrics, skipna, has_nan):
+def test_distance_metrics_ufunc_same_np(
+    a, b, dim, weight_bool, weights, metrics, skipna, has_nan
+):
     """Test whether distance-based metric for xarray functions (from
-    deterministic.py) give save numerical results as for numpy functions from
+    deterministic.py) give save numerical results as for numpy functions (from
     np_deterministic.py)."""
-    # unpack metrics
-    a = a.copy()
     if has_nan:
         a[0] = np.nan
-
+    # unpack metrics
     metric, _metric = metrics
     # Generates subsetted weights to pass in as arg to main function and for
     # the numpy testing.
@@ -171,61 +218,23 @@ def test_distance_metrics_xr(a, b, dim, weight_bool, weights, metrics, skipna, h
     assert_allclose(actual, expected)
 
 
-@pytest.mark.parametrize("metrics", correlation_metrics)
-@pytest.mark.parametrize("dim", AXES)
-@pytest.mark.parametrize("weight_bool", [True, False])
-def test_correlation_metrics_xr_dask(
-    a_dask, b_dask, dim, weight_bool, weights_dask, metrics
-):
-    """Test whether correlation metric for xarray functions can be lazy when
-    chunked by using dask and give same results."""
-    a = a_dask
-    b = b_dask
-    weights = weights_dask
-    # unpack metrics
-    metric, _metric = metrics
-    # Only apply over time dimension for effective p value.
-    if (dim != "time") and (metric in temporal_only_metrics):
-        dim = "time"
-    # Generates subsetted weights to pass in as arg to main function and for
-    # the numpy testing.
-    _weights = adjust_weights(dim, weight_bool, weights)
-
-    if metric in temporal_only_metrics:
-        actual = metric(a, b, dim)
-    else:
-        actual = metric(a, b, dim, weights=_weights)
-    # check that chunks for chunk inputs
-    assert actual.chunks is not None
-
-    if _weights is not None:
-        _weights = _weights.load()
-
-    if metric in temporal_only_metrics:
-        expected = metric(a.load(), b.load(), dim)
-    else:
-        expected = metric(a.load(), b.load(), dim, _weights)
-    assert expected.chunks is None
-    assert_allclose(actual.compute(), expected)
-
-
 @pytest.mark.parametrize("metrics", distance_metrics)
 @pytest.mark.parametrize("dim", AXES)
 @pytest.mark.parametrize("weight_bool", [True, False])
 @pytest.mark.parametrize("skipna", [True, False])
 @pytest.mark.parametrize("has_nan", [True, False])
-def test_distance_metrics_xr_dask(
+def test_distance_metrics_ufunc_dask_same_np(
     a_dask, b_dask, dim, weight_bool, weights_dask, metrics, skipna, has_nan
 ):
-    """Test whether distance metrics for xarray functions can be lazy when
-    chunked by using dask and give same results."""
+    """Test whether distance metric for xarray functions can be lazy when
+    chunked by using dask and give same results as np array."""
     a = a_dask.copy()
+    b = b_dask.copy()
+    weights = weights_dask.copy()
     if has_nan:
         a = a.load()
         a[0] = np.nan
         a = a.chunk()
-    b = b_dask.copy()
-    weights = weights_dask.copy()
     # unpack metrics
     metric, _metric = metrics
     # Generates subsetted weights to pass in as arg to main function and for
@@ -244,7 +253,7 @@ def test_distance_metrics_xr_dask(
     else:
         expected = metric(a.load(), b.load(), dim, weights=_weights, skipna=skipna)
     assert expected.chunks is None
-    assert_allclose(actual.compute(), expected)
+    assert_allclose(actual, expected)
 
 
 @pytest.mark.parametrize("dim", AXES)

diff --git a/xskillscore/tests/test_mask_skipna.py b/xskillscore/tests/test_mask_skipna.py
@@ -29,21 +29,15 @@
 ]
 
 
-def mask_land_data(da):
-    """Masks sample data arbitrarily like a block of land."""
-    da.data[:, 1:3, 1:3] = np.nan
-    return da
-
-
 @pytest.mark.parametrize("metric", correlation_metrics + distance_metrics)
 @pytest.mark.parametrize("dim", AXES)
-def test_metrics_masked(a, b, dim, metric):
+def test_metrics_masked(a_nan_land, b_nan_land, dim, metric):
     """Test for all distance-based metrics whether result of skipna does not
     contain any nans when applied along dim with nans."""
-    a_masked = mask_land_data(a)
-    b_masked = mask_land_data(b)
-    res_skipna = metric(a_masked, b_masked, dim, skipna=True)
-    res_no_skipna = metric(a_masked, b_masked, dim, skipna=False)
+    a = a_nan_land
+    b = b_nan_land
+    res_skipna = metric(a, b, dim, skipna=True)
+    res_no_skipna = metric(a, b, dim, skipna=False)
 
     if "lon" in dim or "lat" in dim:  # metric is applied along axis with nans
         # res_skipna shouldnt have nans