Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add nan corr tests #247

Merged
merged 6 commits into from
Jan 19, 2021
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ Internal Changes
(:issue:`160`, :pr:`230`) `Ray Bell`_
- Pin ``numba`` to ``>=0.52`` to fix CI (:issue:`233`, :pr:`234`) `Ray Bell`_
- Refactor ``asv`` benchmarks. (:pr:`231`) `Aaron Spring`_
- Added tests for nans in correlation metrics (:issue:`246`, :pr:`XXX`) `Ray Bell`_


xskillscore v0.0.18 (2020-09-23)
Expand Down
64 changes: 52 additions & 12 deletions xskillscore/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,28 @@


@pytest.fixture
def o():
def times():
return xr.cftime_range(start="2000", periods=PERIODS, freq="D")


@pytest.fixture
def lats():
return np.arange(4)


@pytest.fixture
def lons():
return np.arange(5)


@pytest.fixture
def members():
return np.arange(3)


@pytest.fixture
def o(times, lats, lons):
"""Observation."""
times = xr.cftime_range(start="2000", periods=PERIODS, freq="D")
lats = np.arange(4)
lons = np.arange(5)
data = np.random.rand(len(times), len(lats), len(lons))
return xr.DataArray(
data,
Expand All @@ -23,12 +40,8 @@ def o():


@pytest.fixture
def f_prob():
def f_prob(times, lats, lons, members):
"""Probabilistic forecast containing also a member dimension."""
times = xr.cftime_range(start="2000", periods=PERIODS, freq="D")
members = np.arange(3)
lats = np.arange(4)
lons = np.arange(5)
data = np.random.rand(len(members), len(times), len(lats), len(lons))
return xr.DataArray(
data,
Expand Down Expand Up @@ -59,20 +72,34 @@ def b(f):
@pytest.fixture
def a_nan(a):
"""Masked"""
return a.copy().where(a < 0.5)
return a.where(a < 0.5)


@pytest.fixture
def b_nan(b):
"""Masked"""
return b.copy().where(b < 0.5)
return b.where(b < 0.5)


@pytest.fixture
def a_nan_land(a):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we call this fixed mask and the other random nan?

"""Masked block"""
a.data[:, 1:3, 1:3] = np.nan
return a


@pytest.fixture
def b_nan_land(b):
"""Masked block"""
b.data[:, 1:3, 1:3] = np.nan
return b


# with zeros
@pytest.fixture
def a_with_zeros(a):
"""Zeros"""
return a.copy().where(a < 0.5, 0)
return a.where(a < 0.5, 0)


# dask
Expand All @@ -84,9 +111,22 @@ def a_dask(a):

@pytest.fixture
def b_dask(b):
"""Chunked"""
return b.chunk()


@pytest.fixture
def a_dask_nan(a_nan):
"""Chunked"""
return a_nan.chunk()


@pytest.fixture
def b_dask_nan(b_nan):
"""Chunked"""
return b_nan.chunk()


@pytest.fixture
def o_dask(o):
return o.chunk()
Expand Down
119 changes: 64 additions & 55 deletions xskillscore/tests/test_deterministic.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,10 +85,16 @@ def adjust_weights(dim, weight_bool, weights):
@pytest.mark.parametrize("metrics", correlation_metrics)
@pytest.mark.parametrize("dim", AXES)
@pytest.mark.parametrize("weight_bool", [True, False])
def test_correlation_metrics_xr(a, b, dim, weight_bool, weights, metrics):
@pytest.mark.parametrize("skipna", [True, False])
@pytest.mark.parametrize("has_nan", [True, False])
def test_correlation_metrics_ufunc_same_np(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

how to work with lazy fixtures:

  1. add pytest-lazy-fixture to env
  2. pytest.mark.parametrize(fixtures) https://github.com/TvoroG/pytest-lazy-fixture See https://github.com/pangeo-data/climpred/blob/fdec3af0aabac42de1787aecc0fcbef64a4800f7/climpred/tests/test_bootstrap.py#L227

can be done here for this test and ufunc_dask_np

a, b, dim, weight_bool, weights, metrics, skipna, has_nan
):
"""Test whether correlation metric for xarray functions (from
deterministic.py) give save numerical results as for numpy functions from
deterministic.py) give save numerical results as for numpy functions (from
np_deterministic.py)."""
if has_nan:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Didn’t you want to use a_nan from conftest here?

a[0] = np.nan
# unpack metrics
metric, _metric = metrics
# Only apply over time dimension for effective p value.
Expand All @@ -98,9 +104,9 @@ def test_correlation_metrics_xr(a, b, dim, weight_bool, weights, metrics):
# the numpy testing.
_weights = adjust_weights(dim, weight_bool, weights)
if metric in temporal_only_metrics:
actual = metric(a, b, dim)
actual = metric(a, b, dim, skipna=skipna)
else:
actual = metric(a, b, dim, weights=_weights)
actual = metric(a, b, dim, weights=_weights, skipna=skipna)
# check that no chunks for no chunk inputs
assert actual.chunks is None

Expand All @@ -122,28 +128,69 @@ def test_correlation_metrics_xr(a, b, dim, weight_bool, weights, metrics):

axis = _a.dims.index(new_dim)
if metric in temporal_only_metrics:
res = _metric(_a.values, _b.values, axis, skipna=False)
res = _metric(_a.values, _b.values, axis, skipna=skipna)
else:
res = _metric(_a.values, _b.values, _weights, axis, skipna=False)
res = _metric(_a.values, _b.values, _weights, axis, skipna=skipna)
expected = actual.copy()
expected.values = res
assert_allclose(actual, expected)


@pytest.mark.parametrize("metrics", correlation_metrics)
@pytest.mark.parametrize("dim", AXES)
@pytest.mark.parametrize("weight_bool", [True, False])
@pytest.mark.parametrize("skipna", [True, False])
@pytest.mark.parametrize("has_nan", [True, False])
def test_correlation_metrics_ufunc_dask_same_np(
a_dask, b_dask, dim, weight_bool, weights_dask, metrics, skipna, has_nan
):
"""Test whether correlation metric for xarray functions can be lazy when
chunked by using dask and give same results as np array."""
a = a_dask.copy()
b = b_dask.copy()
weights = weights_dask.copy()
if has_nan:
a = a.load()
a[0] = np.nan
a = a.chunk()
# unpack metrics
metric, _metric = metrics
# Only apply over time dimension for effective p value.
if (dim != "time") and (metric in temporal_only_metrics):
dim = "time"
# Generates subsetted weights to pass in as arg to main function and for
# the numpy testing.
_weights = adjust_weights(dim, weight_bool, weights)
if metric in temporal_only_metrics:
actual = metric(a, b, dim, skipna=skipna)
else:
actual = metric(a, b, dim, weights=_weights, skipna=skipna)
# check that chunks for chunk inputs
assert actual.chunks is not None
if _weights is not None:
_weights = _weights.load()
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Compute / load now only needed because a,b,weights require being all either or not chunked

if metric in temporal_only_metrics:
expected = metric(a.load(), b.load(), dim, skipna=skipna)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Compute not needed IMO

else:
expected = metric(a.load(), b.load(), dim, _weights, skipna=skipna)
assert expected.chunks is None
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is that assert needed for the docstring description? IMO not

assert_allclose(actual, expected)


@pytest.mark.parametrize("metrics", distance_metrics)
@pytest.mark.parametrize("dim", AXES)
@pytest.mark.parametrize("weight_bool", [True, False])
@pytest.mark.parametrize("skipna", [True, False])
@pytest.mark.parametrize("has_nan", [True, False])
def test_distance_metrics_xr(a, b, dim, weight_bool, weights, metrics, skipna, has_nan):
def test_distance_metrics_ufunc_same_np(
a, b, dim, weight_bool, weights, metrics, skipna, has_nan
):
"""Test whether distance-based metric for xarray functions (from
deterministic.py) give save numerical results as for numpy functions from
deterministic.py) give save numerical results as for numpy functions (from
np_deterministic.py)."""
# unpack metrics
a = a.copy()
if has_nan:
a[0] = np.nan

# unpack metrics
metric, _metric = metrics
# Generates subsetted weights to pass in as arg to main function and for
# the numpy testing.
Expand Down Expand Up @@ -171,61 +218,23 @@ def test_distance_metrics_xr(a, b, dim, weight_bool, weights, metrics, skipna, h
assert_allclose(actual, expected)


@pytest.mark.parametrize("metrics", correlation_metrics)
@pytest.mark.parametrize("dim", AXES)
@pytest.mark.parametrize("weight_bool", [True, False])
def test_correlation_metrics_xr_dask(
a_dask, b_dask, dim, weight_bool, weights_dask, metrics
):
"""Test whether correlation metric for xarray functions can be lazy when
chunked by using dask and give same results."""
a = a_dask
b = b_dask
weights = weights_dask
# unpack metrics
metric, _metric = metrics
# Only apply over time dimension for effective p value.
if (dim != "time") and (metric in temporal_only_metrics):
dim = "time"
# Generates subsetted weights to pass in as arg to main function and for
# the numpy testing.
_weights = adjust_weights(dim, weight_bool, weights)

if metric in temporal_only_metrics:
actual = metric(a, b, dim)
else:
actual = metric(a, b, dim, weights=_weights)
# check that chunks for chunk inputs
assert actual.chunks is not None

if _weights is not None:
_weights = _weights.load()

if metric in temporal_only_metrics:
expected = metric(a.load(), b.load(), dim)
else:
expected = metric(a.load(), b.load(), dim, _weights)
assert expected.chunks is None
assert_allclose(actual.compute(), expected)


@pytest.mark.parametrize("metrics", distance_metrics)
@pytest.mark.parametrize("dim", AXES)
@pytest.mark.parametrize("weight_bool", [True, False])
@pytest.mark.parametrize("skipna", [True, False])
@pytest.mark.parametrize("has_nan", [True, False])
def test_distance_metrics_xr_dask(
def test_distance_metrics_ufunc_dask_same_np(
a_dask, b_dask, dim, weight_bool, weights_dask, metrics, skipna, has_nan
):
"""Test whether distance metrics for xarray functions can be lazy when
chunked by using dask and give same results."""
"""Test whether distance metric for xarray functions can be lazy when
chunked by using dask and give same results as np array."""
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes the test name was misleading. We don't really check whether result is also chunked, as we often do when calling the function _dask

a = a_dask.copy()
b = b_dask.copy()
weights = weights_dask.copy()
if has_nan:
a = a.load()
a[0] = np.nan
a = a.chunk()
b = b_dask.copy()
weights = weights_dask.copy()
# unpack metrics
metric, _metric = metrics
# Generates subsetted weights to pass in as arg to main function and for
Expand All @@ -244,7 +253,7 @@ def test_distance_metrics_xr_dask(
else:
expected = metric(a.load(), b.load(), dim, weights=_weights, skipna=skipna)
assert expected.chunks is None
assert_allclose(actual.compute(), expected)
assert_allclose(actual, expected)


@pytest.mark.parametrize("dim", AXES)
Expand Down
16 changes: 5 additions & 11 deletions xskillscore/tests/test_mask_skipna.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,21 +29,15 @@
]


def mask_land_data(da):
"""Masks sample data arbitrarily like a block of land."""
da.data[:, 1:3, 1:3] = np.nan
return da


@pytest.mark.parametrize("metric", correlation_metrics + distance_metrics)
@pytest.mark.parametrize("dim", AXES)
def test_metrics_masked(a, b, dim, metric):
def test_metrics_masked(a_nan_land, b_nan_land, dim, metric):
"""Test for all distance-based metrics whether result of skipna does not
contain any nans when applied along dim with nans."""
a_masked = mask_land_data(a)
b_masked = mask_land_data(b)
res_skipna = metric(a_masked, b_masked, dim, skipna=True)
res_no_skipna = metric(a_masked, b_masked, dim, skipna=False)
a = a_nan_land
b = b_nan_land
res_skipna = metric(a, b, dim, skipna=True)
res_no_skipna = metric(a, b, dim, skipna=False)

if "lon" in dim or "lat" in dim: # metric is applied along axis with nans
# res_skipna shouldnt have nans
Expand Down