-
Notifications
You must be signed in to change notification settings - Fork 40
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
add nan corr tests #247
add nan corr tests #247
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -85,10 +85,16 @@ def adjust_weights(dim, weight_bool, weights): | |
@pytest.mark.parametrize("metrics", correlation_metrics) | ||
@pytest.mark.parametrize("dim", AXES) | ||
@pytest.mark.parametrize("weight_bool", [True, False]) | ||
def test_correlation_metrics_xr(a, b, dim, weight_bool, weights, metrics): | ||
@pytest.mark.parametrize("skipna", [True, False]) | ||
@pytest.mark.parametrize("has_nan", [True, False]) | ||
def test_correlation_metrics_ufunc_same_np( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. how to work with lazy fixtures:
can be done here for this test and ufunc_dask_np |
||
a, b, dim, weight_bool, weights, metrics, skipna, has_nan | ||
): | ||
"""Test whether correlation metric for xarray functions (from | ||
deterministic.py) give save numerical results as for numpy functions from | ||
deterministic.py) give save numerical results as for numpy functions (from | ||
np_deterministic.py).""" | ||
if has_nan: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Didn’t you want to use a_nan from conftest here? |
||
a[0] = np.nan | ||
# unpack metrics | ||
metric, _metric = metrics | ||
# Only apply over time dimension for effective p value. | ||
|
@@ -98,9 +104,9 @@ def test_correlation_metrics_xr(a, b, dim, weight_bool, weights, metrics): | |
# the numpy testing. | ||
_weights = adjust_weights(dim, weight_bool, weights) | ||
if metric in temporal_only_metrics: | ||
actual = metric(a, b, dim) | ||
actual = metric(a, b, dim, skipna=skipna) | ||
else: | ||
actual = metric(a, b, dim, weights=_weights) | ||
actual = metric(a, b, dim, weights=_weights, skipna=skipna) | ||
# check that no chunks for no chunk inputs | ||
assert actual.chunks is None | ||
|
||
|
@@ -122,28 +128,69 @@ def test_correlation_metrics_xr(a, b, dim, weight_bool, weights, metrics): | |
|
||
axis = _a.dims.index(new_dim) | ||
if metric in temporal_only_metrics: | ||
res = _metric(_a.values, _b.values, axis, skipna=False) | ||
res = _metric(_a.values, _b.values, axis, skipna=skipna) | ||
else: | ||
res = _metric(_a.values, _b.values, _weights, axis, skipna=False) | ||
res = _metric(_a.values, _b.values, _weights, axis, skipna=skipna) | ||
expected = actual.copy() | ||
expected.values = res | ||
assert_allclose(actual, expected) | ||
|
||
|
||
@pytest.mark.parametrize("metrics", correlation_metrics) | ||
@pytest.mark.parametrize("dim", AXES) | ||
@pytest.mark.parametrize("weight_bool", [True, False]) | ||
@pytest.mark.parametrize("skipna", [True, False]) | ||
@pytest.mark.parametrize("has_nan", [True, False]) | ||
def test_correlation_metrics_ufunc_dask_same_np( | ||
a_dask, b_dask, dim, weight_bool, weights_dask, metrics, skipna, has_nan | ||
): | ||
"""Test whether correlation metric for xarray functions can be lazy when | ||
chunked by using dask and give same results as np array.""" | ||
a = a_dask.copy() | ||
b = b_dask.copy() | ||
weights = weights_dask.copy() | ||
if has_nan: | ||
a = a.load() | ||
a[0] = np.nan | ||
a = a.chunk() | ||
# unpack metrics | ||
metric, _metric = metrics | ||
# Only apply over time dimension for effective p value. | ||
if (dim != "time") and (metric in temporal_only_metrics): | ||
dim = "time" | ||
# Generates subsetted weights to pass in as arg to main function and for | ||
# the numpy testing. | ||
_weights = adjust_weights(dim, weight_bool, weights) | ||
if metric in temporal_only_metrics: | ||
actual = metric(a, b, dim, skipna=skipna) | ||
else: | ||
actual = metric(a, b, dim, weights=_weights, skipna=skipna) | ||
# check that chunks for chunk inputs | ||
assert actual.chunks is not None | ||
if _weights is not None: | ||
_weights = _weights.load() | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Compute / load is now only needed because a, b, and weights must either all be chunked or all be unchunked. |
||
if metric in temporal_only_metrics: | ||
expected = metric(a.load(), b.load(), dim, skipna=skipna) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Compute not needed IMO |
||
else: | ||
expected = metric(a.load(), b.load(), dim, _weights, skipna=skipna) | ||
assert expected.chunks is None | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is that assert needed for the docstring description? IMO not |
||
assert_allclose(actual, expected) | ||
|
||
|
||
@pytest.mark.parametrize("metrics", distance_metrics) | ||
@pytest.mark.parametrize("dim", AXES) | ||
@pytest.mark.parametrize("weight_bool", [True, False]) | ||
@pytest.mark.parametrize("skipna", [True, False]) | ||
@pytest.mark.parametrize("has_nan", [True, False]) | ||
def test_distance_metrics_xr(a, b, dim, weight_bool, weights, metrics, skipna, has_nan): | ||
def test_distance_metrics_ufunc_same_np( | ||
a, b, dim, weight_bool, weights, metrics, skipna, has_nan | ||
): | ||
"""Test whether distance-based metric for xarray functions (from | ||
deterministic.py) give save numerical results as for numpy functions from | ||
deterministic.py) give save numerical results as for numpy functions (from | ||
np_deterministic.py).""" | ||
# unpack metrics | ||
a = a.copy() | ||
if has_nan: | ||
a[0] = np.nan | ||
|
||
# unpack metrics | ||
metric, _metric = metrics | ||
# Generates subsetted weights to pass in as arg to main function and for | ||
# the numpy testing. | ||
|
@@ -171,61 +218,23 @@ def test_distance_metrics_xr(a, b, dim, weight_bool, weights, metrics, skipna, h | |
assert_allclose(actual, expected) | ||
|
||
|
||
@pytest.mark.parametrize("metrics", correlation_metrics) | ||
@pytest.mark.parametrize("dim", AXES) | ||
@pytest.mark.parametrize("weight_bool", [True, False]) | ||
def test_correlation_metrics_xr_dask( | ||
a_dask, b_dask, dim, weight_bool, weights_dask, metrics | ||
): | ||
"""Test whether correlation metric for xarray functions can be lazy when | ||
chunked by using dask and give same results.""" | ||
a = a_dask | ||
b = b_dask | ||
weights = weights_dask | ||
# unpack metrics | ||
metric, _metric = metrics | ||
# Only apply over time dimension for effective p value. | ||
if (dim != "time") and (metric in temporal_only_metrics): | ||
dim = "time" | ||
# Generates subsetted weights to pass in as arg to main function and for | ||
# the numpy testing. | ||
_weights = adjust_weights(dim, weight_bool, weights) | ||
|
||
if metric in temporal_only_metrics: | ||
actual = metric(a, b, dim) | ||
else: | ||
actual = metric(a, b, dim, weights=_weights) | ||
# check that chunks for chunk inputs | ||
assert actual.chunks is not None | ||
|
||
if _weights is not None: | ||
_weights = _weights.load() | ||
|
||
if metric in temporal_only_metrics: | ||
expected = metric(a.load(), b.load(), dim) | ||
else: | ||
expected = metric(a.load(), b.load(), dim, _weights) | ||
assert expected.chunks is None | ||
assert_allclose(actual.compute(), expected) | ||
|
||
|
||
@pytest.mark.parametrize("metrics", distance_metrics) | ||
@pytest.mark.parametrize("dim", AXES) | ||
@pytest.mark.parametrize("weight_bool", [True, False]) | ||
@pytest.mark.parametrize("skipna", [True, False]) | ||
@pytest.mark.parametrize("has_nan", [True, False]) | ||
def test_distance_metrics_xr_dask( | ||
def test_distance_metrics_ufunc_dask_same_np( | ||
a_dask, b_dask, dim, weight_bool, weights_dask, metrics, skipna, has_nan | ||
): | ||
"""Test whether distance metrics for xarray functions can be lazy when | ||
chunked by using dask and give same results.""" | ||
"""Test whether distance metric for xarray functions can be lazy when | ||
chunked by using dask and give same results as np array.""" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, the test name was misleading. We don't really check whether the result is also chunked, as we often do in the test functions named *_dask. |
||
a = a_dask.copy() | ||
b = b_dask.copy() | ||
weights = weights_dask.copy() | ||
if has_nan: | ||
a = a.load() | ||
a[0] = np.nan | ||
a = a.chunk() | ||
b = b_dask.copy() | ||
weights = weights_dask.copy() | ||
# unpack metrics | ||
metric, _metric = metrics | ||
# Generates subsetted weights to pass in as arg to main function and for | ||
|
@@ -244,7 +253,7 @@ def test_distance_metrics_xr_dask( | |
else: | ||
expected = metric(a.load(), b.load(), dim, weights=_weights, skipna=skipna) | ||
assert expected.chunks is None | ||
assert_allclose(actual.compute(), expected) | ||
assert_allclose(actual, expected) | ||
|
||
|
||
@pytest.mark.parametrize("dim", AXES) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we call this fixed mask and the other random nan?