Skip to content

Commit 63e9634

Browse files
committed
move util_ functions to datajudge.utils
1 parent 92f2933 commit 63e9634

File tree

4 files changed

+136
-133
lines changed

4 files changed

+136
-133
lines changed

src/datajudge/constraints/uniques.py

+5-107
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
from .. import db_access
1111
from ..db_access import DataReference
12+
from ..utils import util_filternull_default_deprecated
1213
from .base import Constraint, OptionalSelections, T, TestResult, ToleranceGetter
1314

1415

@@ -36,109 +37,6 @@ def _subset_violation_counts(
3637
return len(remainder) == 0, remainder
3738

3839

39-
def util_output_postprocessing_sorter(
40-
collection: Collection, counts: Optional[Collection] = None
41-
):
42-
"""
43-
Sorts a collection of tuple elements in descending order of their counts,
44-
and for ties, makes use of the ascending order of the elements themselves.
45-
46-
If the first element is not instanceof tuple,
47-
each element will be transparently packaged into a 1-tuple for processing;
48-
this process is not visible to the caller.
49-
50-
Handles None values as described in `sort_tuple_none_aware`.
51-
"""
52-
collection = list(collection)
53-
if not isinstance(collection[0], tuple):
54-
# package into a 1 tuple and pass into the method again
55-
packaged_list = [(elem,) for elem in collection]
56-
res_main, res_counts = util_output_postprocessing_sorter(packaged_list, counts)
57-
return [elem[0] for elem in res_main], res_counts
58-
59-
if counts is None:
60-
return sort_tuple_none_aware(collection), counts
61-
62-
assert len(collection) == len(
63-
counts
64-
), "collection and counts must have the same length"
65-
66-
if len(collection) <= 1:
67-
return collection, counts # empty or 1 element lists are always sorted
68-
69-
lst = sort_tuple_none_aware(
70-
[(-count, *elem) for count, elem in zip(counts, collection)]
71-
)
72-
return [elem[1:] for elem in lst], [-elem[0] for elem in lst]
73-
74-
75-
def util_filternull_default_deprecated(values: List[T]) -> List[T]:
76-
return list(filter(lambda value: value is not None, values))
77-
78-
79-
def util_filternull_never(values: List[T]) -> List[T]:
80-
return values
81-
82-
83-
def util_filternull_element_or_tuple_all(values: List[T]) -> List[T]:
84-
return list(
85-
filter(
86-
lambda value: (value is not None)
87-
and (not (isinstance(value, tuple) and all(x is None for x in value))),
88-
values,
89-
)
90-
)
91-
92-
93-
def util_filternull_element_or_tuple_any(values: List[T]) -> List[T]:
94-
return list(
95-
filter(
96-
lambda value: (value is not None)
97-
and (not (isinstance(value, tuple) and any(x is None for x in value))),
98-
values,
99-
)
100-
)
101-
102-
103-
def sort_tuple_none_aware(collection: Collection[Tuple], ascending=True):
104-
"""
105-
Sorts a collection of either tuples or single elements,
106-
where `None` is considered the same as the default value of the respective column's type.
107-
For ints/floats `int()`/`float()` yield `0`/`0.0`, for strings `str()` yields `''`.
108-
The constructor is determined by calling type() on the first non-`None` element of the respective column.
109-
110-
Checks and requires all elements in collection are tuples, and that all tuples have the same length.
111-
"""
112-
lst = list(collection)
113-
114-
if len(lst) <= 1:
115-
return lst # empty or 1 element lists are always sorted
116-
117-
assert all(
118-
isinstance(elem, tuple) and len(elem) == len(lst[0]) for elem in lst
119-
), "all elements must be tuples and have the same length"
120-
121-
dtypes_each_tupleelement: List[Optional[type]] = [None] * len(lst[0])
122-
for dtypeidx in range(len(dtypes_each_tupleelement)):
123-
for elem in lst:
124-
if elem[dtypeidx] is not None:
125-
dtypes_each_tupleelement[dtypeidx] = type(elem[dtypeidx])
126-
break
127-
else:
128-
# if all entries are None, just use a constant int() == 0
129-
dtypes_each_tupleelement[dtypeidx] = int
130-
131-
def replace_None_with_default(elem):
132-
return tuple(
133-
(dtype() if subelem is None else subelem)
134-
for dtype, subelem in zip(dtypes_each_tupleelement, elem)
135-
)
136-
137-
return sorted(
138-
lst, key=lambda elem: replace_None_with_default(elem), reverse=not ascending
139-
)
140-
141-
14240
class Uniques(Constraint, abc.ABC):
14341
"""Uniques is an abstract class for comparisons between unique values of a column and a reference.
14442
@@ -151,10 +49,10 @@ class Uniques(Constraint, abc.ABC):
15149
`WithinRequirement`.
15250
By default, the null filtering does not trigger if multiple columns are fetched at once.
15351
It can be configured in more detail by supplying a custom ``filter_func`` function.
154-
Some exemplary implementations are available in this module as ``util_filternull_default_deprecated``,
155-
``util_filternull_never``, ``util_filternull_element_or_tuple_all``, ``util_filternull_element_or_tuple_any``.
52+
Some exemplary implementations are available as ``datajudge.utils.util_filternull_default_deprecated``,
53+
``datajudge.utils.util_filternull_never``, ``datajudge.utils.util_filternull_element_or_tuple_all``, ``datajudge.utils.util_filternull_element_or_tuple_any``.
15654
For new deployments, using one of the above filters or a custom one is recommended.
157-
Passing None as the argument is equivalent to ``util_filternull_default_deprecated``, but triggers a warning.
55+
Passing None as the argument is equivalent to ``datajudge.utils.util_filternull_default_deprecated``, but triggers a warning.
15856
The deprecated default may change in future versions.
15957
To silence the warning, set ``filter_func`` explicitly.
16058
@@ -193,7 +91,7 @@ class Uniques(Constraint, abc.ABC):
19391
which takes in two collections, and returns modified (e.g. sorted) versions of them.
19492
In most cases, the second argument is simply None,
19593
but for `UniquesSubset` it is the counts of each of the elements.
196-
The suggested function is `util_output_postprocessing_sorter` from this file,
94+
The suggested function is ``datajudge.utils.util_output_postprocessing_sorter``
19795
- see its documentation for details.
19896
19997
By default, the number of subset or superset remainders (excess or missing values)

src/datajudge/requirements.py

+18-18
Original file line numberDiff line numberDiff line change
@@ -279,10 +279,10 @@ def add_uniques_equality_constraint(
279279
for ``WithinRequirement``.
280280
By default, the null filtering does not trigger if multiple columns are fetched at once.
281281
It can be configured in more detail by supplying a custom ``filter_func`` function.
282-
Some exemplary implementations are available in this module as ``util_filternull_default_deprecated``,
283-
``util_filternull_never``, ``util_filternull_element_or_tuple_all``, ``util_filternull_element_or_tuple_any``.
282+
Some exemplary implementations are available as ``datajudge.utils.util_filternull_default_deprecated``,
283+
``datajudge.utils.util_filternull_never``, ``datajudge.utils.util_filternull_element_or_tuple_all``, ``datajudge.utils.util_filternull_element_or_tuple_any``.
284284
For new deployments, using one of the above filters or a custom one is recommended.
285-
Passing None as the argument is equivalent to ``util_filternull_default_deprecated``, but triggers a warning.
285+
Passing None as the argument is equivalent to ``datajudge.utils.util_filternull_default_deprecated``, but triggers a warning.
286286
The deprecated default may change in future versions.
287287
To silence the warning, set ``filter_func`` explicitly.
288288
@@ -328,10 +328,10 @@ def add_uniques_superset_constraint(
328328
for ``WithinRequirement``.
329329
By default, the null filtering does not trigger if multiple columns are fetched at once.
330330
It can be configured in more detail by supplying a custom ``filter_func`` function.
331-
Some exemplary implementations are available in this module as ``util_filternull_default_deprecated``,
332-
``util_filternull_never``, ``util_filternull_element_or_tuple_all``, ``util_filternull_element_or_tuple_any``.
331+
Some exemplary implementations are available as ``datajudge.utils.util_filternull_default_deprecated``,
332+
``datajudge.utils.util_filternull_never``, ``datajudge.utils.util_filternull_element_or_tuple_all``, ``datajudge.utils.util_filternull_element_or_tuple_any``.
333333
For new deployments, using one of the above filters or a custom one is recommended.
334-
Passing None as the argument is equivalent to ``util_filternull_default_deprecated``, but triggers a warning.
334+
Passing None as the argument is equivalent to ``datajudge.utils.util_filternull_default_deprecated``, but triggers a warning.
335335
The deprecated default may change in future versions.
336336
To silence the warning, set ``filter_func`` explicitly.
337337
@@ -388,10 +388,10 @@ def add_uniques_subset_constraint(
388388
for ``WithinRequirement``.
389389
By default, the null filtering does not trigger if multiple columns are fetched at once.
390390
It can be configured in more detail by supplying a custom ``filter_func`` function.
391-
Some exemplary implementations are available in this module as ``util_filternull_default_deprecated``,
392-
``util_filternull_never``, ``util_filternull_element_or_tuple_all``, ``util_filternull_element_or_tuple_any``.
391+
Some exemplary implementations are available as ``datajudge.utils.util_filternull_default_deprecated``,
392+
``datajudge.utils.util_filternull_never``, ``datajudge.utils.util_filternull_element_or_tuple_all``, ``datajudge.utils.util_filternull_element_or_tuple_any``.
393393
For new deployments, using one of the above filters or a custom one is recommended.
394-
Passing None as the argument is equivalent to ``util_filternull_default_deprecated``, but triggers a warning.
394+
Passing None as the argument is equivalent to ``datajudge.utils.util_filternull_default_deprecated``, but triggers a warning.
395395
The deprecated default may change in future versions.
396396
To silence the warning, set ``filter_func`` explicitly.
397397
@@ -1489,10 +1489,10 @@ def add_uniques_equality_constraint(
14891489
for ``WithinRequirement``.
14901490
By default, the null filtering does not trigger if multiple columns are fetched at once.
14911491
It can be configured in more detail by supplying a custom ``filter_func`` function.
1492-
Some exemplary implementations are available in this module as ``util_filternull_default_deprecated``,
1493-
``util_filternull_never``, ``util_filternull_element_or_tuple_all``, ``util_filternull_element_or_tuple_any``.
1492+
Some exemplary implementations are available as ``datajudge.utils.util_filternull_default_deprecated``,
1493+
``datajudge.utils.util_filternull_never``, ``datajudge.utils.util_filternull_element_or_tuple_all``, ``datajudge.utils.util_filternull_element_or_tuple_any``.
14941494
For new deployments, using one of the above filters or a custom one is recommended.
1495-
Passing None as the argument is equivalent to ``util_filternull_default_deprecated``, but triggers a warning.
1495+
Passing None as the argument is equivalent to ``datajudge.utils.util_filternull_default_deprecated``, but triggers a warning.
14961496
The deprecated default may change in future versions.
14971497
To silence the warning, set ``filter_func`` explicitly.
14981498
@@ -1541,10 +1541,10 @@ def add_uniques_superset_constraint(
15411541
for ``WithinRequirement``.
15421542
By default, the null filtering does not trigger if multiple columns are fetched at once.
15431543
It can be configured in more detail by supplying a custom ``filter_func`` function.
1544-
Some exemplary implementations are available in this module as ``util_filternull_default_deprecated``,
1545-
``util_filternull_never``, ``util_filternull_element_or_tuple_all``, ``util_filternull_element_or_tuple_any``.
1544+
Some exemplary implementations are available as ``datajudge.utils.util_filternull_default_deprecated``,
1545+
``datajudge.utils.util_filternull_never``, ``datajudge.utils.util_filternull_element_or_tuple_all``, ``datajudge.utils.util_filternull_element_or_tuple_any``.
15461546
For new deployments, using one of the above filters or a custom one is recommended.
1547-
Passing None as the argument is equivalent to ``util_filternull_default_deprecated``, but triggers a warning.
1547+
Passing None as the argument is equivalent to ``datajudge.utils.util_filternull_default_deprecated``, but triggers a warning.
15481548
The deprecated default may change in future versions.
15491549
To silence the warning, set ``filter_func`` explicitly.
15501550
@@ -1603,10 +1603,10 @@ def add_uniques_subset_constraint(
16031603
for ``WithinRequirement``.
16041604
By default, the null filtering does not trigger if multiple columns are fetched at once.
16051605
It can be configured in more detail by supplying a custom ``filter_func`` function.
1606-
Some exemplary implementations are available in this module as ``util_filternull_default_deprecated``,
1607-
``util_filternull_never``, ``util_filternull_element_or_tuple_all``, ``util_filternull_element_or_tuple_any``.
1606+
Some exemplary implementations are available as ``datajudge.utils.util_filternull_default_deprecated``,
1607+
``datajudge.utils.util_filternull_never``, ``datajudge.utils.util_filternull_element_or_tuple_all``, ``datajudge.utils.util_filternull_element_or_tuple_any``.
16081608
For new deployments, using one of the above filters or a custom one is recommended.
1609-
Passing None as the argument is equivalent to ``util_filternull_default_deprecated``, but triggers a warning.
1609+
Passing None as the argument is equivalent to ``datajudge.utils.util_filternull_default_deprecated``, but triggers a warning.
16101610
The deprecated default may change in future versions.
16111611
To silence the warning, set ``filter_func`` explicitly.
16121612

src/datajudge/utils.py

+106-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1-
from typing import Tuple, Union
1+
from typing import Collection, List, Optional, Tuple, Union
2+
3+
from .constraints.base import T
24

35

46
def _fmt_diff_part(s, d):
@@ -42,3 +44,106 @@ def format_difference(
4244
f"{s1[:diff_idx]}{_fmt_diff_part(s1, diff_idx)}",
4345
f"{s2[:diff_idx]}{_fmt_diff_part(s2, diff_idx)}",
4446
)
47+
48+
49+
def util_output_postprocessing_sorter(
    collection: Collection, counts: Optional[Collection] = None
):
    """
    Sorts a collection of tuple elements in descending order of their counts,
    and for ties, makes use of the ascending order of the elements themselves.

    If the first element is not an instance of tuple,
    each element will be transparently packaged into a 1-tuple for processing;
    this process is not visible to the caller.

    Handles None values as described in `sort_tuple_none_aware`.

    :param collection: The elements to sort; either all tuples or all single values.
    :param counts: Optional counts, one per element of ``collection``.
        If ``None``, the elements are sorted in ascending order only.
    :return: A pair ``(sorted_elements, sorted_counts)``. ``sorted_elements`` is a list;
        ``sorted_counts`` is ``None`` if no counts were given, the ``counts`` argument
        itself if there was at most one element, and a new list aligned with
        ``sorted_elements`` otherwise.
    :raises AssertionError: If ``counts`` is given and its length differs from
        the length of ``collection``.
    """
    items = list(collection)

    if counts is not None:
        assert len(items) == len(
            counts
        ), "collection and counts must have the same length"

    # Empty or 1 element lists are always sorted. Checking this first also
    # keeps the `items[0]` lookup below from failing on an empty collection.
    if len(items) <= 1:
        return items, counts

    if not isinstance(items[0], tuple):
        # package into 1-tuples, sort those, and unpack the result again
        packaged = [(elem,) for elem in items]
        sorted_packaged, sorted_counts = util_output_postprocessing_sorter(
            packaged, counts
        )
        return [elem[0] for elem in sorted_packaged], sorted_counts

    if counts is None:
        return sort_tuple_none_aware(items), counts

    # Prefixing each tuple with its negated count makes a single ascending sort
    # yield descending counts, with ties broken by the ascending elements.
    keyed = sort_tuple_none_aware(
        [(-count, *elem) for count, elem in zip(counts, items)]
    )
    return [elem[1:] for elem in keyed], [-elem[0] for elem in keyed]
83+
84+
85+
def util_filternull_default_deprecated(values: List[T]) -> List[T]:
    """
    Returns a new list with every entry of ``values`` that is not ``None``.

    Only entries that are ``None`` themselves are dropped;
    tuples are kept even if they contain ``None``.
    """
    return [value for value in values if value is not None]
87+
88+
89+
def util_filternull_never(values: List[T]) -> List[T]:
    """
    Performs no null filtering: returns ``values`` itself, unchanged and uncopied.
    """
    return values
91+
92+
93+
def util_filternull_element_or_tuple_all(values: List[T]) -> List[T]:
    """
    Returns a new list without the entries of ``values`` that are ``None``
    or that are tuples consisting only of ``None`` (which includes the empty tuple).
    """

    def _is_null(value) -> bool:
        if value is None:
            return True
        return isinstance(value, tuple) and all(x is None for x in value)

    return [value for value in values if not _is_null(value)]
101+
102+
103+
def util_filternull_element_or_tuple_any(values: List[T]) -> List[T]:
    """
    Returns a new list without the entries of ``values`` that are ``None``
    or that are tuples containing at least one ``None``.
    """

    def _is_null(value) -> bool:
        if value is None:
            return True
        return isinstance(value, tuple) and any(x is None for x in value)

    return [value for value in values if not _is_null(value)]
111+
112+
113+
def sort_tuple_none_aware(collection: Collection[Tuple], ascending=True):
    """
    Sorts a collection of equally sized tuples,
    where `None` is considered the same as the default value of the respective column's type.
    For ints/floats `int()`/`float()` yield `0`/`0.0`, for strings `str()` yields `''`.
    The constructor is determined by calling type() on the first non-`None` element of the respective column.
    Columns that only contain `None` use `int`.

    Checks and requires all elements in collection are tuples, and that all tuples have the same length.
    Collections with at most one element are returned as a list without this check.

    :param collection: The tuples to sort.
    :param ascending: Sort in ascending order if true, in descending order otherwise.
    :return: A new list with the original tuples (`None` entries are kept as they are) in sorted order.
    :raises AssertionError: If an element is not a tuple or the tuples differ in length.
    """
    rows = list(collection)

    if len(rows) <= 1:
        return rows  # empty or 1 element lists are always sorted

    assert all(
        isinstance(row, tuple) and len(row) == len(rows[0]) for row in rows
    ), "all elements must be tuples and have the same length"

    # One constructor per column: the type of the first non-None value,
    # or int (int() == 0) if the whole column is None.
    column_types: List[type] = [
        next((type(value) for value in column if value is not None), int)
        for column in zip(*rows)
    ]

    def none_as_default(row):
        # Only the sort key is changed; the returned rows keep their None entries.
        return tuple(
            dtype() if value is None else value
            for dtype, value in zip(column_types, row)
        )

    return sorted(rows, key=none_as_default, reverse=not ascending)

tests/integration/test_integration.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,6 @@
44
import sqlalchemy as sa
55

66
import datajudge.requirements as requirements
7-
from datajudge.constraints.uniques import (
8-
util_filternull_default_deprecated,
9-
util_filternull_element_or_tuple_all,
10-
util_filternull_element_or_tuple_any,
11-
util_filternull_never,
12-
util_output_postprocessing_sorter,
13-
)
147
from datajudge.db_access import (
158
Condition,
169
is_bigquery,
@@ -20,6 +13,13 @@
2013
is_postgresql,
2114
is_snowflake,
2215
)
16+
from datajudge.utils import (
17+
util_filternull_default_deprecated,
18+
util_filternull_element_or_tuple_all,
19+
util_filternull_element_or_tuple_any,
20+
util_filternull_never,
21+
util_output_postprocessing_sorter,
22+
)
2323

2424

2525
def skip_if_mssql(engine):

0 commit comments

Comments
 (0)