Skip to content

Commit 308ff99

Browse files
committed
output_processor only
1 parent 6322310 commit 308ff99

File tree

6 files changed

+145
-236
lines changed

6 files changed

+145
-236
lines changed

src/datajudge/constraints/base.py

+7-14
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
from ..db_access import DataReference
99
from ..formatter import Formatter
10+
from ..utils import OutputProcessor
1011

1112
DEFAULT_FORMATTER = Formatter()
1213

@@ -119,10 +120,7 @@ def __init__(
119120
ref2=None,
120121
ref_value: Any = None,
121122
name: str = None,
122-
output_processor: Callable[
123-
[Collection, Optional[Collection]], Collection
124-
] = None,
125-
output_remainder_slicer=slice(5),
123+
output_processors: List[OutputProcessor] = None,
126124
):
127125
self._check_if_valid_between_or_within(ref2, ref_value)
128126
self.ref = ref
@@ -133,8 +131,7 @@ def __init__(
133131
self.target_selections: OptionalSelections = None
134132
self.factual_queries: Optional[List[str]] = None
135133
self.target_queries: Optional[List[str]] = None
136-
self.output_processor = output_processor
137-
self.output_remainder_slicer = output_remainder_slicer
134+
self.output_processors = output_processors
138135

139136
def _check_if_valid_between_or_within(
140137
self, ref2: Optional[DataReference], ref_value: Optional[Any]
@@ -252,14 +249,10 @@ def test(self, engine: sa.engine.Engine) -> TestResult:
252249
target_queries,
253250
)
254251

255-
def apply_output_formatting_no_counts(
256-
self, values: Collection, apply_remainder_limit=False
257-
) -> Collection:
258-
if self.output_processor is not None:
259-
values, _ = self.output_processor(values) # type: ignore[call-arg]
260-
if apply_remainder_limit:
261-
values = list(values)
262-
values = values[self.output_remainder_slicer]
252+
def apply_output_formatting_no_counts(self, values: Collection) -> Collection:
253+
if self.output_processors is not None:
254+
for output_processor in self.output_processors:
255+
values, _ = output_processor(values)
263256
return values
264257

265258

src/datajudge/constraints/miscs.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -130,12 +130,12 @@ def test(self, engine: sa.engine.Engine) -> TestResult:
130130
return TestResult.success()
131131

132132
assertion_text = (
133-
f"{self.ref} has violations of functional dependence (in total {len(violations)} rows), e.g. ({self.output_remainder_slicer}):\n"
133+
f"{self.ref} has violations of functional dependence (in total {len(violations)} rows):\n"
134134
+ "\n".join(
135135
[
136136
f"{violation}"
137137
for violation in self.apply_output_formatting_no_counts(
138-
[tuple(elem) for elem in violations], True
138+
[tuple(elem) for elem in violations]
139139
)
140140
]
141141
)

src/datajudge/constraints/uniques.py

+11-21
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
from .. import db_access
1111
from ..db_access import DataReference
12-
from ..utils import filternull_element
12+
from ..utils import OutputProcessor, filternull_element
1313
from .base import Constraint, OptionalSelections, T, TestResult, ToleranceGetter
1414

1515

@@ -94,11 +94,6 @@ class Uniques(Constraint, abc.ABC):
9494
The suggested function is ``datajudge.utils.output_processor_sort`` from this file,
9595
- see its documentation for details.
9696
97-
By default, the number of subset or superset remainders (excess or missing values)
98-
for `UniquesSubset` and `UniquesSuperset` is sliced by [:5] (i.e. the first 5) in the assertion message.
99-
This can be configured using `output_remainder_slicer`.
100-
This argument does not have an effect for `UniquesEquality`.
101-
10297
One use of this constraint is to test for consistency in columns with expected
10398
categorical values.
10499
"""
@@ -107,10 +102,7 @@ def __init__(
107102
self,
108103
ref: DataReference,
109104
name: str = None,
110-
output_processor: Callable[
111-
[Collection, Optional[Collection]], Collection
112-
] = None,
113-
output_remainder_slicer=slice(5),
105+
output_processors: List[OutputProcessor] = None,
114106
*,
115107
ref2: DataReference = None,
116108
uniques: Collection = None,
@@ -127,8 +119,7 @@ def __init__(
127119
ref2=ref2,
128120
ref_value=ref_value,
129121
name=name,
130-
output_processor=output_processor,
131-
output_remainder_slicer=output_remainder_slicer,
122+
output_processors=output_processors,
132123
)
133124

134125
if filter_func is None:
@@ -232,17 +223,16 @@ def compare(
232223
output_elemes, output_counts = list(remainder.keys()), list(
233224
remainder.values()
234225
)
235-
if self.output_processor is not None:
236-
output_elemes, output_counts = self.output_processor(
237-
output_elemes, output_counts
238-
)
239-
output_elemes = output_elemes[self.output_remainder_slicer]
240-
output_counts = output_counts[self.output_remainder_slicer]
226+
if self.output_processors is not None:
227+
for output_processor in self.output_processors:
228+
output_elemes, output_counts = output_processor(
229+
output_elemes, output_counts
230+
)
241231

242232
assertion_text = (
243233
f"{self.ref} has a fraction of {relative_violations} > "
244234
f"{self.max_relative_violations} {'DISTINCT ' if self.compare_distinct else ''}values ({n_violations} / {n_rows}) not being an element of "
245-
f"'{self.apply_output_formatting_no_counts(set(target_values))}'. It has e.g. ({self.output_remainder_slicer}) excess elements "
235+
f"'{self.apply_output_formatting_no_counts(set(target_values))}'. It has excess elements "
246236
f"'{output_elemes}' "
247237
f"with counts {output_counts}."
248238
f"{self.condition_string}"
@@ -277,8 +267,8 @@ def compare(
277267
assertion_text = (
278268
f"{self.ref} has a fraction of "
279269
f"{relative_violations} > {self.max_relative_violations} ({n_violations} / {n_rows}) "
280-
f"lacking unique values of '{self.apply_output_formatting_no_counts(set(target_values))}'. E.g. ({self.output_remainder_slicer}) it "
281-
f"doesn't have the unique value(s) '{self.apply_output_formatting_no_counts(list(remainder), apply_remainder_limit=True)}'."
270+
f"lacking unique values of '{self.apply_output_formatting_no_counts(set(target_values))}'. It "
271+
f"doesn't have the unique value(s) '{self.apply_output_formatting_no_counts(list(remainder))}'."
282272
f"{self.condition_string}"
283273
)
284274
return False, assertion_text

src/datajudge/requirements.py

+21-43
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
TableDataSource,
3535
get_date_growth_rate,
3636
)
37+
from .utils import OutputProcessor
3738

3839
T = TypeVar("T")
3940

@@ -262,9 +263,7 @@ def add_uniques_equality_constraint(
262263
filter_func: Callable[[List[T]], List[T]] = None,
263264
map_func: Callable[[T], T] = None,
264265
reduce_func: Callable[[Collection], Collection] = None,
265-
output_processor: Callable[
266-
[Collection, Optional[Collection]], Collection
267-
] = None,
266+
output_processors: List[OutputProcessor] = None,
268267
condition: Condition = None,
269268
name: str = None,
270269
):
@@ -297,7 +296,7 @@ def add_uniques_equality_constraint(
297296
filter_func=filter_func,
298297
map_func=map_func,
299298
reduce_func=reduce_func,
300-
output_processor=output_processor,
299+
output_processors=output_processors,
301300
name=name,
302301
)
303302
)
@@ -312,10 +311,7 @@ def add_uniques_superset_constraint(
312311
reduce_func: Callable[[Collection], Collection] = None,
313312
condition: Condition = None,
314313
name: str = None,
315-
output_processor: Callable[
316-
[Collection, Optional[Collection]], Collection
317-
] = None,
318-
output_remainder_slicer=slice(5),
314+
output_processors: List[OutputProcessor] = None,
319315
):
320316
"""Check if unique values of columns are contained in the reference data.
321317
@@ -341,7 +337,7 @@ def add_uniques_superset_constraint(
341337
categorical values.
342338
343339
See ``Uniques`` for further details on ``map_func``, ``reduce_func``,
344-
``output_processor``, and ``output_remainder_slicer``.
340+
and ``output_processors``.
345341
"""
346342

347343
ref = DataReference(self.data_source, columns, condition)
@@ -353,8 +349,7 @@ def add_uniques_superset_constraint(
353349
filter_func=filter_func,
354350
map_func=map_func,
355351
reduce_func=reduce_func,
356-
output_processor=output_processor,
357-
output_remainder_slicer=output_remainder_slicer,
352+
output_processors=output_processors,
358353
name=name,
359354
)
360355
)
@@ -370,10 +365,7 @@ def add_uniques_subset_constraint(
370365
reduce_func: Callable[[Collection], Collection] = None,
371366
condition: Condition = None,
372367
name: str = None,
373-
output_processor: Callable[
374-
[Collection, Optional[Collection]], Collection
375-
] = None,
376-
output_remainder_slicer=slice(5),
368+
output_processors: List[OutputProcessor] = None,
377369
):
378370
"""Check if the data's unique values are contained in a given set of values.
379371
@@ -403,7 +395,7 @@ def add_uniques_subset_constraint(
403395
or if `max_relative_violations` is 0.
404396
405397
See ``Uniques`` for further details on ``map_func``, ``reduce_func``,
406-
``output_processor``, and ``output_remainder_slicer``.
398+
and ``output_processors``.
407399
"""
408400

409401
ref = DataReference(self.data_source, columns, condition)
@@ -416,8 +408,7 @@ def add_uniques_subset_constraint(
416408
compare_distinct=compare_distinct,
417409
map_func=map_func,
418410
reduce_func=reduce_func,
419-
output_processor=output_processor,
420-
output_remainder_slicer=output_remainder_slicer,
411+
output_processors=output_processors,
421412
name=name,
422413
)
423414
)
@@ -876,10 +867,7 @@ def add_functional_dependency_constraint(
876867
value_columns: List[str],
877868
condition: Condition = None,
878869
name: str = None,
879-
output_processor: Callable[
880-
[Collection, Optional[Collection]], Collection
881-
] = None,
882-
output_remainder_slicer=slice(5),
870+
output_processors: List[OutputProcessor] = None,
883871
):
884872
"""
885873
Expresses a functional dependency, a constraint where the `value_columns` are uniquely determined by the `key_columns`.
@@ -889,7 +877,8 @@ def add_functional_dependency_constraint(
889877
and all other columns are included `value_columns`.
890878
This constraint allows for a more general definition of functional dependencies, where the `key_columns` are not necessarily a primary key.
891879
892-
Additional configuration options (for details see the analogous parameters in for ``Uniques``-constraints) on how the output is sorted and how many counterexamples are shown are available as ``output_processor`` and ``output_remainder_slicer``.
880+
An additional configuration option (for details see the analogous parameter for ``Uniques``-constraints)
881+
on how the output is sorted and how many counterexamples are shown is available as ``output_processors``.
893882
894883
For more information on functional dependencies, see https://en.wikipedia.org/wiki/Functional_dependency.
895884
"""
@@ -899,8 +888,7 @@ def add_functional_dependency_constraint(
899888
miscs_constraints.FunctionalDependency(
900889
ref,
901890
key_columns=key_columns,
902-
output_processor=output_processor,
903-
output_remainder_slicer=output_remainder_slicer,
891+
output_processors=output_processors,
904892
name=name,
905893
)
906894
)
@@ -1476,9 +1464,7 @@ def add_uniques_equality_constraint(
14761464
filter_func: Callable[[List[T]], List[T]] = None,
14771465
map_func: Callable[[T], T] = None,
14781466
reduce_func: Callable[[Collection], Collection] = None,
1479-
output_processor: Callable[
1480-
[Collection, Optional[Collection]], Collection
1481-
] = None,
1467+
output_processors: List[OutputProcessor] = None,
14821468
condition1: Condition = None,
14831469
condition2: Condition = None,
14841470
name: str = None,
@@ -1513,7 +1499,7 @@ def add_uniques_equality_constraint(
15131499
filter_func=filter_func,
15141500
map_func=map_func,
15151501
reduce_func=reduce_func,
1516-
output_processor=output_processor,
1502+
output_processors=output_processors,
15171503
name=name,
15181504
)
15191505
)
@@ -1529,10 +1515,7 @@ def add_uniques_superset_constraint(
15291515
condition1: Condition = None,
15301516
condition2: Condition = None,
15311517
name: str = None,
1532-
output_processor: Callable[
1533-
[Collection, Optional[Collection]], Collection
1534-
] = None,
1535-
output_remainder_slicer=slice(5),
1518+
output_processors: List[OutputProcessor] = None,
15361519
):
15371520
"""Check if unique values of columns are contained in the reference data.
15381521
@@ -1559,7 +1542,7 @@ def add_uniques_superset_constraint(
15591542
categorical values.
15601543
15611544
See ``Uniques`` for further details on ``map_func``, ``reduce_func``,
1562-
``output_processor``, and ``output_remainder_slicer``.
1545+
and ``output_processors``.
15631546
"""
15641547

15651548
ref = DataReference(self.data_source, columns1, condition1)
@@ -1572,8 +1555,7 @@ def add_uniques_superset_constraint(
15721555
filter_func=filter_func,
15731556
map_func=map_func,
15741557
reduce_func=reduce_func,
1575-
output_processor=output_processor,
1576-
output_remainder_slicer=output_remainder_slicer,
1558+
output_processors=output_processors,
15771559
name=name,
15781560
)
15791561
)
@@ -1590,10 +1572,7 @@ def add_uniques_subset_constraint(
15901572
condition1: Condition = None,
15911573
condition2: Condition = None,
15921574
name: str = None,
1593-
output_processor: Callable[
1594-
[Collection, Optional[Collection]], Collection
1595-
] = None,
1596-
output_remainder_slicer=slice(5),
1575+
output_processors: List[OutputProcessor] = None,
15971576
):
15981577
"""Check if the given columns's unique values in are contained in reference data.
15991578
@@ -1622,7 +1601,7 @@ def add_uniques_subset_constraint(
16221601
or if `max_relative_violations` is 0.
16231602
16241603
See ``Uniques`` for further details on ``map_func``, ``reduce_func``,
1625-
``output_processor``, and ``output_remainder_slicer``.
1604+
and ``output_processors``.
16261605
"""
16271606

16281607
ref = DataReference(self.data_source, columns1, condition1)
@@ -1636,8 +1615,7 @@ def add_uniques_subset_constraint(
16361615
filter_func=filter_func,
16371616
map_func=map_func,
16381617
reduce_func=reduce_func,
1639-
output_processor=output_processor,
1640-
output_remainder_slicer=output_remainder_slicer,
1618+
output_processors=output_processors,
16411619
name=name,
16421620
)
16431621
)

0 commit comments

Comments
 (0)