Skip to content

Commit 024b0f5

Browse files
committed
output_processor only
1 parent 6322310 commit 024b0f5

File tree

6 files changed

+147
-236
lines changed

6 files changed

+147
-236
lines changed

src/datajudge/constraints/base.py

+8-14
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55

66
import sqlalchemy as sa
77

8+
from datajudge.utils import OutputProcessor
9+
810
from ..db_access import DataReference
911
from ..formatter import Formatter
1012

@@ -119,10 +121,7 @@ def __init__(
119121
ref2=None,
120122
ref_value: Any = None,
121123
name: str = None,
122-
output_processor: Callable[
123-
[Collection, Optional[Collection]], Collection
124-
] = None,
125-
output_remainder_slicer=slice(5),
124+
output_processors: List[OutputProcessor] = None,
126125
):
127126
self._check_if_valid_between_or_within(ref2, ref_value)
128127
self.ref = ref
@@ -133,8 +132,7 @@ def __init__(
133132
self.target_selections: OptionalSelections = None
134133
self.factual_queries: Optional[List[str]] = None
135134
self.target_queries: Optional[List[str]] = None
136-
self.output_processor = output_processor
137-
self.output_remainder_slicer = output_remainder_slicer
135+
self.output_processors = output_processors
138136

139137
def _check_if_valid_between_or_within(
140138
self, ref2: Optional[DataReference], ref_value: Optional[Any]
@@ -252,14 +250,10 @@ def test(self, engine: sa.engine.Engine) -> TestResult:
252250
target_queries,
253251
)
254252

255-
def apply_output_formatting_no_counts(
256-
self, values: Collection, apply_remainder_limit=False
257-
) -> Collection:
258-
if self.output_processor is not None:
259-
values, _ = self.output_processor(values) # type: ignore[call-arg]
260-
if apply_remainder_limit:
261-
values = list(values)
262-
values = values[self.output_remainder_slicer]
253+
def apply_output_formatting_no_counts(self, values: Collection) -> Collection:
254+
if self.output_processors is not None:
255+
for output_processor in self.output_processors:
256+
values, _ = output_processor(values)
263257
return values
264258

265259

src/datajudge/constraints/miscs.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -130,12 +130,12 @@ def test(self, engine: sa.engine.Engine) -> TestResult:
130130
return TestResult.success()
131131

132132
assertion_text = (
133-
f"{self.ref} has violations of functional dependence (in total {len(violations)} rows), e.g. ({self.output_remainder_slicer}):\n"
133+
f"{self.ref} has violations of functional dependence (in total {len(violations)} rows):\n"
134134
+ "\n".join(
135135
[
136136
f"{violation}"
137137
for violation in self.apply_output_formatting_no_counts(
138-
[tuple(elem) for elem in violations], True
138+
[tuple(elem) for elem in violations]
139139
)
140140
]
141141
)

src/datajudge/constraints/uniques.py

+11-21
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
from .. import db_access
1111
from ..db_access import DataReference
12-
from ..utils import filternull_element
12+
from ..utils import OutputProcessor, filternull_element
1313
from .base import Constraint, OptionalSelections, T, TestResult, ToleranceGetter
1414

1515

@@ -94,11 +94,6 @@ class Uniques(Constraint, abc.ABC):
9494
The suggested function is ``datajudge.utils.output_processor_sort`` from this file,
9595
- see its documentation for details.
9696
97-
By default, the number of subset or superset remainders (excess or missing values)
98-
for `UniquesSubset` and `UniquesSuperset` is sliced by [:5] (i.e. the first 5) in the assertion message.
99-
This can be configured using `output_remainder_slicer`.
100-
This argument does not have an effect for `UniquesEquality`.
101-
10297
One use of this constraint is to test for consistency in columns with expected
10398
categorical values.
10499
"""
@@ -107,10 +102,7 @@ def __init__(
107102
self,
108103
ref: DataReference,
109104
name: str = None,
110-
output_processor: Callable[
111-
[Collection, Optional[Collection]], Collection
112-
] = None,
113-
output_remainder_slicer=slice(5),
105+
output_processors: List[OutputProcessor] = None,
114106
*,
115107
ref2: DataReference = None,
116108
uniques: Collection = None,
@@ -127,8 +119,7 @@ def __init__(
127119
ref2=ref2,
128120
ref_value=ref_value,
129121
name=name,
130-
output_processor=output_processor,
131-
output_remainder_slicer=output_remainder_slicer,
122+
output_processors=output_processors,
132123
)
133124

134125
if filter_func is None:
@@ -232,17 +223,16 @@ def compare(
232223
output_elemes, output_counts = list(remainder.keys()), list(
233224
remainder.values()
234225
)
235-
if self.output_processor is not None:
236-
output_elemes, output_counts = self.output_processor(
237-
output_elemes, output_counts
238-
)
239-
output_elemes = output_elemes[self.output_remainder_slicer]
240-
output_counts = output_counts[self.output_remainder_slicer]
226+
if self.output_processors is not None:
227+
for output_processor in self.output_processors:
228+
output_elemes, output_counts = output_processor(
229+
output_elemes, output_counts
230+
)
241231

242232
assertion_text = (
243233
f"{self.ref} has a fraction of {relative_violations} > "
244234
f"{self.max_relative_violations} {'DISTINCT ' if self.compare_distinct else ''}values ({n_violations} / {n_rows}) not being an element of "
245-
f"'{self.apply_output_formatting_no_counts(set(target_values))}'. It has e.g. ({self.output_remainder_slicer}) excess elements "
235+
f"'{self.apply_output_formatting_no_counts(set(target_values))}'. It has excess elements "
246236
f"'{output_elemes}' "
247237
f"with counts {output_counts}."
248238
f"{self.condition_string}"
@@ -277,8 +267,8 @@ def compare(
277267
assertion_text = (
278268
f"{self.ref} has a fraction of "
279269
f"{relative_violations} > {self.max_relative_violations} ({n_violations} / {n_rows}) "
280-
f"lacking unique values of '{self.apply_output_formatting_no_counts(set(target_values))}'. E.g. ({self.output_remainder_slicer}) it "
281-
f"doesn't have the unique value(s) '{self.apply_output_formatting_no_counts(list(remainder), apply_remainder_limit=True)}'."
270+
f"lacking unique values of '{self.apply_output_formatting_no_counts(set(target_values))}'. It "
271+
f"doesn't have the unique value(s) '{self.apply_output_formatting_no_counts(list(remainder))}'."
282272
f"{self.condition_string}"
283273
)
284274
return False, assertion_text

src/datajudge/requirements.py

+22-43
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414

1515
import sqlalchemy as sa
1616

17+
from datajudge.utils import OutputProcessor
18+
1719
from .constraints import column as column_constraints
1820
from .constraints import date as date_constraints
1921
from .constraints import groupby as groupby_constraints
@@ -262,9 +264,7 @@ def add_uniques_equality_constraint(
262264
filter_func: Callable[[List[T]], List[T]] = None,
263265
map_func: Callable[[T], T] = None,
264266
reduce_func: Callable[[Collection], Collection] = None,
265-
output_processor: Callable[
266-
[Collection, Optional[Collection]], Collection
267-
] = None,
267+
output_processors: List[OutputProcessor] = None,
268268
condition: Condition = None,
269269
name: str = None,
270270
):
@@ -297,7 +297,7 @@ def add_uniques_equality_constraint(
297297
filter_func=filter_func,
298298
map_func=map_func,
299299
reduce_func=reduce_func,
300-
output_processor=output_processor,
300+
output_processors=output_processors,
301301
name=name,
302302
)
303303
)
@@ -312,10 +312,7 @@ def add_uniques_superset_constraint(
312312
reduce_func: Callable[[Collection], Collection] = None,
313313
condition: Condition = None,
314314
name: str = None,
315-
output_processor: Callable[
316-
[Collection, Optional[Collection]], Collection
317-
] = None,
318-
output_remainder_slicer=slice(5),
315+
output_processors: List[OutputProcessor] = None,
319316
):
320317
"""Check if unique values of columns are contained in the reference data.
321318
@@ -341,7 +338,7 @@ def add_uniques_superset_constraint(
341338
categorical values.
342339
343340
See ``Uniques`` for further details on ``map_func``, ``reduce_func``,
344-
``output_processor``, and ``output_remainder_slicer``.
341+
and ``output_processors``.
345342
"""
346343

347344
ref = DataReference(self.data_source, columns, condition)
@@ -353,8 +350,7 @@ def add_uniques_superset_constraint(
353350
filter_func=filter_func,
354351
map_func=map_func,
355352
reduce_func=reduce_func,
356-
output_processor=output_processor,
357-
output_remainder_slicer=output_remainder_slicer,
353+
output_processors=output_processors,
358354
name=name,
359355
)
360356
)
@@ -370,10 +366,7 @@ def add_uniques_subset_constraint(
370366
reduce_func: Callable[[Collection], Collection] = None,
371367
condition: Condition = None,
372368
name: str = None,
373-
output_processor: Callable[
374-
[Collection, Optional[Collection]], Collection
375-
] = None,
376-
output_remainder_slicer=slice(5),
369+
output_processors: List[OutputProcessor] = None,
377370
):
378371
"""Check if the data's unique values are contained in a given set of values.
379372
@@ -403,7 +396,7 @@ def add_uniques_subset_constraint(
403396
or if `max_relative_violations` is 0.
404397
405398
See ``Uniques`` for further details on ``map_func``, ``reduce_func``,
406-
``output_processor``, and ``output_remainder_slicer``.
399+
and ``output_processors``.
407400
"""
408401

409402
ref = DataReference(self.data_source, columns, condition)
@@ -416,8 +409,7 @@ def add_uniques_subset_constraint(
416409
compare_distinct=compare_distinct,
417410
map_func=map_func,
418411
reduce_func=reduce_func,
419-
output_processor=output_processor,
420-
output_remainder_slicer=output_remainder_slicer,
412+
output_processors=output_processors,
421413
name=name,
422414
)
423415
)
@@ -876,10 +868,7 @@ def add_functional_dependency_constraint(
876868
value_columns: List[str],
877869
condition: Condition = None,
878870
name: str = None,
879-
output_processor: Callable[
880-
[Collection, Optional[Collection]], Collection
881-
] = None,
882-
output_remainder_slicer=slice(5),
871+
output_processors: List[OutputProcessor] = None,
883872
):
884873
"""
885874
Expresses a functional dependency, a constraint where the `value_columns` are uniquely determined by the `key_columns`.
@@ -889,7 +878,8 @@ def add_functional_dependency_constraint(
889878
and all other columns are included `value_columns`.
890879
This constraint allows for a more general definition of functional dependencies, where the `key_columns` are not necessarily a primary key.
891880
892-
Additional configuration options (for details see the analogous parameters in for ``Uniques``-constraints) on how the output is sorted and how many counterexamples are shown are available as ``output_processor`` and ``output_remainder_slicer``.
881+
An additional configuration option (for details see the analogous parameter for ``Uniques``-constraints)
882+
on how the output is sorted and how many counterexamples are shown is available as ``output_processors``.
893883
894884
For more information on functional dependencies, see https://en.wikipedia.org/wiki/Functional_dependency.
895885
"""
@@ -899,8 +889,7 @@ def add_functional_dependency_constraint(
899889
miscs_constraints.FunctionalDependency(
900890
ref,
901891
key_columns=key_columns,
902-
output_processor=output_processor,
903-
output_remainder_slicer=output_remainder_slicer,
892+
output_processors=output_processors,
904893
name=name,
905894
)
906895
)
@@ -1476,9 +1465,7 @@ def add_uniques_equality_constraint(
14761465
filter_func: Callable[[List[T]], List[T]] = None,
14771466
map_func: Callable[[T], T] = None,
14781467
reduce_func: Callable[[Collection], Collection] = None,
1479-
output_processor: Callable[
1480-
[Collection, Optional[Collection]], Collection
1481-
] = None,
1468+
output_processors: List[OutputProcessor] = None,
14821469
condition1: Condition = None,
14831470
condition2: Condition = None,
14841471
name: str = None,
@@ -1513,7 +1500,7 @@ def add_uniques_equality_constraint(
15131500
filter_func=filter_func,
15141501
map_func=map_func,
15151502
reduce_func=reduce_func,
1516-
output_processor=output_processor,
1503+
output_processors=output_processors,
15171504
name=name,
15181505
)
15191506
)
@@ -1529,10 +1516,7 @@ def add_uniques_superset_constraint(
15291516
condition1: Condition = None,
15301517
condition2: Condition = None,
15311518
name: str = None,
1532-
output_processor: Callable[
1533-
[Collection, Optional[Collection]], Collection
1534-
] = None,
1535-
output_remainder_slicer=slice(5),
1519+
output_processors: List[OutputProcessor] = None,
15361520
):
15371521
"""Check if unique values of columns are contained in the reference data.
15381522
@@ -1559,7 +1543,7 @@ def add_uniques_superset_constraint(
15591543
categorical values.
15601544
15611545
See ``Uniques`` for further details on ``map_func``, ``reduce_func``,
1562-
``output_processor``, and ``output_remainder_slicer``.
1546+
and ``output_processors``.
15631547
"""
15641548

15651549
ref = DataReference(self.data_source, columns1, condition1)
@@ -1572,8 +1556,7 @@ def add_uniques_superset_constraint(
15721556
filter_func=filter_func,
15731557
map_func=map_func,
15741558
reduce_func=reduce_func,
1575-
output_processor=output_processor,
1576-
output_remainder_slicer=output_remainder_slicer,
1559+
output_processors=output_processors,
15771560
name=name,
15781561
)
15791562
)
@@ -1590,10 +1573,7 @@ def add_uniques_subset_constraint(
15901573
condition1: Condition = None,
15911574
condition2: Condition = None,
15921575
name: str = None,
1593-
output_processor: Callable[
1594-
[Collection, Optional[Collection]], Collection
1595-
] = None,
1596-
output_remainder_slicer=slice(5),
1576+
output_processors: List[OutputProcessor] = None,
15971577
):
15981578
"""Check if the given columns' unique values are contained in reference data.
15991579
@@ -1622,7 +1602,7 @@ def add_uniques_subset_constraint(
16221602
or if `max_relative_violations` is 0.
16231603
16241604
See ``Uniques`` for further details on ``map_func``, ``reduce_func``,
1625-
``output_processor``, and ``output_remainder_slicer``.
1605+
and ``output_processors``.
16261606
"""
16271607

16281608
ref = DataReference(self.data_source, columns1, condition1)
@@ -1636,8 +1616,7 @@ def add_uniques_subset_constraint(
16361616
filter_func=filter_func,
16371617
map_func=map_func,
16381618
reduce_func=reduce_func,
1639-
output_processor=output_processor,
1640-
output_remainder_slicer=output_remainder_slicer,
1619+
output_processors=output_processors,
16411620
name=name,
16421621
)
16431622
)

0 commit comments

Comments
 (0)