Skip to content

Commit

Permalink
Adapt docstrings as to appease pchs.
Browse files Browse the repository at this point in the history
  • Loading branch information
kklein committed Feb 27, 2025
1 parent 90265e5 commit 4d37173
Show file tree
Hide file tree
Showing 7 changed files with 2,218 additions and 3,561 deletions.
5,723 changes: 2,188 additions & 3,535 deletions pixi.lock

Large diffs are not rendered by default.

4 changes: 1 addition & 3 deletions src/datajudge/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
"""datajudge allows to assess whether data from database complies with reference
information.
"""
"""datajudge allows to assess whether data from database complies with reference information."""

from .constraints.base import Constraint
from .db_access import Condition, DataSource
Expand Down
11 changes: 7 additions & 4 deletions src/datajudge/constraints/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ def __init__(

@staticmethod
def approximate_p_value(d: float, n_samples: int, m_samples: int) -> float | None:
"""
Calculates the approximate p-value according to
"""Calculate the approximate p-value.
The computation is according to
'A procedure to find exact critical values of Kolmogorov-Smirnov Test', Silvia Facchinetti, 2009
Note: For environments with `scipy` installed, this method will return a quasi-exact p-value.
Expand Down Expand Up @@ -57,8 +57,11 @@ def approximate_p_value(d: float, n_samples: int, m_samples: int) -> float | Non
def check_acceptance(
d_statistic: float, n_samples: int, m_samples: int, accepted_level: float
) -> bool:
"""For a given test statistic, d, and the respective sample sizes `n` and `m`, this function
checks whether the null hypothesis can be rejected for an accepted significance level.
"""
Check whether the null hypothesis can be rejected for an accepted significance level.
`d_statistic` is the test statistic of interest, and `n_samples` and `m_samples`
correspond to the respective sample sizes.
For more information, check out the `Wikipedia entry <https://w.wiki/5May>`_.
"""
Expand Down
5 changes: 3 additions & 2 deletions src/datajudge/constraints/uniques.py
Original file line number Diff line number Diff line change
Expand Up @@ -395,8 +395,9 @@ def test(self, engine: sa.engine.Engine) -> TestResult:


class CategoricalBoundConstraint(Constraint):
"""`CategoricalBoundConstraint` is a constraint class that checks if the share of specific values
in a column falls within predefined bounds. It compares the actual distribution of values in a
"""Constraint that checks if the share of specific values in a column falls within predefined bounds.
It compares the actual distribution of values in a
`DataSource` column with a target distribution, supplied as a dictionary.
Example use cases include testing for consistency in columns with expected categorical values
Expand Down
8 changes: 3 additions & 5 deletions src/datajudge/db_access.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,7 @@ def get_table_columns(


def apply_patches(engine: sa.engine.Engine) -> None:
"""
Apply patches to e.g. specific dialect not implemented by sqlalchemy
"""
"""Apply patches to e.g. specific dialect not implemented by sqlalchemy."""
if is_bigquery(engine):
# Patch for the EXCEPT operator (see BigQuery set operators
# https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#set_operators)
Expand Down Expand Up @@ -499,7 +497,7 @@ def get_interval_overlaps_nd(
end_columns: list[str],
end_included: bool,
) -> tuple[sa.sql.selectable.CompoundSelect, sa.sql.selectable.Select]:
"""Create selectables for interval overlaps in n dimensions.
r"""Create selectables for interval overlaps in n dimensions.
We define the presence of 'overlap' as presence of a non-empty intersection
between two intervals.
Expand Down Expand Up @@ -938,7 +936,7 @@ def get_column(
aggregate_operator: Callable | None = None,
) -> tuple[Any, list[sa.Select]]:
"""
Queries the database for the values of the relevant column (as returned by `get_column(...)`).
Query the database for the values of the relevant column (as returned by `get_column(...)`).
If an aggregation operation is passed, the results are aggregated accordingly
and a single scalar value is returned.
Expand Down
6 changes: 2 additions & 4 deletions src/datajudge/requirements.py
Original file line number Diff line number Diff line change
Expand Up @@ -502,9 +502,7 @@ def add_categorical_bound_constraint(
name: str | None = None,
cache_size=None,
) -> None:
"""
Check if the distribution of unique values in columns falls within the
specified minimum and maximum bounds.
"""Check if the distribution of unique values in columns falls within the specified minimum and maximum bounds.
The ``CategoricalBoundConstraint`` is added to ensure the distribution of unique values
in the specified columns of a ``DataSource`` falls within the given minimum and maximum
Expand Down Expand Up @@ -2012,7 +2010,7 @@ def add_column_type_constraint(
name: str | None = None,
cache_size=None,
) -> None:
"Check that the columns have the same type."
"""Check that the columns have the same type."""
ref1 = DataReference(self.data_source, [column1])
ref2 = DataReference(self.data_source2, [column2])
self._constraints.append(
Expand Down
22 changes: 14 additions & 8 deletions src/datajudge/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,14 @@ def format_difference(
n1: float | int, n2: float | int, decimal_separator: bool = True
) -> tuple[str, str]:
"""
Format and highlight how two numbers differ.
Given two numbers, n1 and n2, return a tuple of two strings,
each representing one of the input numbers with the differing part highlighted.
Highlighting is done using BBCode-like tags, which are replaced by the formatter.
Examples:
Examples
--------
123, 123.0
-> 123, 123[numDiff].0[/numDiff]
122593859432, 122593859432347
Expand All @@ -26,7 +29,8 @@ def format_difference(
- n2: The second number to compare.
- decimal_separator: Whether to separate the decimal part of the numbers with commas.
Returns:
Returns
-------
- A tuple of two strings, each representing one of the input numbers with the differing part highlighted.
"""
if decimal_separator:
Expand Down Expand Up @@ -58,8 +62,9 @@ def output_processor_sort(
collection: Collection, counts: Collection | None = None
) -> tuple[Collection, Collection | None]:
"""
Sorts a collection of tuple elements in descending order of their counts,
and for ties, makes use of the ascending order of the elements themselves.
Sorts a collection of tuple elements in descending order of their counts.
If ties exist, the ascending order of the elements themselves is used.
If the first element is not an instance of tuple,
each element will be transparently packaged into a 1-tuple for processing;
Expand Down Expand Up @@ -93,11 +98,12 @@ def output_processor_limit(
collection: Collection, counts: Collection | None = None, limit: int = 100
) -> tuple[Collection, Collection | None]:
"""
Limits the collection to the first ``limit`` elements.
If the list was shortened,
will add a ``limit+1``-th string element,
Limits the collection to the first `limit` elements.
If the list was shortened, will add a `limit+1`-th string element,
informing the user of the truncation.
The default limit of ``100`` can be adjusted using ``functools.partial``
The default limit of ``100`` can be adjusted using `functools.partial`.
"""
collection = list(collection)

Expand Down

0 comments on commit 4d37173

Please sign in to comment.