Adapt docstrings as to appease pchs.

Quantco · Feb 27, 2025 · 4d37173 · 4d37173
1 parent 90265e5
commit 4d37173
Show file tree

Hide file tree

Showing 7 changed files with 2,218 additions and 3,561 deletions.
diff --git a/pixi.lock b/pixi.lock
diff --git a/src/datajudge/__init__.py b/src/datajudge/__init__.py
@@ -1,6 +1,4 @@
-"""datajudge allows to assess  whether data from database complies with reference
-information.
-"""
+"""datajudge allows to assess whether data from a database complies with reference information."""
 
 from .constraints.base import Constraint
 from .db_access import Condition, DataSource

diff --git a/src/datajudge/constraints/stats.py b/src/datajudge/constraints/stats.py
@@ -24,9 +24,9 @@ def __init__(
 
     @staticmethod
     def approximate_p_value(d: float, n_samples: int, m_samples: int) -> float | None:
-        """
-        Calculates the approximate p-value according to
+        """Calculate the approximate p-value.
 
+        The computation is according to
         'A procedure to find exact critical values of Kolmogorov-Smirnov Test', Silvia Facchinetti, 2009
 
         Note: For environments with `scipy` installed, this method will return a quasi-exact p-value.
@@ -57,8 +57,11 @@ def approximate_p_value(d: float, n_samples: int, m_samples: int) -> float | Non
     def check_acceptance(
         d_statistic: float, n_samples: int, m_samples: int, accepted_level: float
     ) -> bool:
-        """For a given test statistic, d, and the respective sample sizes `n` and `m`, this function
-        checks whether the null hypothesis can be rejected for an accepted significance level.
+        """
+        Check whether the null hypothesis can be rejected for an accepted significance level.
+
+        `d_statistic` is the test statistic of interest, and `n_samples` and `m_samples`
+        correspond to the respective sample sizes.
 
         For more information, check out the `Wikipedia entry <https://w.wiki/5May>`_.
         """

diff --git a/src/datajudge/constraints/uniques.py b/src/datajudge/constraints/uniques.py
@@ -395,8 +395,9 @@ def test(self, engine: sa.engine.Engine) -> TestResult:
 
 
 class CategoricalBoundConstraint(Constraint):
-    """`CategoricalBoundConstraint` is a constraint class that checks if the share of specific values
-    in a column falls within predefined bounds. It compares the actual distribution of values in a
+    """Constraint that checks if the share of specific values in a column falls within predefined bounds.
+
+    It compares the actual distribution of values in a
     `DataSource` column with a target distribution, supplied as a dictionary.
 
     Example use cases include testing for consistency in columns with expected categorical values

diff --git a/src/datajudge/db_access.py b/src/datajudge/db_access.py
@@ -44,9 +44,7 @@ def get_table_columns(
 
 
 def apply_patches(engine: sa.engine.Engine) -> None:
-    """
-    Apply patches to e.g. specific dialect not implemented by sqlalchemy
-    """
+    """Apply patches to e.g. specific dialect not implemented by sqlalchemy."""
     if is_bigquery(engine):
         # Patch for the EXCEPT operator (see BigQuery set operators
         # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#set_operators)
@@ -499,7 +497,7 @@ def get_interval_overlaps_nd(
     end_columns: list[str],
     end_included: bool,
 ) -> tuple[sa.sql.selectable.CompoundSelect, sa.sql.selectable.Select]:
-    """Create selectables for interval overlaps in n dimensions.
+    r"""Create selectables for interval overlaps in n dimensions.
 
     We define the presence of 'overlap' as presence of a non-empty intersection
     between two intervals.
@@ -938,7 +936,7 @@ def get_column(
     aggregate_operator: Callable | None = None,
 ) -> tuple[Any, list[sa.Select]]:
     """
-    Queries the database for the values of the relevant column (as returned by `get_column(...)`).
+    Query the database for the values of the relevant column (as returned by `get_column(...)`).
 
     If an aggregation operation is passed, the results are aggregated accordingly
     and a single scalar value is returned.

diff --git a/src/datajudge/requirements.py b/src/datajudge/requirements.py
@@ -502,9 +502,7 @@ def add_categorical_bound_constraint(
         name: str | None = None,
         cache_size=None,
     ) -> None:
-        """
-        Check if the distribution of unique values in columns falls within the
-        specified minimum and maximum bounds.
+        """Check if the distribution of unique values in columns falls within the specified minimum and maximum bounds.
 
         The ``CategoricalBoundConstraint`` is added to ensure the distribution of unique values
         in the specified columns of a ``DataSource`` falls within the given minimum and maximum
@@ -2012,7 +2010,7 @@ def add_column_type_constraint(
         name: str | None = None,
         cache_size=None,
     ) -> None:
-        "Check that the columns have the same type."
+        """Check that the columns have the same type."""
         ref1 = DataReference(self.data_source, [column1])
         ref2 = DataReference(self.data_source2, [column2])
         self._constraints.append(

diff --git a/src/datajudge/utils.py b/src/datajudge/utils.py
@@ -11,11 +11,14 @@ def format_difference(
     n1: float | int, n2: float | int, decimal_separator: bool = True
 ) -> tuple[str, str]:
     """
+    Format and highlight how two numbers differ.
+
     Given two numbers, n1 and n2, return a tuple of two strings,
     each representing one of the input numbers with the differing part highlighted.
     Highlighting is done using BBCode-like tags, which are replaced by the formatter.
 
-    Examples:
+    Examples
+    --------
         123, 123.0
         -> 123, 123[numDiff].0[/numDiff]
         122593859432, 122593859432347
@@ -26,7 +29,8 @@ def format_difference(
     - n2: The second number to compare.
     - decimal_separator: Whether to separate the decimal part of the numbers with commas.
 
-    Returns:
+    Returns
+    -------
     - A tuple of two strings, each representing one of the input numbers with the differing part highlighted.
     """
     if decimal_separator:
@@ -58,8 +62,9 @@ def output_processor_sort(
     collection: Collection, counts: Collection | None = None
 ) -> tuple[Collection, Collection | None]:
     """
-    Sorts a collection of tuple elements in descending order of their counts,
-    and for ties, makes use of the ascending order of the elements themselves.
+    Sort a collection of tuple elements in descending order of their counts.
+
+    If ties exist, the ascending order of the elements themselves is used.
 
     If the first element is not an instance of tuple,
     each element will be transparently packaged into a 1-tuple for processing;
@@ -93,11 +98,12 @@ def output_processor_limit(
     collection: Collection, counts: Collection | None = None, limit: int = 100
 ) -> tuple[Collection, Collection | None]:
     """
-    Limits the collection to the first ``limit`` elements.
-    If the list was shortened,
-    will add a ``limit+1``-th string element,
+    Limit the collection to the first `limit` elements.
+
+    If the list was shortened, a `limit+1`-th string element is added,
     informing the user of the truncation.
-    The default limit of ``100`` can be adjusted using ``functools.partial``
+
+    The default limit of ``100`` can be adjusted using `functools.partial`.
     """
     collection = list(collection)