Skip to content

Commit

Permalink
add actual tests
Browse files Browse the repository at this point in the history
  • Loading branch information
coastalwhite committed Nov 18, 2024
1 parent b67bc9c commit 9241c27
Showing 1 changed file with 143 additions and 0 deletions.
143 changes: 143 additions & 0 deletions py-polars/tests/unit/test_row_encoding.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
from __future__ import annotations

import numpy as np
import pytest
from hypothesis import given

import polars as pl
from polars.testing import assert_frame_equal
from polars.testing.parametric import dataframes

# Every (descending, nulls_last) combination; the third flag is held at False.
# TODO: no_order=True currently breaks roundtripping (cause not yet known),
# so that setting is left out of the combinations for now.
FIELD_COMBS = [
    (is_descending, is_nulls_last, False)
    for is_descending in (False, True)
    for is_nulls_last in (False, True)
]


def roundtrip_re(
    df: pl.DataFrame, fields: list[tuple[bool, bool, bool]] | None = None
) -> None:
    """Row-encode `df`, decode it again and assert the result equals `df`.

    `fields` supplies one tuple of three booleans per column. When it is
    omitted, every column uses `(False, False, False)`.
    """
    if fields is None:
        fields = [(False, False, False) for _ in range(df.width)]

    # The decoder needs the (name, dtype) pair of every column to rebuild it.
    schema = list(zip(df.columns, df.dtypes))
    decoded = df._row_encode(fields)._row_decode(schema, fields)

    assert_frame_equal(df, decoded)


@given(
    df=dataframes(
        # Data types left out of the generated frames for this test.
        excluded_dtypes=[
            pl.List,
            pl.Array,
            pl.Struct,
            pl.Categorical,
            pl.Enum,
            pl.Time,
            pl.Datetime,
            pl.Date,
            pl.Duration,
            pl.Null,
            pl.Decimal,
        ]
    )
)
@pytest.mark.parametrize("field", FIELD_COMBS)
def test_row_encoding_parametric(
    df: pl.DataFrame, field: tuple[bool, bool, bool]
) -> None:
    """Roundtrip generated frames, applying the same field to every column."""
    per_column_fields = [field] * df.width
    roundtrip_re(df, per_column_fields)


@pytest.mark.parametrize("field", FIELD_COMBS)
def test_bool(field: tuple[bool, bool, bool]) -> None:
    """Roundtrip Boolean columns: empty, single value, and both orderings."""
    cases: list[list[bool]] = [
        [],
        [False],
        [True],
        [False, True],
        [True, False],
    ]
    for values in cases:
        roundtrip_re(pl.Series("a", values, pl.Boolean).to_frame(), [field])


@pytest.mark.parametrize(
    "dtype",
    [
        pl.Int8,
        pl.Int16,
        pl.Int32,
        pl.Int64,
        pl.UInt8,
        pl.UInt16,
        pl.UInt32,
        pl.UInt64,
    ],
)
@pytest.mark.parametrize("field", FIELD_COMBS)
def test_int(dtype: pl.DataType, field: tuple[bool, bool, bool]) -> None:
    """Roundtrip integer columns, including each dtype's extreme values."""
    # Named `lo`/`hi` rather than `min`/`max` so the builtins are not shadowed.
    lo = pl.select(x=dtype.min()).item()  # type: ignore[attr-defined]
    hi = pl.select(x=dtype.max()).item()  # type: ignore[attr-defined]

    cases: list[list[int]] = [
        [],
        [0],
        [lo],
        [hi],
        [1, 2, 3],
        [0, 1, 2, 3],
        [lo, 0, hi],
    ]
    for values in cases:
        roundtrip_re(pl.Series("a", values, dtype).to_frame(), [field])


@pytest.mark.parametrize(
    "dtype",
    [
        pl.Float32,
        pl.Float64,
    ],
)
@pytest.mark.parametrize("field", FIELD_COMBS)
def test_float(dtype: pl.DataType, field: tuple[bool, bool, bool]) -> None:
    """Roundtrip float columns, including positive and negative infinity."""
    # `np.inf` is used because the `np.Infinity` alias was removed in
    # NumPy 2.0; `np.inf` exists in both NumPy 1.x and 2.x.
    cases: list[list[float]] = [
        [],
        [0.0],
        [np.inf],
        [-np.inf],
        [1.0, 2.0, 3.0],
        [0.0, 1.0, 2.0, 3.0],
        [np.inf, 0, -np.inf],
    ]
    for values in cases:
        roundtrip_re(pl.Series("a", values, dtype).to_frame(), [field])


@pytest.mark.parametrize("field", FIELD_COMBS)
def test_str(field: tuple[bool, bool, bool]) -> None:
    """Roundtrip String columns: empty column, empty strings, mixed lengths."""
    cases: list[list[str]] = [
        [],
        [""],
        ["a", "b", "c"],
        ["", "a", "b", "c"],
        ["different", "length", "strings"],
        ["different", "", "length", "", "strings"],
    ]
    for values in cases:
        roundtrip_re(pl.Series("a", values, pl.String).to_frame(), [field])


# def test_struct() -> None:
# # @TODO: How do we deal with zero-field structs?
# # roundtrip_re(pl.Series('a', [], pl.Struct({})).to_frame())
# # roundtrip_re(pl.Series('a', [{}], pl.Struct({})).to_frame())
# roundtrip_re(pl.Series("a", [{"x": 1}], pl.Struct({"x": pl.Int32})).to_frame())
# roundtrip_re(
# pl.Series(
# "a", [{"x": 1}, {"y": 2}], pl.Struct({"x": pl.Int32, "y": pl.Int32})
# ).to_frame()
# )

0 comments on commit 9241c27

Please sign in to comment.