
Commit

nameexhaustion committed Feb 20, 2025
1 parent a54a4ea commit e5d9edc
Showing 1 changed file with 7 additions and 5 deletions.
12 changes: 7 additions & 5 deletions py-polars/tests/unit/io/test_hive.py
@@ -892,8 +892,8 @@ def test_hive_auto_enables_when_unspecified_and_hive_schema_passed(
 
 @pytest.mark.write_disk
 def test_hive_parquet_prefiltered_20894_21327(tmp_path: Path) -> None:
-    file_path = str(tmp_path / "date=2025-01-01/00000000.parquet")
-    Path(file_path).parent.mkdir(exist_ok=True, parents=True)
+    file_path = tmp_path / "date=2025-01-01/00000000.parquet"
+    file_path.parent.mkdir(exist_ok=True, parents=True)
 
     data = pl.DataFrame(
         {
@@ -904,10 +904,11 @@ def test_hive_parquet_prefiltered_20894_21327(tmp_path: Path) -> None:
 
     data.write_parquet(file_path)
 
+    import base64
     import subprocess
 
-    # For security
-    assert '"' not in file_path
+    # For security, and for Windows backslashes.
+    scan_path_b64 = base64.b64encode(str(file_path.absolute()).encode()).decode()
 
     # This is the easiest way to control the thread pool size so that it is stable.
     out = subprocess.check_output(
@@ -926,7 +927,8 @@ def test_hive_parquet_prefiltered_20894_21327(tmp_path: Path) -> None:
 
 assert pl.thread_pool_size() == 1
 
-df = pl.scan_parquet("{file_path}", hive_partitioning=True).filter(pl.col("value") == "1").collect()
+tmp_path = base64.b64decode("{scan_path_b64}").decode()
+df = pl.scan_parquet(tmp_path, hive_partitioning=True).filter(pl.col("value") == "1").collect()
 
 # We need the str() to trigger panic on invalid state
 str(df)
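The fix replaces the old quote-check on the interpolated path with a base64 round-trip: the parent process encodes the scan path, and the generated child script decodes it, so Windows backslashes and quote characters never have to survive string interpolation. Below is a minimal, self-contained sketch of that pattern, not the test's own code; the temp directory, the single "value" column, and the env handling are assumptions made for illustration. POLARS_MAX_THREADS is read when polars is first imported, which is how the child ends up with a one-thread pool.

import base64
import os
import subprocess
import sys
import tempfile

import polars as pl

# Write a small hive-partitioned parquet file (only a "value" column in the
# file itself; the "date" column comes from the directory name).
tmp_dir = tempfile.mkdtemp()
file_path = os.path.join(tmp_dir, "date=2025-01-01", "00000000.parquet")
os.makedirs(os.path.dirname(file_path), exist_ok=True)
pl.DataFrame({"value": ["1"]}).write_parquet(file_path)

# Base64-encode the absolute path so it can be pasted into the child script
# without worrying about backslashes or quotes.
scan_path_b64 = base64.b64encode(os.path.abspath(file_path).encode()).decode()

child_script = f"""\
import base64
import polars as pl

assert pl.thread_pool_size() == 1  # POLARS_MAX_THREADS was set before import

path = base64.b64decode("{scan_path_b64}").decode()
df = (
    pl.scan_parquet(path, hive_partitioning=True)
    .filter(pl.col("value") == "1")
    .collect()
)
print(df)
"""

# Setting POLARS_MAX_THREADS before the child imports polars pins its thread
# pool to a single thread, keeping the run deterministic.
env = {**os.environ, "POLARS_MAX_THREADS": "1"}
print(subprocess.check_output([sys.executable, "-c", child_script], env=env).decode())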
