From 6b081390e699e3479fe6f6331cd814aab9077884 Mon Sep 17 00:00:00 2001 From: Simon Lin Date: Mon, 21 Oct 2024 17:24:57 +1100 Subject: [PATCH] fix: Drop include_file_paths column from scan schema when projection pushdown excludes it --- .../plans/optimizer/projection_pushdown/mod.rs | 15 +++++++++++++++ py-polars/tests/unit/io/test_scan.py | 8 ++++++++ 2 files changed, 23 insertions(+) diff --git a/crates/polars-plan/src/plans/optimizer/projection_pushdown/mod.rs b/crates/polars-plan/src/plans/optimizer/projection_pushdown/mod.rs index 7a84c6990fda..55d5501dd44e 100644 --- a/crates/polars-plan/src/plans/optimizer/projection_pushdown/mod.rs +++ b/crates/polars-plan/src/plans/optimizer/projection_pushdown/mod.rs @@ -511,6 +511,21 @@ impl ProjectionPushDown { file_options.row_index = None; } }; + + if let Some(col_name) = &file_options.include_file_paths { + if output_schema + .as_ref() + .map_or(false, |schema| !schema.contains(col_name)) + { + // Need to remove it from the input schema so + // that projection indices are correct. + let mut file_schema = Arc::unwrap_or_clone(file_info.schema); + file_schema.shift_remove(col_name); + file_info.schema = Arc::new(file_schema); + file_options.include_file_paths = None; + } + }; + let lp = Scan { sources, file_info, diff --git a/py-polars/tests/unit/io/test_scan.py b/py-polars/tests/unit/io/test_scan.py index 799c4953cbf6..99755dfaec0d 100644 --- a/py-polars/tests/unit/io/test_scan.py +++ b/py-polars/tests/unit/io/test_scan.py @@ -801,3 +801,11 @@ def test_scan_double_collect_row_index_invalidates_cached_ir_18892() -> None: schema={"index": pl.UInt32, "a": pl.Int64}, ), ) + + +def test_scan_include_file_paths_respects_projection_pushdown() -> None: + q = pl.scan_csv("a,b,c\na1,b1,c1".encode(), include_file_paths="path_name").select( + ["a", "b"] + ) + + assert_frame_equal(q.collect(), pl.DataFrame({"a": "a1", "b": "b1"}))