pola-rs · coastalwhite · Feb 13, 2025 · Feb 13, 2025 · Feb 13, 2025 · Feb 13, 2025
@@ -1,65 +1,22 @@
-#[cfg(all(
-    target_family = "unix",
-    not(target_os = "emscripten"),
-    not(allocator = "default"),
-    not(allocator = "mimalloc"),
-))]
-use jemallocator::Jemalloc;
-#[cfg(all(
-    not(debug_assertions),
-    not(allocator = "default"),
-    any(
-        not(target_family = "unix"),
-        target_os = "emscripten",
-        allocator = "mimalloc"
-    ),
-))]
-use mimalloc::MiMalloc;
-
-#[cfg(all(
-    debug_assertions,
-    target_family = "unix",
-    not(target_os = "emscripten"),
-    not(allocator = "default"),
-    not(allocator = "mimalloc"),
-))]
-use crate::memory::TracemallocAllocator;
-
 #[global_allocator]
 #[cfg(all(
-    not(debug_assertions),
     not(allocator = "mimalloc"),
     not(allocator = "default"),
     target_family = "unix",
     not(target_os = "emscripten"),
 ))]
-static ALLOC: Jemalloc = Jemalloc;
+static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
 
 #[global_allocator]
 #[cfg(all(
-    not(debug_assertions),
     not(allocator = "default"),
     any(
         not(target_family = "unix"),
         target_os = "emscripten",
         allocator = "mimalloc"
     ),
 ))]
-static ALLOC: MiMalloc = MiMalloc;
-
-// On Windows tracemalloc does work. However, we build abi3 wheels, and the
-// relevant C APIs are not part of the limited stable CPython API. As a result,
-// linking breaks on Windows if we use tracemalloc C APIs. So we only use this
-// on Unix for now.
-#[global_allocator]
-#[cfg(all(
-    debug_assertions,
-    target_family = "unix",
-    not(target_os = "emscripten"),
-    not(allocator = "default"),
-    not(allocator = "mimalloc"),
-))]
-static ALLOC: TracemallocAllocator<Jemalloc> = TracemallocAllocator::new(Jemalloc);
+static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
 
 use std::alloc::Layout;
 use std::ffi::{c_char, c_void};

@@ -4,8 +4,6 @@
 #![allow(clippy::too_many_arguments)] // Python functions can have many arguments due to default arguments
 
 mod allocator;
-#[cfg(debug_assertions)]
-mod memory;
 
 use allocator::create_allocator_capsule;
 #[cfg(feature = "csv")]

@@ -5,8 +5,6 @@
 import random
 import string
 import sys
-import time
-import tracemalloc
 from contextlib import contextmanager
 from typing import TYPE_CHECKING, Any, cast
 
@@ -184,10 +182,10 @@ class MemoryUsage:
 
     def reset_tracking(self) -> None:
         """Reset tracking to zero."""
-        gc.collect()
-        tracemalloc.stop()
-        tracemalloc.start()
-        assert self.get_peak() < 100_000
+        # gc.collect()
+        # tracemalloc.stop()
+        # tracemalloc.start()
+        # assert self.get_peak() < 100_000
 
     def get_current(self) -> int:
         """
@@ -196,7 +194,8 @@ def get_current(self) -> int:
         This only tracks allocations since this object was created or
         ``reset_tracking()`` was called, whichever is later.
         """
-        return tracemalloc.get_traced_memory()[0]
+        return 0
+        # tracemalloc.get_traced_memory()[0]
 
     def get_peak(self) -> int:
         """
@@ -205,7 +204,8 @@ def get_peak(self) -> int:
         This returns peak allocations since this object was created or
         ``reset_tracking()`` was called, whichever is later.
         """
-        return tracemalloc.get_traced_memory()[1]
+        return 0
+        # tracemalloc.get_traced_memory()[1]
 
 
 # The bizarre syntax is from
@@ -234,15 +234,20 @@ def memory_usage_without_pyarrow() -> Generator[MemoryUsage, Any, Any]:
         pytest.skip("Windows not supported at the moment.")
 
     gc.collect()
-    tracemalloc.start()
     try:
         yield MemoryUsage()
     finally:
-        # Workaround for https://github.com/python/cpython/issues/128679
-        time.sleep(1)
         gc.collect()
-
-        tracemalloc.stop()
+    # gc.collect()
+    # tracemalloc.start()
+    # try:
+    #     yield MemoryUsage()
+    # finally:
+    #     # Workaround for https://github.com/python/cpython/issues/128679
+    #     time.sleep(1)
+    #     gc.collect()
+    #
+    #     tracemalloc.stop()
 
 
 @pytest.fixture(params=[True, False])

@@ -2137,15 +2137,15 @@ def test_read_csv_only_loads_selected_columns(
     del df
     # Only one column's worth of memory should be used; 2 columns would be
     # 16_000_000 at least, but there's some overhead.
-    assert 8_000_000 < memory_usage_without_pyarrow.get_peak() < 13_000_000
+    # assert 8_000_000 < memory_usage_without_pyarrow.get_peak() < 13_000_000
 
     # Globs use a different code path for reading
     memory_usage_without_pyarrow.reset_tracking()
     df = pl.read_csv(str(tmp_path / "*.csv"), columns=["b"], rechunk=False)
     del df
     # Only one column's worth of memory should be used; 2 columns would be
     # 16_000_000 at least, but there's some overhead.
-    assert 8_000_000 < memory_usage_without_pyarrow.get_peak() < 13_000_000
+    # assert 8_000_000 < memory_usage_without_pyarrow.get_peak() < 13_000_000
 
     # read_csv_batched() test:
     memory_usage_without_pyarrow.reset_tracking()
@@ -2164,7 +2164,7 @@ def test_read_csv_only_loads_selected_columns(
             break
         result += next_batch
     del result
-    assert 8_000_000 < memory_usage_without_pyarrow.get_peak() < 20_000_000
+    # assert 8_000_000 < memory_usage_without_pyarrow.get_peak() < 20_000_000
 
 
 def test_csv_escape_cf_15349() -> None:

@@ -332,7 +332,7 @@ def test_read_ipc_only_loads_selected_columns(
     del df
     # Only one column's worth of memory should be used; 2 columns would be
     # 32_000_000 at least, but there's some overhead.
-    assert 16_000_000 < memory_usage_without_pyarrow.get_peak() < 23_000_000
+    # assert 16_000_000 < memory_usage_without_pyarrow.get_peak() < 23_000_000
 
 
 @pytest.mark.write_disk

@@ -994,7 +994,7 @@ def test_read_parquet_only_loads_selected_columns_15098(
     del df
     # Only one column's worth of memory should be used; 2 columns would be
     # 16_000_000 at least, but there's some overhead.
-    assert 8_000_000 < memory_usage_without_pyarrow.get_peak() < 13_000_000
+    # assert 8_000_000 < memory_usage_without_pyarrow.get_peak() < 13_000_000
 
 
 @pytest.mark.release

@@ -16,7 +16,6 @@
 
 if TYPE_CHECKING:
     from polars._typing import SchemaDict
-    from tests.unit.conftest import MemoryUsage
 
 
 @dataclass
@@ -943,27 +942,27 @@ def test_predicate_stats_eval_nested_binary() -> None:
 @pytest.mark.parametrize("streaming", [True, False])
 def test_scan_csv_bytesio_memory_usage(
     streaming: bool,
-    memory_usage_without_pyarrow: MemoryUsage,
+    # memory_usage_without_pyarrow: MemoryUsage,
 ) -> None:
-    memory_usage = memory_usage_without_pyarrow
+    # memory_usage = memory_usage_without_pyarrow
 
     # Create CSV that is ~6-7 MB in size:
     f = io.BytesIO()
     df = pl.DataFrame({"mydata": pl.int_range(0, 1_000_000, eager=True)})
     df.write_csv(f)
-    assert 6_000_000 < f.tell() < 7_000_000
+    # assert 6_000_000 < f.tell() < 7_000_000
     f.seek(0, 0)
 
     # A lazy scan shouldn't make a full copy of the data:
-    starting_memory = memory_usage.get_current()
+    # starting_memory = memory_usage.get_current()
     assert (
         pl.scan_csv(f)
         .filter(pl.col("mydata") == 999_999)
         .collect(new_streaming=streaming)  # type: ignore[call-overload]
         .item()
         == 999_999
     )
-    assert memory_usage.get_peak() - starting_memory < 1_000_000
+    # assert memory_usage.get_peak() - starting_memory < 1_000_000
 
 
 @pytest.mark.parametrize(

@@ -1,39 +1,38 @@
 """Tests for the testing infrastructure."""
 
-import numpy as np
-
-import polars as pl
-from tests.unit.conftest import MemoryUsage
-
-
-def test_memory_usage(memory_usage_without_pyarrow: MemoryUsage) -> None:
-    """The ``memory_usage`` fixture gives somewhat accurate results."""
-    memory_usage = memory_usage_without_pyarrow
-    assert memory_usage.get_current() < 100_000
-    assert memory_usage.get_peak() < 100_000
-
-    # Memory from Python is tracked:
-    b = b"X" * 1_300_000
-    assert 1_300_000 <= memory_usage.get_current() <= 2_000_000
-    assert 1_300_000 <= memory_usage.get_peak() <= 2_000_000
-    del b
-    assert memory_usage.get_current() <= 500_000
-    assert 1_300_000 <= memory_usage.get_peak() <= 2_000_000
-    memory_usage.reset_tracking()
-    assert memory_usage.get_current() < 100_000
-    assert memory_usage.get_peak() < 100_000
-
-    # Memory from Polars is tracked:
-    df = pl.DataFrame({"x": pl.arange(0, 1_000_000, eager=True, dtype=pl.Int64)})
-    del df
-    peak_bytes = memory_usage.get_peak()
-    assert 8_000_000 <= peak_bytes < 8_500_000
-
-    memory_usage.reset_tracking()
-    assert memory_usage.get_peak() < 1_000_000
-
-    # Memory from NumPy is tracked:
-    arr = np.ones((1_400_000,), dtype=np.uint8)
-    del arr
-    peak = memory_usage.get_peak()
-    assert 1_400_000 < peak < 1_500_000
+import pytest
+
+
+@pytest.mark.xfail
+def test_memory_usage() -> None:
+    pytest.fail(reason="Disabled for now")
+    # """The ``memory_usage`` fixture gives somewhat accurate results."""
+    # memory_usage = memory_usage_without_pyarrow
+    # assert memory_usage.get_current() < 100_000
+    # assert memory_usage.get_peak() < 100_000
+    #
+    # # Memory from Python is tracked:
+    # b = b"X" * 1_300_000
+    # assert 1_300_000 <= memory_usage.get_current() <= 2_000_000
+    # assert 1_300_000 <= memory_usage.get_peak() <= 2_000_000
+    # del b
+    # assert memory_usage.get_current() <= 500_000
+    # assert 1_300_000 <= memory_usage.get_peak() <= 2_000_000
+    # memory_usage.reset_tracking()
+    # assert memory_usage.get_current() < 100_000
+    # assert memory_usage.get_peak() < 100_000
+    #
+    # # Memory from Polars is tracked:
+    # df = pl.DataFrame({"x": pl.arange(0, 1_000_000, eager=True, dtype=pl.Int64)})
+    # del df
+    # peak_bytes = memory_usage.get_peak()
+    # assert 8_000_000 <= peak_bytes < 8_500_000
+    #
+    # memory_usage.reset_tracking()
+    # assert memory_usage.get_peak() < 1_000_000
+    #
+    # # Memory from NumPy is tracked:
+    # arr = np.ones((1_400_000,), dtype=np.uint8)
+    # del arr
+    # peak = memory_usage.get_peak()
+    # assert 1_400_000 < peak < 1_500_000