FEAT-modin-project#7337: Using dynamic partitioning in broadcast_apply

Retribution98 · Retribution98 · commit 39d334838557 · 2024-07-08T16:48:55.000Z
Signed-off-by: Kirill Suvorov <kirill.suvorov@intel.com>
diff --git a/modin/core/dataframe/pandas/dataframe/dataframe.py b/modin/core/dataframe/pandas/dataframe/dataframe.py
@@ -3264,7 +3264,6 @@ def broadcast_apply(
             partition_sizes_along_axis, joined_index = self._get_axis_lengths_cache(
                 axis
             ), self.copy_axis_cache(axis)
-
         new_frame = self._partition_mgr_cls.broadcast_apply(
             axis, func, left_parts, right_parts
         )
diff --git a/modin/core/dataframe/pandas/partitioning/partition_manager.py b/modin/core/dataframe/pandas/partitioning/partition_manager.py
@@ -439,7 +439,7 @@ def get_partitions(index):
 
     @classmethod
     @wait_computations_if_benchmark_mode
-    def broadcast_apply(cls, axis, apply_func, left, right):
+    def base_broadcast_apply(cls, axis, apply_func, left, right):
         """
         Broadcast the `right` partitions to `left` and apply `apply_func` function.
 
@@ -492,6 +492,40 @@ def map_func(df, *others):
             ]
         )
 
+    @classmethod
+    @wait_computations_if_benchmark_mode
+    def broadcast_axis(
+        cls,
+        axis,
+        apply_func,
+        left,
+        right,
+        keep_partitioning=False,
+    ):
+        num_splits = len(left) if axis == 0 else len(left.T)
+        preprocessed_map_func = cls.preprocess_func(apply_func)
+        left_partitions = cls.axis_partition(left, axis)
+        right_partitions = None if right is None else cls.axis_partition(right, axis)
+        kw = {
+            "num_splits": num_splits,
+            "maintain_partitioning": keep_partitioning,
+        }
+
+        result_blocks = np.array(
+            [
+                left_partitions[i].apply(
+                    preprocessed_map_func,
+                    other_axis_partition=right_partitions[i],
+                    **kw,
+                )
+                for i in np.arange(len(left_partitions))
+            ]
+        )
+        # When mapping over columns, the results come back laid out as if they
+        # were rows, so we transpose the returned 2D NumPy array to restore
+        # the correct orientation.
+        return result_blocks.T if not axis else result_blocks
+
     @classmethod
     @wait_computations_if_benchmark_mode
     def broadcast_axis_partitions(
@@ -647,6 +681,63 @@ def base_map_partitions(
             ]
         )
 
+    @classmethod
+    @wait_computations_if_benchmark_mode
+    def broadcast_apply(
+        cls,
+        axis,
+        apply_func,
+        left,
+        right,
+    ):
+        """
+        Broadcast the `right` partitions to `left` and apply `apply_func` function
+        using different approaches to achieve the best performance.
+
+        Parameters
+        ----------
+        axis : {0, 1}
+            Axis to apply and broadcast over.
+        apply_func : callable
+            Function to apply.
+        left : np.ndarray
+            NumPy array of left partitions.
+        right : np.ndarray
+            NumPy array of right partitions.
+
+        Returns
+        -------
+        np.ndarray
+            NumPy array of result partition objects.
+        """
+        # The condition for choosing `base_broadcast_apply` differs from the
+        # analogous condition in `map_partitions`, because the columnar
+        # partitioning approach cannot be implemented for `broadcast_apply`.
+        # The left and right dataframes may be partitioned differently, so
+        # partitions would have to be merged along both axes at once, which
+        # leads to large slowdowns.
+        if (
+            np.prod(left.shape) <= 1.5 * CpuCount.get()
+            or left.shape[axis] < CpuCount.get() // 5
+        ):
+            # block-wise broadcast
+            new_partitions = cls.base_broadcast_apply(
+                axis,
+                apply_func,
+                left,
+                right,
+            )
+        else:
+            # axis-wise broadcast
+            new_partitions = cls.broadcast_axis(
+                axis=axis ^ 1,
+                left=left,
+                right=right,
+                apply_func=apply_func,
+                keep_partitioning=True,
+            )
+        return new_partitions
+
     @classmethod
     @wait_computations_if_benchmark_mode
     def map_partitions(
diff --git a/modin/core/storage_formats/pandas/query_compiler.py b/modin/core/storage_formats/pandas/query_compiler.py
@@ -45,7 +45,7 @@
 from pandas.core.indexing import check_bool_indexer
 from pandas.errors import DataError
 
-from modin.config import CpuCount, RangePartitioning
+from modin.config import RangePartitioning
 from modin.core.dataframe.algebra import (
     Binary,
     Fold,
@@ -3107,14 +3107,8 @@ def dropna(self, **kwargs):
             lib.no_default,
             None,
         )
-        # FIXME: this is a naive workaround for this problem: https://github.com/modin-project/modin/issues/5394
-        # if there are too many partitions then all non-full-axis implementations start acting very badly.
-        # The here threshold is pretty random though it works fine on simple scenarios
-        processable_amount_of_partitions = (
-            self._modin_frame.num_parts < CpuCount.get() * 32
-        )
 
-        if is_column_wise and no_thresh_passed and processable_amount_of_partitions:
+        if is_column_wise and no_thresh_passed:
             how = kwargs.get("how", "any")
             subset = kwargs.get("subset")
             how = "any" if how in (lib.no_default, None) else how

Original file line number	Diff line number	Diff line change
`@@ -3264,7 +3264,6 @@ def broadcast_apply(`
`3264`	`3264`	`partition_sizes_along_axis, joined_index = self._get_axis_lengths_cache(`
`3265`	`3265`	`axis`
`3266`	`3266`	`), self.copy_axis_cache(axis)`
`3267`		`-`
`3268`	`3267`	`new_frame = self._partition_mgr_cls.broadcast_apply(`
`3269`	`3268`	`axis, func, left_parts, right_parts`
`3270`	`3269`	`)`