Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 70975bc

Browse files
committed Jul 10, 2024
fix ci
1 parent c6d3659 commit 70975bc

File tree

3 files changed

+15
-25
lines changed

3 files changed

+15
-25
lines changed
 

‎modin/core/dataframe/algebra/groupby.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -655,9 +655,11 @@ def aggregate_on_dict(grp_obj, *args, **kwargs):
655655
)
656656

657657
native_res_part = [] if native_agg_res is None else [native_agg_res]
658-
result = pandas.concat(
659-
[*native_res_part, *custom_results], axis=1, copy=False
660-
)
658+
parts = [*native_res_part, *custom_results]
659+
if parts:
660+
result = pandas.concat(parts, axis=1, copy=False)
661+
else:
662+
result = pandas.DataFrame(columns=result_columns)
661663

662664
# The order is naturally preserved if there's no custom aggregations
663665
if preserve_aggregation_order and len(custom_aggs):

‎modin/core/dataframe/pandas/dataframe/dataframe.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -3265,9 +3265,7 @@ def broadcast_apply(
32653265
axis
32663266
), self.copy_axis_cache(axis)
32673267

3268-
new_frame = self._partition_mgr_cls.broadcast_apply(
3269-
axis, func, left_parts, right_parts
3270-
)
3268+
new_frame = self._partition_mgr_cls.apply(axis, func, left_parts, right_parts)
32713269
if isinstance(dtypes, str) and dtypes == "copy":
32723270
dtypes = self.copy_dtypes_cache()
32733271

‎modin/core/dataframe/pandas/partitioning/partition_manager.py

+9-19
Original file line numberDiff line numberDiff line change
@@ -338,9 +338,7 @@ def groupby_reduce(
338338
f"the number of partitions along {axis=} is not equal: "
339339
+ f"{partitions.shape[axis]} != {by.shape[axis]}"
340340
)
341-
mapped_partitions = cls.broadcast_apply(
342-
axis, map_func, left=partitions, right=by
343-
)
341+
mapped_partitions = cls.apply(axis, map_func, left=partitions, right=by)
344342
else:
345343
mapped_partitions = cls.map_partitions(partitions, map_func)
346344

@@ -439,7 +437,7 @@ def get_partitions(index):
439437

440438
@classmethod
441439
@wait_computations_if_benchmark_mode
442-
def base_broadcast_apply(cls, axis, apply_func, left, right):
440+
def broadcast_apply(cls, axis, apply_func, left, right):
443441
"""
444442
Broadcast the `right` partitions to `left` and apply `apply_func` function.
445443
@@ -494,13 +492,12 @@ def map_func(df, *others):
494492

495493
@classmethod
496494
@wait_computations_if_benchmark_mode
497-
def broadcast_axis(
495+
def apply_axis_partitions(
498496
cls,
499497
axis,
500498
apply_func,
501499
left,
502500
right,
503-
keep_partitioning=False,
504501
):
505502
"""
506503
Broadcast the `right` partitions to `left` and apply `apply_func` along full `axis`.
@@ -530,21 +527,15 @@ def broadcast_axis(
530527
This method differs from `broadcast_axis_partitions` in that it does not send
531528
all right partitions for each remote task based on the left partitions.
532529
"""
533-
num_splits = len(left) if axis == 0 else len(left.T)
534530
preprocessed_map_func = cls.preprocess_func(apply_func)
535531
left_partitions = cls.axis_partition(left, axis)
536532
right_partitions = None if right is None else cls.axis_partition(right, axis)
537-
kw = {
538-
"num_splits": num_splits,
539-
"maintain_partitioning": keep_partitioning,
540-
}
541533

542534
result_blocks = np.array(
543535
[
544536
left_partitions[i].apply(
545537
preprocessed_map_func,
546538
other_axis_partition=right_partitions[i],
547-
**kw,
548539
)
549540
for i in np.arange(len(left_partitions))
550541
]
@@ -711,7 +702,7 @@ def base_map_partitions(
711702

712703
@classmethod
713704
@wait_computations_if_benchmark_mode
714-
def broadcast_apply(
705+
def apply(
715706
cls,
716707
axis,
717708
apply_func,
@@ -738,31 +729,30 @@ def broadcast_apply(
738729
np.ndarray
739730
NumPy array of result partition objects.
740731
"""
741-
# The condition for the execution of `base_broadcast_apply` is different from
732+
# The condition for the execution of `broadcast_apply` is different from
742733
# the same condition in the `map_partitions`, since the columnar partitioning approach
743-
# cannot be implemented for the `broadcast_apply`. This is due to the fact that different
744-
# partitions of the left and right dataframes are possible for the `broadcast_apply`,
734+
# cannot be implemented for the `apply`. This is due to the fact that different
735+
# partitions of the left and right dataframes are possible for the `apply`,
745736
# as a result of which it is necessary to merge partitions on both axes at once,
746737
# which leads to large slowdowns.
747738
if (
748739
np.prod(left.shape) <= 1.5 * CpuCount.get()
749740
or left.shape[axis] < CpuCount.get() // 5
750741
):
751742
# block-wise broadcast
752-
new_partitions = cls.base_broadcast_apply(
743+
new_partitions = cls.broadcast_apply(
753744
axis,
754745
apply_func,
755746
left,
756747
right,
757748
)
758749
else:
759750
# axis-wise broadcast
760-
new_partitions = cls.broadcast_axis(
751+
new_partitions = cls.apply_axis_partitions(
761752
axis=axis ^ 1,
762753
left=left,
763754
right=right,
764755
apply_func=apply_func,
765-
keep_partitioning=True,
766756
)
767757
return new_partitions
768758

0 commit comments

Comments
 (0)
Please sign in to comment.