From e537b3b731bd534fa672d1a5727a3b630a31f0cf Mon Sep 17 00:00:00 2001 From: Marshall Date: Fri, 28 Feb 2025 08:23:31 -0500 Subject: [PATCH] chore: Rename 'join_nulls' parameter to 'nulls_equal' in join functions (#21507) --- crates/polars-lazy/src/frame/mod.rs | 16 +-- crates/polars-ops/src/frame/join/args.rs | 8 +- .../src/frame/join/dispatch_left_right.rs | 9 +- .../src/frame/join/hash_join/mod.rs | 6 +- .../src/frame/join/hash_join/single_keys.rs | 6 +- .../join/hash_join/single_keys_dispatch.rs | 128 +++++++++++------- .../frame/join/hash_join/single_keys_inner.rs | 8 +- .../frame/join/hash_join/single_keys_left.rs | 8 +- .../frame/join/hash_join/single_keys_outer.rs | 10 +- .../join/hash_join/single_keys_semi_anti.rs | 16 +-- .../src/frame/join/hash_join/sort_merge.rs | 20 +-- crates/polars-ops/src/frame/join/mod.rs | 22 +-- crates/polars-ops/src/series/ops/replace.rs | 4 +- .../executors/sinks/joins/generic_build.rs | 12 +- .../sinks/joins/generic_probe_inner_left.rs | 14 +- .../sinks/joins/generic_probe_outer.rs | 10 +- .../src/executors/sinks/joins/row_values.rs | 4 +- crates/polars-pipe/src/pipeline/convert.rs | 4 +- crates/polars-python/src/lazyframe/general.rs | 6 +- .../src/lazyframe/visitor/nodes.rs | 2 +- crates/polars-sql/src/context.rs | 2 +- .../src/nodes/joins/equi_join.rs | 2 +- crates/polars-stream/src/physical_plan/fmt.rs | 2 +- docs/source/releases/upgrade/0.20.md | 2 +- py-polars/polars/dataframe/frame.py | 7 +- py-polars/polars/functions/eager.py | 2 +- py-polars/polars/lazyframe/frame.py | 9 +- py-polars/tests/unit/operations/test_join.py | 13 +- py-polars/tests/unit/sql/test_joins.py | 10 +- .../unit/streaming/test_streaming_join.py | 4 +- 30 files changed, 204 insertions(+), 162 deletions(-) diff --git a/crates/polars-lazy/src/frame/mod.rs b/crates/polars-lazy/src/frame/mod.rs index 6088c7fffedd..950d9da08043 100644 --- a/crates/polars-lazy/src/frame/mod.rs +++ b/crates/polars-lazy/src/frame/mod.rs @@ -1382,7 +1382,7 @@ impl LazyFrame { validation, suffix, slice, - join_nulls, + nulls_equal, coalesce, maintain_order, } = args; @@ -1398,7 +1398,7 @@ impl LazyFrame { .right_on(right_on) .how(how) .validate(validation) - .join_nulls(join_nulls) + .join_nulls(nulls_equal) .coalesce(coalesce) .maintain_order(maintain_order); @@ -2048,7 +2048,7 @@ pub struct JoinBuilder { force_parallel: bool, suffix: Option, validation: JoinValidation, - join_nulls: bool, + nulls_equal: bool, coalesce: JoinCoalesce, maintain_order: MaintainOrderJoin, } @@ -2065,7 +2065,7 @@ impl JoinBuilder { force_parallel: false, suffix: None, validation: Default::default(), - join_nulls: false, + nulls_equal: false, coalesce: Default::default(), maintain_order: Default::default(), } @@ -2127,8 +2127,8 @@ impl JoinBuilder { } /// Join on null values. By default null values will never produce matches. - pub fn join_nulls(mut self, join_nulls: bool) -> Self { - self.join_nulls = join_nulls; + pub fn join_nulls(mut self, nulls_equal: bool) -> Self { + self.nulls_equal = nulls_equal; self } @@ -2169,7 +2169,7 @@ impl JoinBuilder { validation: self.validation, suffix: self.suffix, slice: None, - join_nulls: self.join_nulls, + nulls_equal: self.nulls_equal, coalesce: self.coalesce, maintain_order: self.maintain_order, }; @@ -2266,7 +2266,7 @@ impl JoinBuilder { validation: self.validation, suffix: self.suffix, slice: None, - join_nulls: self.join_nulls, + nulls_equal: self.nulls_equal, coalesce: self.coalesce, maintain_order: self.maintain_order, }; diff --git a/crates/polars-ops/src/frame/join/args.rs b/crates/polars-ops/src/frame/join/args.rs index 3833b9a6a192..1dcdf1ac9f44 100644 --- a/crates/polars-ops/src/frame/join/args.rs +++ b/crates/polars-ops/src/frame/join/args.rs @@ -26,7 +26,7 @@ pub struct JoinArgs { pub validation: JoinValidation, pub suffix: Option, pub slice: Option<(i64, usize)>, - pub join_nulls: bool, + pub nulls_equal: bool, pub coalesce: JoinCoalesce, pub maintain_order: MaintainOrderJoin, } @@ -120,7 +120,7 @@ impl JoinArgs { validation: Default::default(), suffix: None, slice: None, - join_nulls: false, + nulls_equal: false, coalesce: Default::default(), maintain_order: Default::default(), } @@ -312,7 +312,7 @@ impl JoinValidation { s_left: &Series, s_right: &Series, build_shortest_table: bool, - join_nulls: bool, + nulls_equal: bool, ) -> PolarsResult<()> { // In default, probe is the left series. // @@ -330,7 +330,7 @@ impl JoinValidation { // The other side use `validate_build` to check ManyToMany | ManyToOne => true, OneToMany | OneToOne => { - if !join_nulls && probe.null_count() > 0 { + if !nulls_equal && probe.null_count() > 0 { probe.n_unique()? - 1 == probe.len() - probe.null_count() } else { probe.n_unique()? == probe.len() diff --git a/crates/polars-ops/src/frame/join/dispatch_left_right.rs b/crates/polars-ops/src/frame/join/dispatch_left_right.rs index f4011a97bbf0..08289b3e86bc 100644 --- a/crates/polars-ops/src/frame/join/dispatch_left_right.rs +++ b/crates/polars-ops/src/frame/join/dispatch_left_right.rs @@ -79,8 +79,13 @@ pub fn materialize_left_join_from_series( s_right = s_right.rechunk(); } - let (left_idx, right_idx) = - sort_or_hash_left(&s_left, &s_right, verbose, args.validation, args.join_nulls)?; + let (left_idx, right_idx) = sort_or_hash_left( + &s_left, + &s_right, + verbose, + args.validation, + args.nulls_equal, + )?; let right = if let Some(drop_names) = drop_names { right.drop_many(drop_names) diff --git a/crates/polars-ops/src/frame/join/hash_join/mod.rs b/crates/polars-ops/src/frame/join/hash_join/mod.rs index 25d40b8299ba..b5790b73d212 100644 --- a/crates/polars-ops/src/frame/join/hash_join/mod.rs +++ b/crates/polars-ops/src/frame/join/hash_join/mod.rs @@ -129,13 +129,13 @@ pub trait JoinDispatch: IntoDf { s_right: &Series, slice: Option<(i64, usize)>, anti: bool, - join_nulls: bool, + nulls_equal: bool, ) -> PolarsResult { let ca_self = self.to_df(); #[cfg(feature = "dtype-categorical")] _check_categorical_src(s_left.dtype(), s_right.dtype())?; - let idx = s_left.hash_join_semi_anti(s_right, anti, join_nulls)?; + let idx = s_left.hash_join_semi_anti(s_right, anti, nulls_equal)?; // SAFETY: // indices are in bounds Ok(unsafe { ca_self._finish_anti_semi_join(&idx, slice) }) @@ -153,7 +153,7 @@ pub trait JoinDispatch: IntoDf { // Get the indexes of the joined relations let (mut join_idx_l, mut join_idx_r) = - s_left.hash_join_outer(s_right, args.validation, args.join_nulls)?; + s_left.hash_join_outer(s_right, args.validation, args.nulls_equal)?; try_raise_keyboard_interrupt(); if let Some((offset, len)) = args.slice { diff --git a/crates/polars-ops/src/frame/join/hash_join/single_keys.rs b/crates/polars-ops/src/frame/join/hash_join/single_keys.rs index 38b59c2d7454..35123aeb3a42 100644 --- a/crates/polars-ops/src/frame/join/hash_join/single_keys.rs +++ b/crates/polars-ops/src/frame/join/hash_join/single_keys.rs @@ -15,7 +15,7 @@ const MIN_ELEMS_PER_THREAD: usize = if cfg!(debug_assertions) { 1 } else { 128 } pub(crate) fn build_tables( keys: Vec, - join_nulls: bool, + nulls_equal: bool, ) -> Vec::TotalOrdItem, IdxVec>> where T: TotalHash + TotalEq + ToTotalOrd, @@ -38,7 +38,7 @@ where for it in keys { for k in it { let k = k.to_total_ord(); - if !k.is_null() || join_nulls { + if !k.is_null() || nulls_equal { hm.entry(k).or_default().push(offset); } offset += 1; @@ -144,7 +144,7 @@ where let key = *scatter_keys.get_unchecked(i); - if !key.is_null() || join_nulls { + if !key.is_null() || nulls_equal { let idx = *scatter_idxs.get_unchecked(i); match hm.entry(key) { Entry::Occupied(mut o) => { diff --git a/crates/polars-ops/src/frame/join/hash_join/single_keys_dispatch.rs b/crates/polars-ops/src/frame/join/hash_join/single_keys_dispatch.rs index bc5ae0d0c7f2..208b0a558b37 100644 --- a/crates/polars-ops/src/frame/join/hash_join/single_keys_dispatch.rs +++ b/crates/polars-ops/src/frame/join/hash_join/single_keys_dispatch.rs @@ -17,11 +17,11 @@ pub trait SeriesJoin: SeriesSealed + Sized { &self, other: &Series, validate: JoinValidation, - join_nulls: bool, + nulls_equal: bool, ) -> PolarsResult { let s_self = self.as_series(); let (lhs, rhs) = (s_self.to_physical_repr(), other.to_physical_repr()); - validate.validate_probe(&lhs, &rhs, false, join_nulls)?; + validate.validate_probe(&lhs, &rhs, false, nulls_equal)?; let lhs_dtype = lhs.dtype(); let rhs_dtype = rhs.dtype(); @@ -37,7 +37,15 @@ pub trait SeriesJoin: SeriesSealed + Sized { let lhs = lhs.iter().map(|v| v.as_slice()).collect::>(); let rhs = rhs.iter().map(|v| v.as_slice()).collect::>(); let build_null_count = other.null_count(); - hash_join_tuples_left(lhs, rhs, None, None, validate, join_nulls, build_null_count) + hash_join_tuples_left( + lhs, + rhs, + None, + None, + validate, + nulls_equal, + build_null_count, + ) }, T::BinaryOffset => { let lhs = lhs.binary_offset().unwrap(); @@ -47,19 +55,27 @@ pub trait SeriesJoin: SeriesSealed + Sized { let lhs = lhs.iter().map(|k| k.as_slice()).collect::>(); let rhs = rhs.iter().map(|k| k.as_slice()).collect::>(); let build_null_count = other.null_count(); - hash_join_tuples_left(lhs, rhs, None, None, validate, join_nulls, build_null_count) + hash_join_tuples_left( + lhs, + rhs, + None, + None, + validate, + nulls_equal, + build_null_count, + ) }, #[cfg(feature = "dtype-struct")] T::Struct(_) => { let lhs = &encode_rows_unordered(&[lhs.into_owned().into()])?.into_series(); let rhs = &encode_rows_unordered(&[rhs.into_owned().into()])?.into_series(); - lhs.hash_join_left(rhs, validate, join_nulls) + lhs.hash_join_left(rhs, validate, nulls_equal) }, x if x.is_float() => { with_match_physical_float_polars_type!(lhs.dtype(), |$T| { let lhs: &ChunkedArray<$T> = lhs.as_ref().as_ref().as_ref(); let rhs: &ChunkedArray<$T> = rhs.as_ref().as_ref().as_ref(); - num_group_join_left(lhs, rhs, validate, join_nulls) + num_group_join_left(lhs, rhs, validate, nulls_equal) }) }, _ => { @@ -74,11 +90,11 @@ pub trait SeriesJoin: SeriesSealed + Sized { match (lhs, rhs) { (B::Small(lhs), B::Small(rhs)) => { // Turbofish: see #17137. - num_group_join_left::(&lhs, &rhs, validate, join_nulls) + num_group_join_left::(&lhs, &rhs, validate, nulls_equal) }, (B::Large(lhs), B::Large(rhs)) => { // Turbofish: see #17137. - num_group_join_left::(&lhs, &rhs, validate, join_nulls) + num_group_join_left::(&lhs, &rhs, validate, nulls_equal) }, _ => { polars_bail!( @@ -95,7 +111,7 @@ pub trait SeriesJoin: SeriesSealed + Sized { &self, other: &Series, anti: bool, - join_nulls: bool, + nulls_equal: bool, ) -> PolarsResult> { let s_self = self.as_series(); let (lhs, rhs) = (s_self.to_physical_repr(), other.to_physical_repr()); @@ -115,9 +131,9 @@ pub trait SeriesJoin: SeriesSealed + Sized { let lhs = lhs.iter().map(|k| k.as_slice()).collect::>(); let rhs = rhs.iter().map(|k| k.as_slice()).collect::>(); if anti { - hash_join_tuples_left_anti(lhs, rhs, join_nulls) + hash_join_tuples_left_anti(lhs, rhs, nulls_equal) } else { - hash_join_tuples_left_semi(lhs, rhs, join_nulls) + hash_join_tuples_left_semi(lhs, rhs, nulls_equal) } }, T::BinaryOffset => { @@ -128,22 +144,22 @@ pub trait SeriesJoin: SeriesSealed + Sized { let lhs = lhs.iter().map(|k| k.as_slice()).collect::>(); let rhs = rhs.iter().map(|k| k.as_slice()).collect::>(); if anti { - hash_join_tuples_left_anti(lhs, rhs, join_nulls) + hash_join_tuples_left_anti(lhs, rhs, nulls_equal) } else { - hash_join_tuples_left_semi(lhs, rhs, join_nulls) + hash_join_tuples_left_semi(lhs, rhs, nulls_equal) } }, #[cfg(feature = "dtype-struct")] T::Struct(_) => { let lhs = &encode_rows_unordered(&[lhs.into_owned().into()])?.into_series(); let rhs = &encode_rows_unordered(&[rhs.into_owned().into()])?.into_series(); - lhs.hash_join_semi_anti(rhs, anti, join_nulls)? + lhs.hash_join_semi_anti(rhs, anti, nulls_equal)? }, x if x.is_float() => { with_match_physical_float_polars_type!(lhs.dtype(), |$T| { let lhs: &ChunkedArray<$T> = lhs.as_ref().as_ref().as_ref(); let rhs: &ChunkedArray<$T> = rhs.as_ref().as_ref().as_ref(); - num_group_join_anti_semi(lhs, rhs, anti, join_nulls) + num_group_join_anti_semi(lhs, rhs, anti, nulls_equal) }) }, _ => { @@ -158,11 +174,11 @@ pub trait SeriesJoin: SeriesSealed + Sized { match (lhs, rhs) { (B::Small(lhs), B::Small(rhs)) => { // Turbofish: see #17137. - num_group_join_anti_semi::(&lhs, &rhs, anti, join_nulls) + num_group_join_anti_semi::(&lhs, &rhs, anti, nulls_equal) }, (B::Large(lhs), B::Large(rhs)) => { // Turbofish: see #17137. - num_group_join_anti_semi::(&lhs, &rhs, anti, join_nulls) + num_group_join_anti_semi::(&lhs, &rhs, anti, nulls_equal) }, _ => { polars_bail!( @@ -179,11 +195,11 @@ pub trait SeriesJoin: SeriesSealed + Sized { &self, other: &Series, validate: JoinValidation, - join_nulls: bool, + nulls_equal: bool, ) -> PolarsResult<(InnerJoinIds, bool)> { let s_self = self.as_series(); let (lhs, rhs) = (s_self.to_physical_repr(), other.to_physical_repr()); - validate.validate_probe(&lhs, &rhs, true, join_nulls)?; + validate.validate_probe(&lhs, &rhs, true, nulls_equal)?; let lhs_dtype = lhs.dtype(); let rhs_dtype = rhs.dtype(); @@ -210,7 +226,7 @@ pub trait SeriesJoin: SeriesSealed + Sized { rhs, swapped, validate, - join_nulls, + nulls_equal, build_null_count, )?, !swapped, @@ -234,7 +250,7 @@ pub trait SeriesJoin: SeriesSealed + Sized { rhs, swapped, validate, - join_nulls, + nulls_equal, build_null_count, )?, !swapped, @@ -244,13 +260,13 @@ pub trait SeriesJoin: SeriesSealed + Sized { T::Struct(_) => { let lhs = &encode_rows_unordered(&[lhs.into_owned().into()])?.into_series(); let rhs = &encode_rows_unordered(&[rhs.into_owned().into()])?.into_series(); - lhs.hash_join_inner(rhs, validate, join_nulls) + lhs.hash_join_inner(rhs, validate, nulls_equal) }, x if x.is_float() => { with_match_physical_float_polars_type!(lhs.dtype(), |$T| { let lhs: &ChunkedArray<$T> = lhs.as_ref().as_ref().as_ref(); let rhs: &ChunkedArray<$T> = rhs.as_ref().as_ref().as_ref(); - group_join_inner::<$T>(lhs, rhs, validate, join_nulls) + group_join_inner::<$T>(lhs, rhs, validate, nulls_equal) }) }, _ => { @@ -265,11 +281,11 @@ pub trait SeriesJoin: SeriesSealed + Sized { match (lhs, rhs) { (B::Small(lhs), B::Small(rhs)) => { // Turbofish: see #17137. - group_join_inner::(&lhs, &rhs, validate, join_nulls) + group_join_inner::(&lhs, &rhs, validate, nulls_equal) }, (B::Large(lhs), BitRepr::Large(rhs)) => { // Turbofish: see #17137. - group_join_inner::(&lhs, &rhs, validate, join_nulls) + group_join_inner::(&lhs, &rhs, validate, nulls_equal) }, _ => { polars_bail!( @@ -285,11 +301,11 @@ pub trait SeriesJoin: SeriesSealed + Sized { &self, other: &Series, validate: JoinValidation, - join_nulls: bool, + nulls_equal: bool, ) -> PolarsResult<(PrimitiveArray, PrimitiveArray)> { let s_self = self.as_series(); let (lhs, rhs) = (s_self.to_physical_repr(), other.to_physical_repr()); - validate.validate_probe(&lhs, &rhs, true, join_nulls)?; + validate.validate_probe(&lhs, &rhs, true, nulls_equal)?; let lhs_dtype = lhs.dtype(); let rhs_dtype = rhs.dtype(); @@ -305,7 +321,7 @@ pub trait SeriesJoin: SeriesSealed + Sized { // Take slices so that vecs are not copied let lhs = lhs.iter().map(|k| k.as_slice()).collect::>(); let rhs = rhs.iter().map(|k| k.as_slice()).collect::>(); - hash_join_tuples_outer(lhs, rhs, swapped, validate, join_nulls) + hash_join_tuples_outer(lhs, rhs, swapped, validate, nulls_equal) }, T::BinaryOffset => { let lhs = lhs.binary_offset().unwrap(); @@ -314,19 +330,19 @@ pub trait SeriesJoin: SeriesSealed + Sized { // Take slices so that vecs are not copied let lhs = lhs.iter().map(|k| k.as_slice()).collect::>(); let rhs = rhs.iter().map(|k| k.as_slice()).collect::>(); - hash_join_tuples_outer(lhs, rhs, swapped, validate, join_nulls) + hash_join_tuples_outer(lhs, rhs, swapped, validate, nulls_equal) }, #[cfg(feature = "dtype-struct")] T::Struct(_) => { let lhs = &encode_rows_unordered(&[lhs.into_owned().into()])?.into_series(); let rhs = &encode_rows_unordered(&[rhs.into_owned().into()])?.into_series(); - lhs.hash_join_outer(rhs, validate, join_nulls) + lhs.hash_join_outer(rhs, validate, nulls_equal) }, x if x.is_float() => { with_match_physical_float_polars_type!(lhs.dtype(), |$T| { let lhs: &ChunkedArray<$T> = lhs.as_ref().as_ref().as_ref(); let rhs: &ChunkedArray<$T> = rhs.as_ref().as_ref().as_ref(); - hash_join_outer(lhs, rhs, validate, join_nulls) + hash_join_outer(lhs, rhs, validate, nulls_equal) }) }, _ => { @@ -338,11 +354,11 @@ pub trait SeriesJoin: SeriesSealed + Sized { match (lhs, rhs) { (B::Small(lhs), B::Small(rhs)) => { // Turbofish: see #17137. - hash_join_outer::(&lhs, &rhs, validate, join_nulls) + hash_join_outer::(&lhs, &rhs, validate, nulls_equal) }, (B::Large(lhs), B::Large(rhs)) => { // Turbofish: see #17137. - hash_join_outer::(&lhs, &rhs, validate, join_nulls) + hash_join_outer::(&lhs, &rhs, validate, nulls_equal) }, _ => { polars_bail!(nyi = "Mismatch bit repr Hash Join Outer between {lhs_dtype} and {rhs_dtype}"); @@ -373,7 +389,7 @@ fn group_join_inner( left: &ChunkedArray, right: &ChunkedArray, validate: JoinValidation, - join_nulls: bool, + nulls_equal: bool, ) -> PolarsResult<(InnerJoinIds, bool)> where T: PolarsDataType, @@ -404,14 +420,24 @@ where .collect::>(); Ok(( hash_join_tuples_inner( - splitted_a, splitted_b, swapped, validate, join_nulls, 0, + splitted_a, + splitted_b, + swapped, + validate, + nulls_equal, + 0, )?, !swapped, )) } else { Ok(( hash_join_tuples_inner( - splitted_a, splitted_b, swapped, validate, join_nulls, 0, + splitted_a, + splitted_b, + swapped, + validate, + nulls_equal, + 0, )?, !swapped, )) @@ -429,7 +455,7 @@ where splitted_b, swapped, validate, - join_nulls, + nulls_equal, build_null_count, )?, !swapped, @@ -478,7 +504,7 @@ fn num_group_join_left( left: &ChunkedArray, right: &ChunkedArray, validate: JoinValidation, - join_nulls: bool, + nulls_equal: bool, ) -> PolarsResult where T: PolarsNumericType, @@ -499,7 +525,7 @@ where (0, 0, 1, 1) => { let keys_a = chunks_as_slices(&splitted_a); let keys_b = chunks_as_slices(&splitted_b); - hash_join_tuples_left(keys_a, keys_b, None, None, validate, join_nulls, 0) + hash_join_tuples_left(keys_a, keys_b, None, None, validate, nulls_equal, 0) }, (0, 0, _, _) => { let keys_a = chunks_as_slices(&splitted_a); @@ -513,7 +539,7 @@ where mapping_left.as_deref(), mapping_right.as_deref(), validate, - join_nulls, + nulls_equal, 0, ) }, @@ -529,7 +555,7 @@ where mapping_left.as_deref(), mapping_right.as_deref(), validate, - join_nulls, + nulls_equal, build_null_count, ) }, @@ -540,7 +566,7 @@ fn hash_join_outer( ca_in: &ChunkedArray, other: &ChunkedArray, validate: JoinValidation, - join_nulls: bool, + nulls_equal: bool, ) -> PolarsResult<(PrimitiveArray, PrimitiveArray)> where T: PolarsNumericType, @@ -563,7 +589,7 @@ where .iter() .flat_map(|ca| ca.downcast_iter().map(|arr| arr.values().as_slice())) .collect::>(); - hash_join_tuples_outer(iters_a, iters_b, swapped, validate, join_nulls) + hash_join_tuples_outer(iters_a, iters_b, swapped, validate, nulls_equal) }, _ => { let iters_a = splitted_a @@ -574,7 +600,7 @@ where .iter() .flat_map(|ca| ca.downcast_iter().map(|arr| arr.iter())) .collect::>(); - hash_join_tuples_outer(iters_a, iters_b, swapped, validate, join_nulls) + hash_join_tuples_outer(iters_a, iters_b, swapped, validate, nulls_equal) }, } } @@ -612,7 +638,7 @@ fn num_group_join_anti_semi( left: &ChunkedArray, right: &ChunkedArray, anti: bool, - join_nulls: bool, + nulls_equal: bool, ) -> Vec where T: PolarsNumericType, @@ -633,27 +659,27 @@ where let keys_a = chunks_as_slices(&splitted_a); let keys_b = chunks_as_slices(&splitted_b); if anti { - hash_join_tuples_left_anti(keys_a, keys_b, join_nulls) + hash_join_tuples_left_anti(keys_a, keys_b, nulls_equal) } else { - hash_join_tuples_left_semi(keys_a, keys_b, join_nulls) + hash_join_tuples_left_semi(keys_a, keys_b, nulls_equal) } }, (0, 0, _, _) => { let keys_a = chunks_as_slices(&splitted_a); let keys_b = chunks_as_slices(&splitted_b); if anti { - hash_join_tuples_left_anti(keys_a, keys_b, join_nulls) + hash_join_tuples_left_anti(keys_a, keys_b, nulls_equal) } else { - hash_join_tuples_left_semi(keys_a, keys_b, join_nulls) + hash_join_tuples_left_semi(keys_a, keys_b, nulls_equal) } }, _ => { let keys_a = get_arrays(&splitted_a); let keys_b = get_arrays(&splitted_b); if anti { - hash_join_tuples_left_anti(keys_a, keys_b, join_nulls) + hash_join_tuples_left_anti(keys_a, keys_b, nulls_equal) } else { - hash_join_tuples_left_semi(keys_a, keys_b, join_nulls) + hash_join_tuples_left_semi(keys_a, keys_b, nulls_equal) } }, } diff --git a/crates/polars-ops/src/frame/join/hash_join/single_keys_inner.rs b/crates/polars-ops/src/frame/join/hash_join/single_keys_inner.rs index e4dd691be710..8359fe79a4f9 100644 --- a/crates/polars-ops/src/frame/join/hash_join/single_keys_inner.rs +++ b/crates/polars-ops/src/frame/join/hash_join/single_keys_inner.rs @@ -43,7 +43,7 @@ pub(super) fn hash_join_tuples_inner( // Because b should be the shorter relation we could need to swap to keep left left and right right. swapped: bool, validate: JoinValidation, - join_nulls: bool, + nulls_equal: bool, // Null count is required for join validation build_null_count: usize, ) -> PolarsResult<(Vec, Vec)> @@ -59,15 +59,15 @@ where .iter() .map(|v| v.clone().into_iter().size_hint().1.unwrap()) .sum(); - if !join_nulls { + if !nulls_equal { expected_size -= build_null_count; } - let hash_tbls = build_tables(build, join_nulls); + let hash_tbls = build_tables(build, nulls_equal); let build_size = hash_tbls.iter().map(|m| m.len()).sum(); validate.validate_build(build_size, expected_size, swapped)?; hash_tbls } else { - build_tables(build, join_nulls) + build_tables(build, nulls_equal) }; try_raise_keyboard_interrupt(); diff --git a/crates/polars-ops/src/frame/join/hash_join/single_keys_left.rs b/crates/polars-ops/src/frame/join/hash_join/single_keys_left.rs index 5cc457a510f0..3dda2a995551 100644 --- a/crates/polars-ops/src/frame/join/hash_join/single_keys_left.rs +++ b/crates/polars-ops/src/frame/join/hash_join/single_keys_left.rs @@ -111,7 +111,7 @@ pub(super) fn hash_join_tuples_left( chunk_mapping_left: Option<&[ChunkId]>, chunk_mapping_right: Option<&[ChunkId]>, validate: JoinValidation, - join_nulls: bool, + nulls_equal: bool, // We should know the number of nulls to avoid extra calculation build_null_count: usize, ) -> PolarsResult @@ -126,15 +126,15 @@ where // first we hash one relation let hash_tbls = if validate.needs_checks() { let mut expected_size = build.iter().map(|v| v.size_hint().1.unwrap()).sum(); - if !join_nulls { + if !nulls_equal { expected_size -= build_null_count; } - let hash_tbls = build_tables(build, join_nulls); + let hash_tbls = build_tables(build, nulls_equal); let build_size = hash_tbls.iter().map(|m| m.len()).sum(); validate.validate_build(build_size, expected_size, false)?; hash_tbls } else { - build_tables(build, join_nulls) + build_tables(build, nulls_equal) }; try_raise_keyboard_interrupt(); let n_tables = hash_tbls.len(); diff --git a/crates/polars-ops/src/frame/join/hash_join/single_keys_outer.rs b/crates/polars-ops/src/frame/join/hash_join/single_keys_outer.rs index 39b4689cc8d5..acacd5008452 100644 --- a/crates/polars-ops/src/frame/join/hash_join/single_keys_outer.rs +++ b/crates/polars-ops/src/frame/join/hash_join/single_keys_outer.rs @@ -111,7 +111,7 @@ fn probe_outer( swap_fn_no_match: G, // Function that get index_b from the build table that did not match any in A and pushes to result swap_fn_drain: H, - join_nulls: bool, + nulls_equal: bool, ) where T: TotalHash + TotalEq + ToTotalOrd, ::TotalOrdItem: Hash + Eq + IsNull, @@ -139,7 +139,7 @@ fn probe_outer( match entry { // match and remove RawEntryMut::Occupied(mut occupied) => { - if key.is_null() && !join_nulls { + if key.is_null() && !nulls_equal { let (l, r) = swap_fn_no_match(idx_a); results.0.push(l); results.1.push(r); @@ -182,7 +182,7 @@ pub(super) fn hash_join_tuples_outer( build: Vec, swapped: bool, validate: JoinValidation, - join_nulls: bool, + nulls_equal: bool, ) -> PolarsResult<(PrimitiveArray, PrimitiveArray)> where I: IntoIterator, @@ -247,7 +247,7 @@ where |idx_a, idx_b| (Some(idx_b), Some(idx_a)), |idx_a| (None, Some(idx_a)), |idx_b| (Some(idx_b), None), - join_nulls, + nulls_equal, ) } else { probe_outer( @@ -258,7 +258,7 @@ where |idx_a, idx_b| (Some(idx_a), Some(idx_b)), |idx_a| (Some(idx_a), None), |idx_b| (None, Some(idx_b)), - join_nulls, + nulls_equal, ) } Ok((results.0.into(), results.1.into())) diff --git a/crates/polars-ops/src/frame/join/hash_join/single_keys_semi_anti.rs b/crates/polars-ops/src/frame/join/hash_join/single_keys_semi_anti.rs index b902d64bff8f..7fa7739c3064 100644 --- a/crates/polars-ops/src/frame/join/hash_join/single_keys_semi_anti.rs +++ b/crates/polars-ops/src/frame/join/hash_join/single_keys_semi_anti.rs @@ -7,7 +7,7 @@ use super::*; /// Only keeps track of membership in right table pub(super) fn build_table_semi_anti( keys: Vec, - join_nulls: bool, + nulls_equal: bool, ) -> Vec::TotalOrdItem>> where T: TotalHash + TotalEq + DirtyHash + ToTotalOrd, @@ -25,7 +25,7 @@ where keys.into_iter().for_each(|k| { let k = k.to_total_ord(); if partition_no == hash_to_partition(k.dirty_hash(), n_partitions) - && (!k.is_null() || join_nulls) + && (!k.is_null() || nulls_equal) { hash_tbl.insert(k); } @@ -41,7 +41,7 @@ where fn semi_anti_impl( probe: Vec, build: Vec, - join_nulls: bool, + nulls_equal: bool, ) -> impl ParallelIterator where I: IntoIterator + Copy + Send + Sync, @@ -49,7 +49,7 @@ where ::TotalOrdItem: Send + Sync + Hash + Eq + DirtyHash + IsNull, { // first we hash one relation - let hash_sets = build_table_semi_anti(build, join_nulls); + let hash_sets = build_table_semi_anti(build, nulls_equal); // we determine the offset so that we later know which index to store in the join tuples let offsets = probe_to_offsets(&probe); @@ -95,14 +95,14 @@ where pub(super) fn hash_join_tuples_left_anti( probe: Vec, build: Vec, - join_nulls: bool, + nulls_equal: bool, ) -> Vec where I: IntoIterator + Copy + Send + Sync, T: TotalHash + TotalEq + DirtyHash + ToTotalOrd, ::TotalOrdItem: Send + Sync + Hash + Eq + DirtyHash + IsNull, { - let par_iter = semi_anti_impl(probe, build, join_nulls) + let par_iter = semi_anti_impl(probe, build, nulls_equal) .filter(|tpls| !tpls.1) .map(|tpls| tpls.0); POOL.install(|| par_iter.collect()) @@ -111,14 +111,14 @@ where pub(super) fn hash_join_tuples_left_semi( probe: Vec, build: Vec, - join_nulls: bool, + nulls_equal: bool, ) -> Vec where I: IntoIterator + Copy + Send + Sync, T: TotalHash + TotalEq + DirtyHash + ToTotalOrd, ::TotalOrdItem: Send + Sync + Hash + Eq + DirtyHash + IsNull, { - let par_iter = semi_anti_impl(probe, build, join_nulls) + let par_iter = semi_anti_impl(probe, build, nulls_equal) .filter(|tpls| tpls.1) .map(|tpls| tpls.0); POOL.install(|| par_iter.collect()) diff --git a/crates/polars-ops/src/frame/join/hash_join/sort_merge.rs b/crates/polars-ops/src/frame/join/hash_join/sort_merge.rs index 4040c1260a7f..3f5587975e62 100644 --- a/crates/polars-ops/src/frame/join/hash_join/sort_merge.rs +++ b/crates/polars-ops/src/frame/join/hash_join/sort_merge.rs @@ -187,9 +187,9 @@ pub(crate) fn _sort_or_hash_inner( s_right: &Series, _verbose: bool, validate: JoinValidation, - join_nulls: bool, + nulls_equal: bool, ) -> PolarsResult<(InnerJoinIds, bool)> { - s_left.hash_join_inner(s_right, validate, join_nulls) + s_left.hash_join_inner(s_right, validate, nulls_equal) } #[cfg(feature = "performant")] @@ -198,7 +198,7 @@ pub(crate) fn _sort_or_hash_inner( s_right: &Series, verbose: bool, validate: JoinValidation, - join_nulls: bool, + nulls_equal: bool, ) -> PolarsResult<(InnerJoinIds, bool)> { // We check if keys are sorted. // - If they are we can do a sorted merge join @@ -212,7 +212,7 @@ pub(crate) fn _sort_or_hash_inner( let is_numeric = s_left.dtype().to_physical().is_primitive_numeric(); if validate.needs_checks() { - return s_left.hash_join_inner(s_right, validate, join_nulls); + return s_left.hash_join_inner(s_right, validate, nulls_equal); } let no_nulls = s_left.null_count() == 0 && s_right.null_count() == 0; @@ -280,7 +280,7 @@ pub(crate) fn _sort_or_hash_inner( // set sorted to `false` as we descending sorted the left key. Ok(((left, right), false)) }, - _ => s_left.hash_join_inner(s_right, validate, join_nulls), + _ => s_left.hash_join_inner(s_right, validate, nulls_equal), } } @@ -290,9 +290,9 @@ pub(crate) fn sort_or_hash_left( s_right: &Series, _verbose: bool, validate: JoinValidation, - join_nulls: bool, + nulls_equal: bool, ) -> PolarsResult { - s_left.hash_join_left(s_right, validate, join_nulls) + s_left.hash_join_left(s_right, validate, nulls_equal) } #[cfg(feature = "performant")] @@ -301,10 +301,10 @@ pub(crate) fn sort_or_hash_left( s_right: &Series, verbose: bool, validate: JoinValidation, - join_nulls: bool, + nulls_equal: bool, ) -> PolarsResult { if validate.needs_checks() { - return s_left.hash_join_left(s_right, validate, join_nulls); + return s_left.hash_join_left(s_right, validate, nulls_equal); } let size_factor_rhs = s_right.len() as f32 / s_left.len() as f32; @@ -356,6 +356,6 @@ pub(crate) fn sort_or_hash_left( Ok(to_left_join_ids(left, right)) }, // don't reverse sort a left join key yet. Have to figure out how to set sorted flag - _ => s_left.hash_join_left(s_right, validate, join_nulls), + _ => s_left.hash_join_left(s_right, validate, nulls_equal), } } diff --git a/crates/polars-ops/src/frame/join/mod.rs b/crates/polars-ops/src/frame/join/mod.rs index 4dcecb8f7f09..2ee2f6e58435 100644 --- a/crates/polars-ops/src/frame/join/mod.rs +++ b/crates/polars-ops/src/frame/join/mod.rs @@ -284,7 +284,7 @@ pub trait DataFrameJoinOps: IntoDf { s_right, args.slice, true, - args.join_nulls, + args.nulls_equal, ), #[cfg(feature = "semi_anti_join")] JoinType::Semi => left_df._semi_anti_join_from_series( @@ -292,7 +292,7 @@ pub trait DataFrameJoinOps: IntoDf { s_right, args.slice, false, - args.join_nulls, + args.nulls_equal, ), #[cfg(feature = "asof_join")] JoinType::AsOf(options) => match (options.left_by, options.right_by) { @@ -336,8 +336,8 @@ pub trait DataFrameJoinOps: IntoDf { }; } - let lhs_keys = prepare_keys_multiple(&selected_left, args.join_nulls)?.into_series(); - let rhs_keys = prepare_keys_multiple(&selected_right, args.join_nulls)?.into_series(); + let lhs_keys = prepare_keys_multiple(&selected_left, args.nulls_equal)?.into_series(); + let rhs_keys = prepare_keys_multiple(&selected_right, args.nulls_equal)?.into_series(); let drop_names = if should_coalesce { selected_right @@ -537,7 +537,7 @@ trait DataFrameJoinOpsPrivate: IntoDf { #[cfg(feature = "dtype-categorical")] _check_categorical_src(s_left.dtype(), s_right.dtype())?; let ((join_tuples_left, join_tuples_right), sorted) = - _sort_or_hash_inner(s_left, s_right, verbose, args.validation, args.join_nulls)?; + _sort_or_hash_inner(s_left, s_right, verbose, args.validation, args.nulls_equal)?; let mut join_tuples_left = &*join_tuples_left; let mut join_tuples_right = &*join_tuples_right; @@ -610,7 +610,7 @@ trait DataFrameJoinOpsPrivate: IntoDf { impl DataFrameJoinOps for DataFrame {} impl DataFrameJoinOpsPrivate for DataFrame {} -fn prepare_keys_multiple(s: &[Series], join_nulls: bool) -> PolarsResult { +fn prepare_keys_multiple(s: &[Series], nulls_equal: bool) -> PolarsResult { let keys = s .iter() .map(|s| { @@ -623,7 +623,7 @@ fn prepare_keys_multiple(s: &[Series], join_nulls: bool) -> PolarsResult>(); - if join_nulls { + if nulls_equal { encode_rows_vertical_par_unordered(&keys) } else { encode_rows_vertical_par_unordered_broadcast_nulls(&keys) @@ -632,7 +632,7 @@ fn prepare_keys_multiple(s: &[Series], join_nulls: bool) -> PolarsResult PolarsResult { // @scalar-opt let a_cols = a @@ -646,7 +646,7 @@ pub fn private_left_join_multiple_keys( .map(|c| c.as_materialized_series().clone()) .collect::>(); - let a = prepare_keys_multiple(&a_cols, join_nulls)?.into_series(); - let b = prepare_keys_multiple(&b_cols, join_nulls)?.into_series(); - sort_or_hash_left(&a, &b, false, JoinValidation::ManyToMany, join_nulls) + let a = prepare_keys_multiple(&a_cols, nulls_equal)?.into_series(); + let b = prepare_keys_multiple(&b_cols, nulls_equal)?.into_series(); + sort_or_hash_left(&a, &b, false, JoinValidation::ManyToMany, nulls_equal) } diff --git a/crates/polars-ops/src/series/ops/replace.rs b/crates/polars-ops/src/series/ops/replace.rs index 538994ce6151..d76427702253 100644 --- a/crates/polars-ops/src/series/ops/replace.rs +++ b/crates/polars-ops/src/series/ops/replace.rs @@ -174,7 +174,7 @@ fn replace_by_multiple( JoinArgs { how: JoinType::Left, coalesce: JoinCoalesce::CoalesceColumns, - join_nulls: true, + nulls_equal: true, ..Default::default() }, None, @@ -216,7 +216,7 @@ fn replace_by_multiple_strict(s: &Series, old: Series, new: Series) -> PolarsRes JoinArgs { how: JoinType::Left, coalesce: JoinCoalesce::CoalesceColumns, - join_nulls: true, + nulls_equal: true, ..Default::default() }, None, diff --git a/crates/polars-pipe/src/executors/sinks/joins/generic_build.rs b/crates/polars-pipe/src/executors/sinks/joins/generic_build.rs index 6108526ee6fc..0ee7b65dd906 100644 --- a/crates/polars-pipe/src/executors/sinks/joins/generic_build.rs +++ b/crates/polars-pipe/src/executors/sinks/joins/generic_build.rs @@ -48,7 +48,7 @@ pub struct GenericBuild { hashes: Vec, // the join order is swapped to ensure we hash the smaller table swapped: bool, - join_nulls: bool, + nulls_equal: bool, node: Node, key_names_left: Arc<[PlSmallStr]>, key_names_right: Arc<[PlSmallStr]>, @@ -63,7 +63,7 @@ impl GenericBuild { swapped: bool, join_columns_left: Arc>>, join_columns_right: Arc>>, - join_nulls: bool, + nulls_equal: bool, node: Node, key_names_left: Arc<[PlSmallStr]>, key_names_right: Arc<[PlSmallStr]>, @@ -86,7 +86,7 @@ impl GenericBuild { materialized_join_cols: vec![], hash_tables, hashes: vec![], - join_nulls, + nulls_equal, node, key_names_left, key_names_right, @@ -289,7 +289,7 @@ impl Sink for GenericBuild { self.swapped, self.join_columns_left.clone(), self.join_columns_right.clone(), - self.join_nulls, + self.nulls_equal, self.node, self.key_names_left.clone(), self.key_names_right.clone(), @@ -338,7 +338,7 @@ impl Sink for GenericBuild { hashes, context, self.join_args.clone(), - self.join_nulls, + self.nulls_equal, ); self.placeholder.replace(Box::new(probe_operator)); Ok(FinalizedSink::Operator) @@ -354,7 +354,7 @@ impl Sink for GenericBuild { join_columns_left, self.swapped, hashes, - self.join_nulls, + self.nulls_equal, coalesce, self.key_names_left.clone(), self.key_names_right.clone(), diff --git a/crates/polars-pipe/src/executors/sinks/joins/generic_probe_inner_left.rs b/crates/polars-pipe/src/executors/sinks/joins/generic_probe_inner_left.rs index b876f265c1e5..3301c591e02a 100644 --- a/crates/polars-pipe/src/executors/sinks/joins/generic_probe_inner_left.rs +++ b/crates/polars-pipe/src/executors/sinks/joins/generic_probe_inner_left.rs @@ -47,7 +47,7 @@ pub struct GenericJoinProbe { /// cached output names output_names: Option>, args: JoinArgs, - join_nulls: bool, + nulls_equal: bool, row_values: RowValues, } @@ -66,7 +66,7 @@ impl GenericJoinProbe { amortized_hashes: Vec, context: &PExecutionContext, args: JoinArgs, - join_nulls: bool, + nulls_equal: bool, ) -> Self { if swapped_or_left && args.should_coalesce() { let tmp = DataChunk { @@ -100,7 +100,7 @@ impl GenericJoinProbe { swapped_or_left, output_names: None, args, - join_nulls, + nulls_equal, row_values: RowValues::new(join_columns_right, !swapped_or_left), } } @@ -188,10 +188,10 @@ impl GenericJoinProbe { let mut hashes = std::mem::take(&mut self.hashes); let rows = self .row_values - .get_values(context, chunk, self.join_nulls)?; + .get_values(context, chunk, self.nulls_equal)?; hash_rows(&rows, &mut hashes, &self.hb); - if self.join_nulls || rows.null_count() == 0 { + if self.nulls_equal || rows.null_count() == 0 { let iter = hashes.iter().zip(rows.values_iter()).enumerate(); self.match_left(iter); } else { @@ -253,10 +253,10 @@ impl GenericJoinProbe { let mut hashes = std::mem::take(&mut self.hashes); let rows = self .row_values - .get_values(context, chunk, self.join_nulls)?; + .get_values(context, chunk, self.nulls_equal)?; hash_rows(&rows, &mut hashes, &self.hb); - if self.join_nulls || rows.null_count() == 0 { + if self.nulls_equal || rows.null_count() == 0 { let iter = hashes.iter().zip(rows.values_iter()).enumerate(); self.match_inner(iter); } else { diff --git a/crates/polars-pipe/src/executors/sinks/joins/generic_probe_outer.rs b/crates/polars-pipe/src/executors/sinks/joins/generic_probe_outer.rs index 2176c0574322..7513d74567a4 100644 --- a/crates/polars-pipe/src/executors/sinks/joins/generic_probe_outer.rs +++ b/crates/polars-pipe/src/executors/sinks/joins/generic_probe_outer.rs @@ -48,7 +48,7 @@ pub struct GenericFullOuterJoinProbe { swapped: bool, // cached output names output_names: Option>, - join_nulls: bool, + nulls_equal: bool, coalesce: bool, thread_no: usize, row_values: RowValues, @@ -68,7 +68,7 @@ impl GenericFullOuterJoinProbe { swapped: bool, // Re-use the hashes allocation of the build side. amortized_hashes: Vec, - join_nulls: bool, + nulls_equal: bool, coalesce: bool, key_names_left: Arc<[PlSmallStr]>, key_names_right: Arc<[PlSmallStr]>, @@ -85,7 +85,7 @@ impl GenericFullOuterJoinProbe { hashes: amortized_hashes, swapped, output_names: None, - join_nulls, + nulls_equal, coalesce, thread_no: 0, row_values: RowValues::new(join_columns_right, false), @@ -208,10 +208,10 @@ impl GenericFullOuterJoinProbe { let mut hashes = std::mem::take(&mut self.hashes); let rows = self .row_values - .get_values(context, chunk, self.join_nulls)?; + .get_values(context, chunk, self.nulls_equal)?; hash_rows(&rows, &mut hashes, &self.hb); - if self.join_nulls || rows.null_count() == 0 { + if self.nulls_equal || rows.null_count() == 0 { let iter = hashes.iter().zip(rows.values_iter()).enumerate(); self.match_outer(iter); } else { diff --git a/crates/polars-pipe/src/executors/sinks/joins/row_values.rs b/crates/polars-pipe/src/executors/sinks/joins/row_values.rs index 45d14fc60039..1b2b852a8f61 100644 --- a/crates/polars-pipe/src/executors/sinks/joins/row_values.rs +++ b/crates/polars-pipe/src/executors/sinks/joins/row_values.rs @@ -42,7 +42,7 @@ impl RowValues { &mut self, context: &PExecutionContext, chunk: &DataChunk, - join_nulls: bool, + nulls_equal: bool, ) -> PolarsResult> { // Memory should already be cleared on previous iteration. debug_assert!(self.join_columns_material.is_empty()); @@ -85,7 +85,7 @@ impl RowValues { // SAFETY: we keep rows-encode alive let array = unsafe { self.current_rows.borrow_array() }; - Ok(if join_nulls { + Ok(if nulls_equal { array } else { let validities = self diff --git a/crates/polars-pipe/src/pipeline/convert.rs b/crates/polars-pipe/src/pipeline/convert.rs index 24bdac5bb4ed..6b10ac1a0043 100644 --- a/crates/polars-pipe/src/pipeline/convert.rs +++ b/crates/polars-pipe/src/pipeline/convert.rs @@ -279,7 +279,7 @@ where swapped, join_columns_left, join_columns_right, - options.args.join_nulls, + options.args.nulls_equal, node, // We don't need the key names for these joins. vec![].into(), @@ -306,7 +306,7 @@ where swapped, join_columns_left, join_columns_right, - options.args.join_nulls, + options.args.nulls_equal, node, key_names_left, key_names_right, diff --git a/crates/polars-python/src/lazyframe/general.rs b/crates/polars-python/src/lazyframe/general.rs index c38039e09641..3f3aa037084b 100644 --- a/crates/polars-python/src/lazyframe/general.rs +++ b/crates/polars-python/src/lazyframe/general.rs @@ -1023,7 +1023,7 @@ impl PyLazyFrame { .into()) } - #[pyo3(signature = (other, left_on, right_on, allow_parallel, force_parallel, join_nulls, how, suffix, validate, maintain_order, coalesce=None))] + #[pyo3(signature = (other, left_on, right_on, allow_parallel, force_parallel, nulls_equal, how, suffix, validate, maintain_order, coalesce=None))] fn join( &self, other: Self, @@ -1031,7 +1031,7 @@ impl PyLazyFrame { right_on: Vec, allow_parallel: bool, force_parallel: bool, - join_nulls: bool, + nulls_equal: bool, how: Wrap, suffix: String, validate: Wrap, @@ -1061,7 +1061,7 @@ impl PyLazyFrame { .right_on(right_on) .allow_parallel(allow_parallel) .force_parallel(force_parallel) - .join_nulls(join_nulls) + .join_nulls(nulls_equal) .how(how.0) .suffix(suffix) .validate(validate.0) diff --git a/crates/polars-python/src/lazyframe/visitor/nodes.rs b/crates/polars-python/src/lazyframe/visitor/nodes.rs index 2b4dcad73f27..07df44a4df5b 100644 --- a/crates/polars-python/src/lazyframe/visitor/nodes.rs +++ b/crates/polars-python/src/lazyframe/visitor/nodes.rs @@ -509,7 +509,7 @@ pub(crate) fn into_py(py: Python<'_>, plan: &IR) -> PyResult { }, _ => name.into_any().unbind(), }, - options.args.join_nulls, + options.args.nulls_equal, options.args.slice, options.args.suffix().as_str(), options.args.coalesce.coalesce(how), diff --git a/crates/polars-sql/src/context.rs b/crates/polars-sql/src/context.rs index 3d8af16a3ce5..10bfb42dae35 100644 --- a/crates/polars-sql/src/context.rs +++ b/crates/polars-sql/src/context.rs @@ -855,7 +855,7 @@ impl SQLContext { validation: Default::default(), suffix: None, slice: None, - join_nulls: false, + nulls_equal: false, coalesce: Default::default(), maintain_order: polars_ops::frame::MaintainOrderJoin::Left, }, diff --git a/crates/polars-stream/src/nodes/joins/equi_join.rs b/crates/polars-stream/src/nodes/joins/equi_join.rs index bbadac1b6b73..051440985e40 100644 --- a/crates/polars-stream/src/nodes/joins/equi_join.rs +++ b/crates/polars-stream/src/nodes/joins/equi_join.rs @@ -134,7 +134,7 @@ async fn select_keys( Ok(HashKeys::from_df( &keys, params.random_state.clone(), - params.args.join_nulls, + params.args.nulls_equal, true, )) } diff --git a/crates/polars-stream/src/physical_plan/fmt.rs b/crates/polars-stream/src/physical_plan/fmt.rs index e8bdcaf32fd5..f878b3bc80f5 100644 --- a/crates/polars-stream/src/physical_plan/fmt.rs +++ b/crates/polars-stream/src/physical_plan/fmt.rs @@ -247,7 +247,7 @@ fn visualize_plan_rec( escape_graphviz(&format!("{:?}", args.how)) ) .unwrap(); - if args.join_nulls { + if args.nulls_equal { write!(label, r"\njoin-nulls").unwrap(); } (label, &[*input_left, *input_right][..]) diff --git a/docs/source/releases/upgrade/0.20.md b/docs/source/releases/upgrade/0.20.md index f001f4918e45..58901798c8b4 100644 --- a/docs/source/releases/upgrade/0.20.md +++ b/docs/source/releases/upgrade/0.20.md @@ -42,7 +42,7 @@ shape: (1, 3) ╞═════╪═════╪═════╡ │ 2 ┆ 4 ┆ 5 │ └─────┴─────┴─────┘ ->>> df1.join(df2, on="a", how="inner", join_nulls=True) # Keeps previous behavior +>>> df1.join(df2, on="a", how="inner", nulls_equal=True) # Keeps previous behavior shape: (2, 3) ┌──────┬─────┬─────┐ │ a ┆ b ┆ c │ diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index 4034f0331d0e..ae4892c8602b 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -7495,6 +7495,7 @@ def join_asof( .collect(_eager=True) ) + @deprecate_renamed_parameter("join_nulls", "nulls_equal", version="1.24") def join( self, other: DataFrame, @@ -7505,7 +7506,7 @@ def join( right_on: str | Expr | Sequence[str | Expr] | None = None, suffix: str = "_right", validate: JoinValidation = "m:m", - join_nulls: bool = False, + nulls_equal: bool = False, coalesce: bool | None = None, maintain_order: MaintainOrderJoin | None = None, ) -> DataFrame: @@ -7560,7 +7561,7 @@ def join( .. note:: This is currently not supported by the streaming engine. - join_nulls + nulls_equal Join on null values. By default null values will never produce matches. coalesce Coalescing behavior (merging of join columns). @@ -7716,7 +7717,7 @@ def join( how=how, suffix=suffix, validate=validate, - join_nulls=join_nulls, + nulls_equal=nulls_equal, coalesce=coalesce, maintain_order=maintain_order, ) diff --git a/py-polars/polars/functions/eager.py b/py-polars/polars/functions/eager.py index 72e898250c7b..c962da96e4cf 100644 --- a/py-polars/polars/functions/eager.py +++ b/py-polars/polars/functions/eager.py @@ -325,7 +325,7 @@ def join_func( how=how, on=align_on, suffix=f":{y_idx}", - join_nulls=True, + nulls_equal=True, coalesce=True, maintain_order="right_left", ) diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py index cc4df14fdce6..5f9e8b4b75d4 100644 --- a/py-polars/polars/lazyframe/frame.py +++ b/py-polars/polars/lazyframe/frame.py @@ -4846,6 +4846,7 @@ def join_asof( ) ) + @deprecate_renamed_parameter("join_nulls", "nulls_equal", version="1.24") def join( self, other: LazyFrame, @@ -4856,7 +4857,7 @@ def join( right_on: str | Expr | Sequence[str | Expr] | None = None, suffix: str = "_right", validate: JoinValidation = "m:m", - join_nulls: bool = False, + nulls_equal: bool = False, coalesce: bool | None = None, maintain_order: MaintainOrderJoin | None = None, allow_parallel: bool = True, @@ -4912,7 +4913,7 @@ def join( .. note:: This is currently not supported by the streaming engine. - join_nulls + nulls_equal Join on null values. By default null values will never produce matches. coalesce Coalescing behavior (merging of join columns). @@ -5081,7 +5082,7 @@ def join( [], allow_parallel, force_parallel, - join_nulls, + nulls_equal, how, suffix, validate, @@ -5107,7 +5108,7 @@ def join( pyexprs_right, allow_parallel, force_parallel, - join_nulls, + nulls_equal, how, suffix, validate, diff --git a/py-polars/tests/unit/operations/test_join.py b/py-polars/tests/unit/operations/test_join.py index 493ec9686bcd..76bd78ee8b7b 100644 --- a/py-polars/tests/unit/operations/test_join.py +++ b/py-polars/tests/unit/operations/test_join.py @@ -164,6 +164,15 @@ def test_deprecated() -> None: ) +def test_deprecated_parameter_join_nulls() -> None: + df = pl.DataFrame({"a": [1, None]}) + with pytest.deprecated_call( + match=r"The argument `join_nulls` for `DataFrame.join` is deprecated. It has been renamed to `nulls_equal`" + ): + result = df.join(df, on="a", join_nulls=True) # type: ignore[call-arg] + assert_frame_equal(result, df) + + def test_join_on_expressions() -> None: df_a = pl.DataFrame({"a": [1, 2, 3]}) @@ -940,11 +949,11 @@ def test_join_4_columns_with_validity() -> None: d=pl.col("a"), ) - assert a.join(a, on=["a", "b", "c", "d"], how="inner", join_nulls=True).shape == ( + assert a.join(a, on=["a", "b", "c", "d"], how="inner", nulls_equal=True).shape == ( 644, 4, ) - assert a.join(a, on=["a", "b", "c", "d"], how="inner", join_nulls=False).shape == ( + assert a.join(a, on=["a", "b", "c", "d"], how="inner", nulls_equal=False).shape == ( 115, 4, ) diff --git a/py-polars/tests/unit/sql/test_joins.py b/py-polars/tests/unit/sql/test_joins.py index 8e98cd5bd8b2..55ebc511730f 100644 --- a/py-polars/tests/unit/sql/test_joins.py +++ b/py-polars/tests/unit/sql/test_joins.py @@ -699,24 +699,24 @@ def test_sql_forbid_nested_join_unnamed_relation() -> None: ) -def test_join_nulls_19624() -> None: +def test_nulls_equal_19624() -> None: df1 = pl.DataFrame({"a": [1, 2, None, None]}) df2 = pl.DataFrame({"a": [1, 1, 2, 2, None], "b": [0, 1, 2, 3, 4]}) # left join - result_df = df1.join(df2, how="left", on="a", join_nulls=False, validate="1:m") + result_df = df1.join(df2, how="left", on="a", nulls_equal=False, validate="1:m") expected_df = pl.DataFrame( {"a": [1, 1, 2, 2, None, None], "b": [0, 1, 2, 3, None, None]} ) assert_frame_equal(result_df, expected_df) - result_df = df2.join(df1, how="left", on="a", join_nulls=False, validate="m:1") + result_df = df2.join(df1, how="left", on="a", nulls_equal=False, validate="m:1") expected_df = pl.DataFrame({"a": [1, 1, 2, 2, None], "b": [0, 1, 2, 3, 4]}) assert_frame_equal(result_df, expected_df) # inner join - result_df = df1.join(df2, how="inner", on="a", join_nulls=False, validate="1:m") + result_df = df1.join(df2, how="inner", on="a", nulls_equal=False, validate="1:m") expected_df = pl.DataFrame({"a": [1, 1, 2, 2], "b": [0, 1, 2, 3]}) assert_frame_equal(result_df, expected_df) - result_df = df2.join(df1, how="inner", on="a", join_nulls=False, validate="m:1") + result_df = df2.join(df1, how="inner", on="a", nulls_equal=False, validate="m:1") expected_df = pl.DataFrame({"a": [1, 1, 2, 2], "b": [0, 1, 2, 3]}) assert_frame_equal(result_df, expected_df) diff --git a/py-polars/tests/unit/streaming/test_streaming_join.py b/py-polars/tests/unit/streaming/test_streaming_join.py index 9a749c1da051..f3ea8fc7da3d 100644 --- a/py-polars/tests/unit/streaming/test_streaming_join.py +++ b/py-polars/tests/unit/streaming/test_streaming_join.py @@ -145,10 +145,10 @@ def test_join_null_matches(streaming: bool) -> None: } ) # Semi - assert df_a.join(df_b, on="a", how="semi", join_nulls=True).collect( + assert df_a.join(df_b, on="a", how="semi", nulls_equal=True).collect( streaming=streaming )["idx_a"].to_list() == [0, 1, 2] - assert df_a.join(df_b, on="a", how="semi", join_nulls=False).collect( + assert df_a.join(df_b, on="a", how="semi", nulls_equal=False).collect( streaming=streaming )["idx_a"].to_list() == [1, 2]