Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: JSON encoding in the root expression namespace pl.Expr.json_encode #18396

Closed
15 changes: 15 additions & 0 deletions crates/polars-plan/src/dsl/function_expr/dispatch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -206,3 +206,18 @@ pub(super) fn extend_constant(s: &[Series]) -> PolarsResult<Series> {
},
}
}

#[cfg(feature = "json")]
/// Serialize every value of `s` into its JSON text representation.
///
/// Each chunk of the series is cast to the arrow dtype derived from the series'
/// own dtype and written out with `serialize_to_utf8`; the resulting chunks are
/// collected into a String series that keeps the name of `s`.
///
/// # Errors
/// Returns an `InvalidOperation` error when `ignore_nulls` is `true`, because
/// skipping null values is not implemented yet.
///
/// # Panics
/// Panics if a chunk cannot be cast to the arrow dtype of `s`.
pub(super) fn json_encode(s: &Series, ignore_nulls: bool) -> PolarsResult<Series> {
    // The flag is user-controlled, so report the missing feature as a regular
    // error instead of aborting the whole query with a panic.
    if ignore_nulls {
        polars_bail!(InvalidOperation: "`ignore_nulls=true` is not yet implemented for `json_encode`");
    }

    let dtype = s.dtype().to_arrow(CompatLevel::newest());
    let ca = s.as_ref();
    let iter = ca.chunks().iter().map(|arr| {
        // NOTE(review): the target dtype is derived from the series itself, so this
        // cast is presumably infallible; confirm before relying on the `unwrap`.
        let arr = arrow::compute::cast::cast_unchecked(arr.as_ref(), &dtype).unwrap();
        polars_json::json::write::serialize_to_utf8(arr.as_ref())
    });

    Ok(StringChunked::from_chunk_iter(ca.name(), iter).into_series())
}
8 changes: 8 additions & 0 deletions crates/polars-plan/src/dsl/function_expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,8 @@ pub enum FunctionExpr {
#[cfg(feature = "reinterpret")]
Reinterpret(bool),
ExtendConstant,
#[cfg(feature = "json")]
JsonEncode(bool),
}

impl Hash for FunctionExpr {
Expand Down Expand Up @@ -579,6 +581,8 @@ impl Hash for FunctionExpr {
ExtendConstant => {},
#[cfg(feature = "top_k")]
TopKBy { descending } => descending.hash(state),
#[cfg(feature = "json")]
JsonEncode(a) => a.hash(state),
}
}
}
Expand Down Expand Up @@ -764,6 +768,8 @@ impl Display for FunctionExpr {
#[cfg(feature = "reinterpret")]
Reinterpret(_) => "reinterpret",
ExtendConstant => "extend_constant",
#[cfg(feature = "json")]
JsonEncode(_) => "json_encode",
};
write!(f, "{s}")
}
Expand Down Expand Up @@ -1160,6 +1166,8 @@ impl From<FunctionExpr> for SpecialEq<Arc<dyn SeriesUdf>> {
#[cfg(feature = "reinterpret")]
Reinterpret(signed) => map!(dispatch::reinterpret, signed),
ExtendConstant => map_as_slice!(dispatch::extend_constant),
#[cfg(feature = "json")]
JsonEncode(ignore_nulls) => map!(dispatch::json_encode, ignore_nulls),
}
}
}
2 changes: 2 additions & 0 deletions crates/polars-plan/src/dsl/function_expr/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,8 @@ impl FunctionExpr {
mapper.with_dtype(dt)
},
ExtendConstant => mapper.with_same_dtype(),
#[cfg(feature = "json")]
JsonEncode(_) => mapper.with_dtype(DataType::String),
}
}

Expand Down
5 changes: 5 additions & 0 deletions crates/polars-plan/src/dsl/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1889,6 +1889,11 @@ impl Expr {
self.apply_many_private(FunctionExpr::ExtendConstant, &[value, n], false, false)
}

#[cfg(feature = "json")]
/// Wrap this expression in a [`FunctionExpr::JsonEncode`] call.
///
/// `ignore_nulls` is stored in the function variant and forwarded unchanged.
pub fn json_encode(self, ignore_nulls: bool) -> Expr {
    let function = FunctionExpr::JsonEncode(ignore_nulls);
    self.map_private(function)
}

#[cfg(feature = "strings")]
/// Get the [`string::StringNameSpace`]
pub fn str(self) -> string::StringNameSpace {
Expand Down
5 changes: 5 additions & 0 deletions crates/polars-python/src/expr/general.rs
Original file line number Diff line number Diff line change
Expand Up @@ -935,4 +935,9 @@ impl PyExpr {
.hist(bins, bin_count, include_category, include_breakpoint)
.into()
}

#[cfg(feature = "json")]
fn json_encode(&self, ignore_nulls: bool) -> Self {
    // Clone the wrapped expression, apply `json_encode` to the copy and
    // convert the result back into the Python-facing wrapper.
    let expr = self.inner.clone();
    expr.json_encode(ignore_nulls).into()
}
}
5 changes: 0 additions & 5 deletions crates/polars-python/src/expr/struct.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,6 @@ impl PyExpr {
self.inner.clone().struct_().rename_fields(names).into()
}

#[cfg(feature = "json")]
fn struct_json_encode(&self) -> Self {
    // Go through the struct namespace of a copy of the wrapped expression,
    // then convert the encoded expression back into the wrapper type.
    let struct_namespace = self.inner.clone().struct_();
    struct_namespace.json_encode().into()
}

fn struct_with_fields(&self, fields: Vec<PyExpr>) -> PyResult<Self> {
let fields = fields.to_exprs();
let e = self
Expand Down
4 changes: 4 additions & 0 deletions crates/polars-python/src/lazyframe/visitor/expr_nodes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1304,6 +1304,10 @@ pub(crate) fn into_py(py: Python<'_>, expr: &AExpr) -> PyResult<PyObject> {
FunctionExpr::EwmMeanBy { half_life: _ } => {
return Err(PyNotImplementedError::new_err("ewm_mean_by"))
},
#[cfg(feature = "json")]
FunctionExpr::JsonEncode(_) => {
return Err(PyNotImplementedError::new_err("json_encode"))
},
},
options: py.None(),
}
Expand Down
67 changes: 67 additions & 0 deletions py-polars/polars/expr/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -10696,6 +10696,73 @@ def struct(self) -> ExprStructNameSpace:
"""
return ExprStructNameSpace(self)

def json_encode(self, *, ignore_nulls: bool = False) -> Expr:
    r"""
    Serialize each value of the expression into a JSON string.

    Parameters
    ----------
    ignore_nulls
        Whether null values in the struct are left out when serializing.

        - When `ignore_nulls=False`, the values in the struct are included even
          if they are null (they serialize into "null").
        - When `ignore_nulls=True`, the values in the struct are skipped if they
          are null.

        .. note::
            `ignore_nulls=True` is not implemented yet by the engine and
            currently fails.

    Returns
    -------
    Expr
        Expression of data type :class:`String`.

    See Also
    --------
    Expr.str.json_decode

    Examples
    --------
    >>> pl.DataFrame(
    ...     {"a": [{"a": [1, 2], "b": [45]}, {"a": [9, 1, 3], "b": None}]}
    ... ).with_columns(pl.col("a").json_encode().alias("encoded"))
    shape: (2, 2)
    ┌──────────────────┬────────────────────────┐
    │ a                ┆ encoded                │
    │ ---              ┆ ---                    │
    │ struct[2]        ┆ str                    │
    ╞══════════════════╪════════════════════════╡
    │ {[1, 2],[45]}    ┆ {"a":[1,2],"b":[45]}   │
    │ {[9, 1, 3],null} ┆ {"a":[9,1,3],"b":null} │
    └──────────────────┴────────────────────────┘

    >>> pl.DataFrame({"a": [[1, 2], [45], [9, 1, 3], None]}).with_columns(
    ...     pl.col("a").json_encode().alias("encoded")
    ... )
    shape: (4, 2)
    ┌───────────┬─────────┐
    │ a         ┆ encoded │
    │ ---       ┆ ---     │
    │ list[i64] ┆ str     │
    ╞═══════════╪═════════╡
    │ [1, 2]    ┆ [1,2]   │
    │ [45]      ┆ [45]    │
    │ [9, 1, 3] ┆ [9,1,3] │
    │ null      ┆ null    │
    └───────────┴─────────┘

    >>> pl.DataFrame({"a": [["\\", '"foo"'], [None, ""]]}).with_columns(
    ...     pl.col("a").json_encode().alias("encoded")
    ... )
    shape: (2, 2)
    ┌────────────────┬──────────────────┐
    │ a              ┆ encoded          │
    │ ---            ┆ ---              │
    │ list[str]      ┆ str              │
    ╞════════════════╪══════════════════╡
    │ ["\", ""foo""] ┆ ["\\","\"foo\""] │
    │ [null, ""]     ┆ [null,""]        │
    └────────────────┴──────────────────┘
    """
    # Delegate to the native expression, then wrap the result again.
    encoded = self._pyexpr.json_encode(ignore_nulls)
    return self._from_pyexpr(encoded)


def _prepare_alpha(
com: float | int | None = None,
Expand Down
1 change: 1 addition & 0 deletions py-polars/polars/expr/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -1233,6 +1233,7 @@ def json_decode(
--------
json_path_match : Extract the first match of json string with provided JSONPath
expression.
Expr.json_encode : Encode values as JSON string.

Examples
--------
Expand Down
21 changes: 6 additions & 15 deletions py-polars/polars/expr/struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
from typing import TYPE_CHECKING, Iterable, Sequence

from polars._utils.deprecation import deprecate_renamed_function
from polars._utils.parse import parse_into_list_of_expressions
from polars._utils.wrap import wrap_expr

Expand Down Expand Up @@ -214,26 +215,16 @@ def rename_fields(self, names: Sequence[str]) -> Expr:
"""
return wrap_expr(self._pyexpr.struct_rename_fields(names))

@deprecate_renamed_function("Expr.json_encode", version="x.y.z")
def json_encode(self) -> Expr:
    """
    Convert this struct to a string column with json values.

    .. deprecated:: x.y.z
        This method has been renamed to :meth:`Expr.json_encode`.

    """
    # The native `json_encode` binding requires `ignore_nulls`; pass `False`
    # explicitly so this deprecated alias keeps including null fields.
    return wrap_expr(self._pyexpr.json_encode(False))

def with_fields(
self,
Expand Down
45 changes: 45 additions & 0 deletions py-polars/polars/series/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -7346,6 +7346,51 @@ def implode(self) -> Self:
]
"""

def json_encode(self, *, ignore_nulls: bool = False) -> Self:
    """
    Convert this Series into a string Series with json values.

    Parameters
    ----------
    ignore_nulls
        Ignore missing values in the struct when serializing.

        - When `ignore_nulls=False`, the values in the struct are included even
          if they are null (they serialize into "null").
        - When `ignore_nulls=True`, the values in the struct are skipped if they
          are null.

    Returns
    -------
    Series
        Series of data type :class:`String`.

    See Also
    --------
    Series.str.json_decode : Deserialize a string Series with json values.

    Examples
    --------
    >>> s = pl.Series("a", [{"a": [1, 2], "b": [45]}, {"a": [9, 1, 3], "b": None}])
    >>> s.json_encode()
    shape: (2,)
    Series: 'a' [str]
    [
        "{"a":[1,2],"b":[45]}"
        "{"a":[9,1,3],"b":null}"
    ]

    >>> s = pl.Series("b", [[1, 2, 3], [], [None, 3], [5, 6, 7]])
    >>> s.json_encode()
    shape: (4,)
    Series: 'b' [str]
    [
        "[1,2,3]"
        "[]"
        "[null,3]"
        "[5,6,7]"
    ]
    """

# Keep the `list` and `str` properties below at the end of the definition of Series,
# as to not confuse mypy with the type annotation `str` and `list`

Expand Down
1 change: 1 addition & 0 deletions py-polars/polars/series/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -661,6 +661,7 @@ def json_decode(
--------
json_path_match : Extract the first match of json string with provided JSONPath
expression.
Series.json_encode : Encode a Series' values as JSON.

Examples
--------
Expand Down
15 changes: 5 additions & 10 deletions py-polars/polars/series/struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from typing import TYPE_CHECKING, Sequence

from polars._utils.deprecation import deprecate_renamed_function
from polars._utils.various import BUILDING_SPHINX_DOCS, sphinx_accessor
from polars._utils.wrap import wrap_df
from polars.schema import Schema
Expand Down Expand Up @@ -127,18 +128,12 @@ def unnest(self) -> DataFrame:
"""
return wrap_df(self._s.struct_unnest())

@deprecate_renamed_function("Series.json_encode", version="x.y.z")
def json_encode(self) -> Series:
    """
    Convert this struct to a string column with json values.

    .. deprecated:: x.y.z
        This method has been renamed to :meth:`Series.json_encode`.

    """
29 changes: 0 additions & 29 deletions py-polars/tests/unit/operations/namespaces/test_struct.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from __future__ import annotations

import datetime
from collections import OrderedDict

import polars as pl
Expand Down Expand Up @@ -36,34 +35,6 @@ def test_rename_fields() -> None:
assert s.struct.fields == ["a", "b"]


def test_struct_json_encode() -> None:
    # Encoding a struct column must keep the source column and add its JSON text.
    frame = pl.DataFrame(
        {"a": [{"a": [1, 2], "b": [45]}, {"a": [9, 1, 3], "b": None}]}
    )
    encoded_column = pl.col("a").struct.json_encode().alias("encoded")
    result = frame.with_columns(encoded_column).to_dict(as_series=False)

    expected = {
        "a": [{"a": [1, 2], "b": [45]}, {"a": [9, 1, 3], "b": None}],
        "encoded": ['{"a":[1,2],"b":[45]}', '{"a":[9,1,3],"b":null}'],
    }
    assert result == expected


def test_struct_json_encode_logical_type() -> None:
    # Date, datetime and duration fields must serialize to their string forms.
    record = {
        "a": [datetime.date(1997, 1, 1)],
        "b": [datetime.datetime(2000, 1, 29, 10, 30)],
        "c": [datetime.timedelta(1, 25)],
    }
    encoded_column = pl.col("a").struct.json_encode().alias("encoded")
    df = pl.DataFrame({"a": [record]}).select(encoded_column)

    expected = {
        "encoded": ['{"a":["1997-01-01"],"b":["2000-01-29 10:30:00"],"c":["PT86425S"]}']
    }
    assert df.to_dict(as_series=False) == expected


def test_map_fields() -> None:
df = pl.DataFrame({"x": {"a": 1, "b": 2}})
assert df.schema == OrderedDict([("x", pl.Struct({"a": pl.Int64, "b": pl.Int64}))])
Expand Down
Loading
Loading