Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add SQL support for the DELETE statement #21190

Merged
merged 3 commits into from
Feb 12, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 76 additions & 11 deletions crates/polars-sql/src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@ use polars_plan::dsl::function_expr::StructFunction;
use polars_plan::prelude::*;
use polars_utils::format_pl_smallstr;
use sqlparser::ast::{
BinaryOperator, CreateTable, Distinct, ExcludeSelectItem, Expr as SQLExpr, FunctionArg,
GroupByExpr, Ident, JoinConstraint, JoinOperator, ObjectName, ObjectType, Offset, OrderBy,
Query, RenameSelectItem, Select, SelectItem, SetExpr, SetOperator, SetQuantifier, Statement,
TableAlias, TableFactor, TableWithJoins, UnaryOperator, Value as SQLValue, Values,
BinaryOperator, CreateTable, Delete, Distinct, ExcludeSelectItem, Expr as SQLExpr, FromTable,
FunctionArg, GroupByExpr, Ident, JoinConstraint, JoinOperator, ObjectName, ObjectType, Offset,
OrderBy, Query, RenameSelectItem, Select, SelectItem, SetExpr, SetOperator, SetQuantifier,
Statement, TableAlias, TableFactor, TableWithJoins, UnaryOperator, Value as SQLValue, Values,
WildcardAdditionalOptions,
};
use sqlparser::dialect::GenericDialect;
Expand Down Expand Up @@ -201,8 +201,9 @@ impl SQLContext {
} => self.execute_drop_table(stmt)?,
stmt @ Statement::Explain { .. } => self.execute_explain(stmt)?,
stmt @ Statement::Truncate { .. } => self.execute_truncate_table(stmt)?,
stmt @ Statement::Delete { .. } => self.execute_delete_from_table(stmt)?,
_ => polars_bail!(
SQLInterface: "statement type {:?} is not supported", ast,
SQLInterface: "statement type is not supported:\n{:?}", ast,
),
})
}
Expand Down Expand Up @@ -471,7 +472,7 @@ impl SQLContext {
let df = DataFrame::new(vec![plan])?;
Ok(df.lazy())
},
_ => unreachable!(),
_ => polars_bail!(SQLInterface: "unexpected statement type; expected EXPLAIN"),
}
}

Expand All @@ -482,6 +483,7 @@ impl SQLContext {
Ok(df.lazy())
}

// DROP TABLE <tbl>
fn execute_drop_table(&mut self, stmt: &Statement) -> PolarsResult<LazyFrame> {
match stmt {
Statement::Drop { names, .. } => {
Expand All @@ -490,10 +492,68 @@ impl SQLContext {
});
Ok(DataFrame::empty().lazy())
},
_ => unreachable!(),
_ => polars_bail!(SQLInterface: "unexpected statement type; expected DROP"),
}
}

// DELETE FROM <tbl> [WHERE ...]
fn execute_delete_from_table(&mut self, stmt: &Statement) -> PolarsResult<LazyFrame> {
if let Statement::Delete(Delete {
tables,
from,
using,
selection,
returning,
order_by,
limit,
}) = stmt
{
if !tables.is_empty()
|| using.is_some()
|| returning.is_some()
|| limit.is_some()
|| !order_by.is_empty()
{
let error_message = match () {
_ if !tables.is_empty() => "DELETE expects exactly one table name",
_ if using.is_some() => "DELETE does not support the USING clause",
_ if returning.is_some() => "DELETE does not support the RETURNING clause",
_ if limit.is_some() => "DELETE does not support the LIMIT clause",
_ if !order_by.is_empty() => "DELETE does not support the ORDER BY clause",
_ => unreachable!(),
};
polars_bail!(SQLInterface: error_message);
}
let from_tables = match &from {
FromTable::WithFromKeyword(from) => from,
FromTable::WithoutKeyword(from) => from,
};
if from_tables.len() > 1 {
polars_bail!(SQLInterface: "cannot have multiple tables in DELETE FROM (found {})", from_tables.len())
}
let tbl_expr = from_tables.first().unwrap();
if !tbl_expr.joins.is_empty() {
polars_bail!(SQLInterface: "DELETE does not support table JOINs")
}
let (_, mut lf) = self.get_table(&tbl_expr.relation)?;
if selection.is_none() {
// no WHERE clause; equivalent to TRUNCATE (drop all rows)
Ok(DataFrame::empty_with_schema(
lf.schema_with_arenas(&mut self.lp_arena, &mut self.expr_arena)
.unwrap()
.as_ref(),
)
.lazy())
} else {
// apply constraint as inverted filter (drops rows matching the selection)
Ok(self.process_where(lf.clone(), selection, true)?)
}
} else {
polars_bail!(SQLInterface: "unexpected statement type; expected DELETE")
}
}

// TRUNCATE <tbl>
fn execute_truncate_table(&mut self, stmt: &Statement) -> PolarsResult<LazyFrame> {
if let Statement::Truncate {
table_names,
Expand Down Expand Up @@ -524,7 +584,7 @@ impl SQLContext {
},
}
} else {
unreachable!()
polars_bail!(SQLInterface: "unexpected statement type; expected TRUNCATE")
}
}

Expand Down Expand Up @@ -645,7 +705,7 @@ impl SQLContext {

// Filter expression (WHERE clause)
let schema = self.get_frame_schema(&mut lf)?;
lf = self.process_where(lf, &select_stmt.selection)?;
lf = self.process_where(lf, &select_stmt.selection, false)?;

// 'SELECT *' modifiers
let mut select_modifiers = SelectModifiers {
Expand Down Expand Up @@ -907,6 +967,7 @@ impl SQLContext {
&mut self,
mut lf: LazyFrame,
expr: &Option<SQLExpr>,
invert_filter: bool,
) -> PolarsResult<LazyFrame> {
if let Some(expr) = expr {
let schema = self.get_frame_schema(&mut lf)?;
Expand All @@ -923,9 +984,9 @@ impl SQLContext {
},
_ => (false, false),
};
if all_true {
if (all_true && !invert_filter) || (all_false && invert_filter) {
return Ok(lf);
} else if all_false {
} else if (all_false && !invert_filter) || (all_true && invert_filter) {
return Ok(DataFrame::empty_with_schema(schema.as_ref()).lazy());
}

Expand All @@ -935,6 +996,10 @@ impl SQLContext {
filter_expression = all_horizontal([filter_expression])?;
}
lf = self.process_subqueries(lf, vec![&mut filter_expression]);
if invert_filter {
// negate the filter (being careful about null values)
filter_expression = filter_expression.neq_missing(lit(true))
}
lf = lf.filter(filter_expression);
}
Ok(lf)
Expand Down
15 changes: 15 additions & 0 deletions py-polars/docs/source/reference/sql/table_operations.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ Table Operations
- Description
* - :ref:`CREATE TABLE <create_table>`
- Create a new table and its columns from a SQL query executed against an existing table.
* - :ref:`DELETE FROM <delete_from_table>`
- Remove specific rows of data from a table using an (optional) constraint.
* - :ref:`DROP TABLES <drop_tables>`
- Deletes the specified table, unregistering it.
* - :ref:`EXPLAIN <explain>`
Expand All @@ -34,6 +36,19 @@ Create a new table and its columns from a SQL query executed against an existing
CREATE TABLE new_table AS
SELECT * FROM existing_table WHERE value > 42

.. _delete_from_table:

DELETE
------
Remove specific rows from a table using an (optional) constraint.
Omitting the constraint deletes all rows, equivalent to TRUNCATE.

**Example:**

.. code-block:: sql

DELETE FROM some_table WHERE value < 0

.. _drop_tables:

DROP TABLES
Expand Down
12 changes: 8 additions & 4 deletions py-polars/polars/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4574,10 +4574,10 @@ def estimated_size(self, unit: SizeUnit = "b") -> int | float:

FFI buffers are included in this estimation.

Note
----
For objects, the estimated size only reports the pointer size, which is
a huge underestimation.
Notes
-----
For data with Object dtype, the estimated size only reports the pointer
size, which is a huge underestimation.

Parameters
----------
Expand Down Expand Up @@ -10741,6 +10741,7 @@ def rows_by_key(
include_key: bool = ...,
unique: Literal[False] = ...,
) -> dict[Any, list[Any]]: ...

@overload
def rows_by_key(
self,
Expand All @@ -10750,6 +10751,7 @@ def rows_by_key(
include_key: bool = ...,
unique: Literal[True],
) -> dict[Any, Any]: ...

@overload
def rows_by_key(
self,
Expand All @@ -10759,6 +10761,7 @@ def rows_by_key(
include_key: bool = ...,
unique: Literal[False] = ...,
) -> dict[Any, list[dict[str, Any]]]: ...

@overload
def rows_by_key(
self,
Expand All @@ -10768,6 +10771,7 @@ def rows_by_key(
include_key: bool = ...,
unique: Literal[True],
) -> dict[Any, dict[str, Any]]: ...

def rows_by_key(
self,
key: ColumnNameOrSelector | Sequence[ColumnNameOrSelector],
Expand Down
5 changes: 5 additions & 0 deletions py-polars/polars/series/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1565,6 +1565,11 @@ def estimated_size(self, unit: SizeUnit = "b") -> int | float:

FFI buffers are included in this estimation.

Notes
-----
For data with Object dtype, the estimated size only reports the pointer
size, which is a huge underestimation.

Parameters
----------
unit : {'b', 'kb', 'mb', 'gb', 'tb'}
Expand Down
36 changes: 36 additions & 0 deletions py-polars/tests/unit/sql/test_table_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,42 @@ def test_frame() -> pl.LazyFrame:
)


@pytest.mark.parametrize(
("delete_constraint", "expected_ids"),
[
# basic constraints
("WHERE id = 200", {100, 300}),
("WHERE id = 200 OR id = 300", {100}),
("WHERE id IN (200, 300, 400)", {100}),
("WHERE id NOT IN (200, 300, 400)", {200, 300}),
# more involved constraints
("WHERE EXTRACT(year FROM dt) >= 2000", {200}),
# null-handling (in the data)
("WHERE v1 < 0", {100, 300}),
("WHERE v1 > 0", {200, 300}),
# null handling (in the constraint)
("WHERE v1 IS NULL", {100, 200}),
("WHERE v1 IS NOT NULL", {300}),
# boolean handling (delete all/none)
("WHERE FALSE", {100, 200, 300}),
("WHERE TRUE", set()),
# no constraint; equivalent to TRUNCATE (drop all rows)
("", set()),
],
)
def test_delete_clause(delete_constraint: str, expected_ids: set[int]) -> None:
df = pl.DataFrame(
{
"id": [100, 200, 300],
"dt": [date(2020, 10, 10), date(1999, 1, 2), date(2001, 7, 5)],
"v1": [3.5, -4.0, None],
"v2": [10.0, 2.5, -1.5],
}
)
res = df.sql(f"DELETE FROM self {delete_constraint}")
assert set(res["id"]) == expected_ids


def test_drop_table(test_frame: pl.LazyFrame) -> None:
# 'drop' completely removes the table from sql context
expected = pl.DataFrame()
Expand Down
Loading