Skip to content

Commit

Permalink
feat: Add SQL support for the DELETE statement (#21190)
Browse files Browse the repository at this point in the history
  • Loading branch information
alexander-beedie authored Feb 12, 2025
1 parent 0654544 commit f69768d
Show file tree
Hide file tree
Showing 5 changed files with 140 additions and 15 deletions.
87 changes: 76 additions & 11 deletions crates/polars-sql/src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@ use polars_plan::dsl::function_expr::StructFunction;
use polars_plan::prelude::*;
use polars_utils::format_pl_smallstr;
use sqlparser::ast::{
BinaryOperator, CreateTable, Distinct, ExcludeSelectItem, Expr as SQLExpr, FunctionArg,
GroupByExpr, Ident, JoinConstraint, JoinOperator, ObjectName, ObjectType, Offset, OrderBy,
Query, RenameSelectItem, Select, SelectItem, SetExpr, SetOperator, SetQuantifier, Statement,
TableAlias, TableFactor, TableWithJoins, UnaryOperator, Value as SQLValue, Values,
BinaryOperator, CreateTable, Delete, Distinct, ExcludeSelectItem, Expr as SQLExpr, FromTable,
FunctionArg, GroupByExpr, Ident, JoinConstraint, JoinOperator, ObjectName, ObjectType, Offset,
OrderBy, Query, RenameSelectItem, Select, SelectItem, SetExpr, SetOperator, SetQuantifier,
Statement, TableAlias, TableFactor, TableWithJoins, UnaryOperator, Value as SQLValue, Values,
WildcardAdditionalOptions,
};
use sqlparser::dialect::GenericDialect;
Expand Down Expand Up @@ -201,8 +201,9 @@ impl SQLContext {
} => self.execute_drop_table(stmt)?,
stmt @ Statement::Explain { .. } => self.execute_explain(stmt)?,
stmt @ Statement::Truncate { .. } => self.execute_truncate_table(stmt)?,
stmt @ Statement::Delete { .. } => self.execute_delete_from_table(stmt)?,
_ => polars_bail!(
SQLInterface: "statement type {:?} is not supported", ast,
SQLInterface: "statement type is not supported:\n{:?}", ast,
),
})
}
Expand Down Expand Up @@ -471,7 +472,7 @@ impl SQLContext {
let df = DataFrame::new(vec![plan])?;
Ok(df.lazy())
},
_ => unreachable!(),
_ => polars_bail!(SQLInterface: "unexpected statement type; expected EXPLAIN"),
}
}

Expand All @@ -482,6 +483,7 @@ impl SQLContext {
Ok(df.lazy())
}

// DROP TABLE <tbl>
fn execute_drop_table(&mut self, stmt: &Statement) -> PolarsResult<LazyFrame> {
match stmt {
Statement::Drop { names, .. } => {
Expand All @@ -490,10 +492,68 @@ impl SQLContext {
});
Ok(DataFrame::empty().lazy())
},
_ => unreachable!(),
_ => polars_bail!(SQLInterface: "unexpected statement type; expected DROP"),
}
}

/// Execute `DELETE FROM <tbl> [WHERE ...]`.
///
/// Returns a `LazyFrame` holding the rows of the target table that are *not*
/// matched by the WHERE constraint. When no constraint is given every row is
/// dropped (equivalent to TRUNCATE) and an empty frame carrying the table's
/// schema is returned.
///
/// # Errors
///
/// Returns a `SQLInterface` error when:
/// - `stmt` is not a `Statement::Delete`,
/// - the multi-table form, or a USING / RETURNING / LIMIT / ORDER BY clause,
///   is present,
/// - the FROM clause does not name exactly one table, or includes JOINs.
///
/// Errors from table lookup, schema resolution and WHERE processing are
/// propagated unchanged.
fn execute_delete_from_table(&mut self, stmt: &Statement) -> PolarsResult<LazyFrame> {
    let Statement::Delete(Delete {
        tables,
        from,
        using,
        selection,
        returning,
        order_by,
        limit,
    }) = stmt
    else {
        polars_bail!(SQLInterface: "unexpected statement type; expected DELETE")
    };

    // Reject every clause we do not implement; the first match (in this order)
    // determines the message that is reported.
    let unsupported = if !tables.is_empty() {
        Some("DELETE expects exactly one table name")
    } else if using.is_some() {
        Some("DELETE does not support the USING clause")
    } else if returning.is_some() {
        Some("DELETE does not support the RETURNING clause")
    } else if limit.is_some() {
        Some("DELETE does not support the LIMIT clause")
    } else if !order_by.is_empty() {
        Some("DELETE does not support the ORDER BY clause")
    } else {
        None
    };
    if let Some(error_message) = unsupported {
        polars_bail!(SQLInterface: error_message);
    }

    // Both spellings ("DELETE FROM t" / "DELETE t") carry the same table list.
    let from_tables = match from {
        FromTable::WithFromKeyword(tbls) | FromTable::WithoutKeyword(tbls) => tbls,
    };
    let tbl_expr = match from_tables.as_slice() {
        [tbl] => tbl,
        // An empty list is reported as an error rather than panicking on `unwrap`.
        [] => polars_bail!(SQLInterface: "DELETE expects exactly one table name"),
        tbls => polars_bail!(
            SQLInterface: "cannot have multiple tables in DELETE FROM (found {})", tbls.len()
        ),
    };
    if !tbl_expr.joins.is_empty() {
        polars_bail!(SQLInterface: "DELETE does not support table JOINs")
    }

    let (_, mut lf) = self.get_table(&tbl_expr.relation)?;
    if selection.is_none() {
        // no WHERE clause; equivalent to TRUNCATE (drop all rows).
        // Schema-resolution failures are propagated instead of unwrapped.
        let schema = self.get_frame_schema(&mut lf)?;
        Ok(DataFrame::empty_with_schema(schema.as_ref()).lazy())
    } else {
        // apply constraint as inverted filter (drops rows matching the selection)
        self.process_where(lf, selection, true)
    }
}

// TRUNCATE <tbl>
fn execute_truncate_table(&mut self, stmt: &Statement) -> PolarsResult<LazyFrame> {
if let Statement::Truncate {
table_names,
Expand Down Expand Up @@ -524,7 +584,7 @@ impl SQLContext {
},
}
} else {
unreachable!()
polars_bail!(SQLInterface: "unexpected statement type; expected TRUNCATE")
}
}

Expand Down Expand Up @@ -645,7 +705,7 @@ impl SQLContext {

// Filter expression (WHERE clause)
let schema = self.get_frame_schema(&mut lf)?;
lf = self.process_where(lf, &select_stmt.selection)?;
lf = self.process_where(lf, &select_stmt.selection, false)?;

// 'SELECT *' modifiers
let mut select_modifiers = SelectModifiers {
Expand Down Expand Up @@ -907,6 +967,7 @@ impl SQLContext {
&mut self,
mut lf: LazyFrame,
expr: &Option<SQLExpr>,
invert_filter: bool,
) -> PolarsResult<LazyFrame> {
if let Some(expr) = expr {
let schema = self.get_frame_schema(&mut lf)?;
Expand All @@ -923,9 +984,9 @@ impl SQLContext {
},
_ => (false, false),
};
if all_true {
if (all_true && !invert_filter) || (all_false && invert_filter) {
return Ok(lf);
} else if all_false {
} else if (all_false && !invert_filter) || (all_true && invert_filter) {
return Ok(DataFrame::empty_with_schema(schema.as_ref()).lazy());
}

Expand All @@ -935,6 +996,10 @@ impl SQLContext {
filter_expression = all_horizontal([filter_expression])?;
}
lf = self.process_subqueries(lf, vec![&mut filter_expression]);
if invert_filter {
// negate the filter (being careful about null values)
filter_expression = filter_expression.neq_missing(lit(true))
}
lf = lf.filter(filter_expression);
}
Ok(lf)
Expand Down
15 changes: 15 additions & 0 deletions py-polars/docs/source/reference/sql/table_operations.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ Table Operations
- Description
* - :ref:`CREATE TABLE <create_table>`
- Create a new table and its columns from a SQL query executed against an existing table.
* - :ref:`DELETE FROM <delete_from_table>`
- Remove specific rows of data from a table using an (optional) constraint.
* - :ref:`DROP TABLES <drop_tables>`
- Deletes the specified table, unregistering it.
* - :ref:`EXPLAIN <explain>`
Expand All @@ -34,6 +36,19 @@ Create a new table and its columns from a SQL query executed against an existing
CREATE TABLE new_table AS
SELECT * FROM existing_table WHERE value > 42
.. _delete_from_table:

DELETE
------
Remove specific rows from a table using an (optional) constraint.
Omitting the constraint deletes all rows, equivalent to TRUNCATE.

**Example:**

.. code-block:: sql

    DELETE FROM some_table WHERE value < 0

.. _drop_tables:

DROP TABLES
Expand Down
12 changes: 8 additions & 4 deletions py-polars/polars/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4574,10 +4574,10 @@ def estimated_size(self, unit: SizeUnit = "b") -> int | float:
FFI buffers are included in this estimation.
Note
----
For objects, the estimated size only reports the pointer size, which is
a huge underestimation.
Notes
-----
For data with Object dtype, the estimated size only reports the pointer
size, which is a huge underestimation.
Parameters
----------
Expand Down Expand Up @@ -10741,6 +10741,7 @@ def rows_by_key(
include_key: bool = ...,
unique: Literal[False] = ...,
) -> dict[Any, list[Any]]: ...

@overload
def rows_by_key(
self,
Expand All @@ -10750,6 +10751,7 @@ def rows_by_key(
include_key: bool = ...,
unique: Literal[True],
) -> dict[Any, Any]: ...

@overload
def rows_by_key(
self,
Expand All @@ -10759,6 +10761,7 @@ def rows_by_key(
include_key: bool = ...,
unique: Literal[False] = ...,
) -> dict[Any, list[dict[str, Any]]]: ...

@overload
def rows_by_key(
self,
Expand All @@ -10768,6 +10771,7 @@ def rows_by_key(
include_key: bool = ...,
unique: Literal[True],
) -> dict[Any, dict[str, Any]]: ...

def rows_by_key(
self,
key: ColumnNameOrSelector | Sequence[ColumnNameOrSelector],
Expand Down
5 changes: 5 additions & 0 deletions py-polars/polars/series/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1565,6 +1565,11 @@ def estimated_size(self, unit: SizeUnit = "b") -> int | float:
FFI buffers are included in this estimation.
Notes
-----
For data with Object dtype, the estimated size only reports the pointer
size, which is a huge underestimation.
Parameters
----------
unit : {'b', 'kb', 'mb', 'gb', 'tb'}
Expand Down
36 changes: 36 additions & 0 deletions py-polars/tests/unit/sql/test_table_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,42 @@ def test_frame() -> pl.LazyFrame:
)


@pytest.mark.parametrize(
    ("delete_constraint", "expected_ids"),
    [
        # simple equality / membership predicates
        ("WHERE id = 200", {100, 300}),
        ("WHERE id = 200 OR id = 300", {100}),
        ("WHERE id IN (200, 300, 400)", {100}),
        ("WHERE id NOT IN (200, 300, 400)", {200, 300}),
        # predicate built from a function call
        ("WHERE EXTRACT(year FROM dt) >= 2000", {200}),
        # comparisons against a column that contains a null
        ("WHERE v1 < 0", {100, 300}),
        ("WHERE v1 > 0", {200, 300}),
        # explicit null checks in the predicate
        ("WHERE v1 IS NULL", {100, 200}),
        ("WHERE v1 IS NOT NULL", {300}),
        # constant boolean predicates (delete nothing / delete everything)
        ("WHERE FALSE", {100, 200, 300}),
        ("WHERE TRUE", set()),
        # no predicate at all; behaves like TRUNCATE (every row removed)
        ("", set()),
    ],
)
def test_delete_clause(delete_constraint: str, expected_ids: set[int]) -> None:
    """Check which row ids remain after running DELETE with the given constraint."""
    frame = pl.DataFrame(
        {
            "id": [100, 200, 300],
            "dt": [date(2020, 10, 10), date(1999, 1, 2), date(2001, 7, 5)],
            "v1": [3.5, -4.0, None],
            "v2": [10.0, 2.5, -1.5],
        }
    )
    query = f"DELETE FROM self {delete_constraint}"
    result = frame.sql(query)

    remaining_ids = set(result["id"])
    assert remaining_ids == expected_ids


def test_drop_table(test_frame: pl.LazyFrame) -> None:
# 'drop' completely removes the table from sql context
expected = pl.DataFrame()
Expand Down

0 comments on commit f69768d

Please sign in to comment.