Skip to content

Commit

Permalink
feat: Add SQL support for the DELETE statement
Browse files Browse the repository at this point in the history
  • Loading branch information
alexander-beedie committed Feb 11, 2025
1 parent c1e493a commit ba514a5
Show file tree
Hide file tree
Showing 2 changed files with 112 additions and 11 deletions.
87 changes: 76 additions & 11 deletions crates/polars-sql/src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@ use polars_plan::dsl::function_expr::StructFunction;
use polars_plan::prelude::*;
use polars_utils::format_pl_smallstr;
use sqlparser::ast::{
BinaryOperator, CreateTable, Distinct, ExcludeSelectItem, Expr as SQLExpr, FunctionArg,
GroupByExpr, Ident, JoinConstraint, JoinOperator, ObjectName, ObjectType, Offset, OrderBy,
Query, RenameSelectItem, Select, SelectItem, SetExpr, SetOperator, SetQuantifier, Statement,
TableAlias, TableFactor, TableWithJoins, UnaryOperator, Value as SQLValue, Values,
BinaryOperator, CreateTable, Delete, Distinct, ExcludeSelectItem, Expr as SQLExpr, FromTable,
FunctionArg, GroupByExpr, Ident, JoinConstraint, JoinOperator, ObjectName, ObjectType, Offset,
OrderBy, Query, RenameSelectItem, Select, SelectItem, SetExpr, SetOperator, SetQuantifier,
Statement, TableAlias, TableFactor, TableWithJoins, UnaryOperator, Value as SQLValue, Values,
WildcardAdditionalOptions,
};
use sqlparser::dialect::GenericDialect;
Expand Down Expand Up @@ -201,8 +201,9 @@ impl SQLContext {
} => self.execute_drop_table(stmt)?,
stmt @ Statement::Explain { .. } => self.execute_explain(stmt)?,
stmt @ Statement::Truncate { .. } => self.execute_truncate_table(stmt)?,
stmt @ Statement::Delete { .. } => self.execute_delete_from_table(stmt)?,
_ => polars_bail!(
SQLInterface: "statement type {:?} is not supported", ast,
SQLInterface: "statement type is not supported:\n{:?}", ast,
),
})
}
Expand Down Expand Up @@ -471,7 +472,7 @@ impl SQLContext {
let df = DataFrame::new(vec![plan])?;
Ok(df.lazy())
},
_ => unreachable!(),
_ => polars_bail!(SQLInterface: "unexpected statement type; expected EXPLAIN"),
}
}

Expand All @@ -482,6 +483,7 @@ impl SQLContext {
Ok(df.lazy())
}

// DROP TABLE <tbl>
fn execute_drop_table(&mut self, stmt: &Statement) -> PolarsResult<LazyFrame> {
match stmt {
Statement::Drop { names, .. } => {
Expand All @@ -490,10 +492,68 @@ impl SQLContext {
});
Ok(DataFrame::empty().lazy())
},
_ => unreachable!(),
_ => polars_bail!(SQLInterface: "unexpected statement type; expected DROP"),
}
}

// DELETE FROM <tbl> [WHERE ...]
//
// Resolves the single target table of a DELETE statement and returns a frame
// containing the rows that remain once the matching rows are dropped:
//  - without a WHERE clause the result is an empty frame carrying the table's
//    schema (same outcome as TRUNCATE);
//  - with a WHERE clause the predicate is handed to `process_where` with
//    `invert_filter = true`, so rows matching the predicate are removed.
//
// Errors (all `SQLInterface`):
//  - the statement is not a DELETE;
//  - USING / RETURNING / LIMIT / ORDER BY, a multi-table delete list, or a
//    JOIN on the target table is present;
//  - the FROM list does not name exactly one table;
//  - plus anything raised while resolving the table, its schema, or the filter.
fn execute_delete_from_table(&mut self, stmt: &Statement) -> PolarsResult<LazyFrame> {
    if let Statement::Delete(Delete {
        tables,
        from,
        using,
        selection,
        returning,
        order_by,
        limit,
    }) = stmt
    {
        // reject unsupported clauses up-front; the first offending clause
        // (in this order) determines the error message
        if !tables.is_empty() {
            polars_bail!(SQLInterface: "DELETE expects exactly one table name");
        } else if using.is_some() {
            polars_bail!(SQLInterface: "DELETE does not support the USING clause");
        } else if returning.is_some() {
            polars_bail!(SQLInterface: "DELETE does not support the RETURNING clause");
        } else if limit.is_some() {
            polars_bail!(SQLInterface: "DELETE does not support the LIMIT clause");
        } else if !order_by.is_empty() {
            polars_bail!(SQLInterface: "DELETE does not support the ORDER BY clause");
        }

        // the table list is the same whether or not the FROM keyword was written
        let from_tables = match from {
            FromTable::WithFromKeyword(from_tables) | FromTable::WithoutKeyword(from_tables) => {
                from_tables
            },
        };

        // exactly one target table is required; an empty list is reported as
        // an error rather than panicking
        let tbl_expr = match from_tables.as_slice() {
            [tbl_expr] => tbl_expr,
            [] => polars_bail!(SQLInterface: "DELETE FROM requires a table name"),
            _ => polars_bail!(
                SQLInterface: "cannot have multiple tables in DELETE FROM (found {})",
                from_tables.len()
            ),
        };
        if !tbl_expr.joins.is_empty() {
            polars_bail!(SQLInterface: "DELETE does not support table JOINs");
        }

        let (_, mut lf) = self.get_table(&tbl_expr.relation)?;
        if selection.is_none() {
            // no WHERE clause; equivalent to TRUNCATE (drop all rows).
            // schema-resolution failures are propagated, not unwrapped
            let schema = self.get_frame_schema(&mut lf)?;
            Ok(DataFrame::empty_with_schema(schema.as_ref()).lazy())
        } else {
            // apply constraint as inverted filter (drops rows matching the selection)
            self.process_where(lf, selection, true)
        }
    } else {
        polars_bail!(SQLInterface: "unexpected statement type; expected DELETE")
    }
}

// TRUNCATE <tbl>
fn execute_truncate_table(&mut self, stmt: &Statement) -> PolarsResult<LazyFrame> {
if let Statement::Truncate {
table_names,
Expand Down Expand Up @@ -524,7 +584,7 @@ impl SQLContext {
},
}
} else {
unreachable!()
polars_bail!(SQLInterface: "unexpected statement type; expected TRUNCATE")
}
}

Expand Down Expand Up @@ -645,7 +705,7 @@ impl SQLContext {

// Filter expression (WHERE clause)
let schema = self.get_frame_schema(&mut lf)?;
lf = self.process_where(lf, &select_stmt.selection)?;
lf = self.process_where(lf, &select_stmt.selection, false)?;

// 'SELECT *' modifiers
let mut select_modifiers = SelectModifiers {
Expand Down Expand Up @@ -907,6 +967,7 @@ impl SQLContext {
&mut self,
mut lf: LazyFrame,
expr: &Option<SQLExpr>,
invert_filter: bool,
) -> PolarsResult<LazyFrame> {
if let Some(expr) = expr {
let schema = self.get_frame_schema(&mut lf)?;
Expand All @@ -923,9 +984,9 @@ impl SQLContext {
},
_ => (false, false),
};
if all_true {
if (all_true && !invert_filter) || (all_false && invert_filter) {
return Ok(lf);
} else if all_false {
} else if (all_false && !invert_filter) || (all_true && invert_filter) {
return Ok(DataFrame::empty_with_schema(schema.as_ref()).lazy());
}

Expand All @@ -935,6 +996,10 @@ impl SQLContext {
filter_expression = all_horizontal([filter_expression])?;
}
lf = self.process_subqueries(lf, vec![&mut filter_expression]);
if invert_filter {
// negate the filter (being careful about null values)
filter_expression = filter_expression.neq_missing(lit(true))
}
lf = lf.filter(filter_expression);
}
Ok(lf)
Expand Down
36 changes: 36 additions & 0 deletions py-polars/tests/unit/sql/test_table_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,42 @@ def test_frame() -> pl.LazyFrame:
)


@pytest.mark.parametrize(
    ("delete_constraint", "expected_ids"),
    [
        # simple equality / membership predicates
        ("WHERE id = 200", {100, 300}),
        ("WHERE id = 200 OR id = 300", {100}),
        ("WHERE id IN (200, 300, 400)", {100}),
        ("WHERE id NOT IN (200, 300, 400)", {200, 300}),
        # predicate built from a function call
        ("WHERE EXTRACT(year FROM dt) >= 2000", {200}),
        # column contains a null: the null row never matches, so it is kept
        ("WHERE v1 < 0", {100, 300}),
        ("WHERE v1 > 0", {200, 300}),
        # predicate tests for null explicitly
        ("WHERE v1 IS NULL", {100, 200}),
        ("WHERE v1 IS NOT NULL", {300}),
        # constant boolean predicates (delete nothing / delete everything)
        ("WHERE FALSE", {100, 200, 300}),
        ("WHERE TRUE", set()),
        # no WHERE clause at all: every row is dropped, as with TRUNCATE
        ("", set()),
    ],
)
def test_delete_clause(delete_constraint: str, expected_ids: set[int]) -> None:
    """Check which `id` values survive `DELETE FROM self <constraint>`."""
    columns = {
        "id": [100, 200, 300],
        "dt": [date(2020, 10, 10), date(1999, 1, 2), date(2001, 7, 5)],
        "v1": [3.5, -4.0, None],
        "v2": [10.0, 2.5, -1.5],
    }
    frame = pl.DataFrame(columns)

    query = f"DELETE FROM self {delete_constraint}"
    remaining_ids = set(frame.sql(query)["id"])

    assert remaining_ids == expected_ids


def test_drop_table(test_frame: pl.LazyFrame) -> None:
# 'drop' completely removes the table from sql context
expected = pl.DataFrame()
Expand Down

0 comments on commit ba514a5

Please sign in to comment.