diff --git a/payloadbuilder-api/src/main/java/se/kuseman/payloadbuilder/api/catalog/ResolvedType.java b/payloadbuilder-api/src/main/java/se/kuseman/payloadbuilder/api/catalog/ResolvedType.java index a3250c46..5a0f18c4 100644 --- a/payloadbuilder-api/src/main/java/se/kuseman/payloadbuilder/api/catalog/ResolvedType.java +++ b/payloadbuilder-api/src/main/java/se/kuseman/payloadbuilder/api/catalog/ResolvedType.java @@ -24,8 +24,8 @@ public class ResolvedType } } } - public static ResolvedType STRING = ResolvedType.of(Type.String); - public static ResolvedType ANY = ResolvedType.of(Type.Any); + public static final ResolvedType STRING = ResolvedType.of(Type.String); + public static final ResolvedType ANY = ResolvedType.of(Type.Any); private final Type type; /** Type used do specify the contained type if {@link #type} is {@link Type#Array} */ diff --git a/payloadbuilder-core/src/main/java/se/kuseman/payloadbuilder/core/logicalplan/optimization/SubQueryExpressionPushDown.java b/payloadbuilder-core/src/main/java/se/kuseman/payloadbuilder/core/logicalplan/optimization/SubQueryExpressionPushDown.java index 36e3a962..dcae34eb 100644 --- a/payloadbuilder-core/src/main/java/se/kuseman/payloadbuilder/core/logicalplan/optimization/SubQueryExpressionPushDown.java +++ b/payloadbuilder-core/src/main/java/se/kuseman/payloadbuilder/core/logicalplan/optimization/SubQueryExpressionPushDown.java @@ -202,7 +202,9 @@ protected ILogicalPlan create(Projection plan, Ctx context) context.current = input; context.outerSchema = SchemaUtils.joinSchema(context.outerSchema, input.getSchema()); + // CSOFF int prevSubQueryExpressionOrdinal = context.subQueryExpressionOrdinal; + // CSON // Check if the projection is asterisk or not. Used to determine if we can use ordinals // or not for sub query expression substitute columns. @@ -283,31 +285,23 @@ protected ILogicalPlan create(Projection plan, Ctx context) } // We have nested projections, combine these - if (context.current instanceof Projection p) + if (context.current instanceof MaxRowCountAssert a + && a.getInput() instanceof Projection p) { + // Projection + // -- MaxRowCountAssert <--- context.current + // ---- Projection + // ------- Input + // + // Merge this into: + // Projection <--- Projections merged into this one + // -- MaxRowCountAssert + // ---- Input + // Rewrite this projection expressions with the inner expressions = ProjectionMerger.replace(expressions, p.getExpressions()); // ... and remove the inner projection - context.current = p.getInput(); - } - else if (context.current instanceof OperatorFunctionScan ofs) - { - // Remove this projection since the child is a operator function which already has correct schema - // just change it's name - if (alias != null - && size == 1) - { - // Restore previous context values - context.current = prevCurrent; - context.subQueryExpressionOrdinal = prevSubQueryExpressionOrdinal; - context.outerSchema = prevOuterSchema; - context.inputSchemaAsterisk = prevInputSchemaAsterisk; - - Schema schema = Schema.of(SchemaUtils.rename(ofs.getSchema() - .getColumns() - .get(0), alias)); - return new OperatorFunctionScan(schema, ofs.getInput(), ofs.getCatalogAlias(), ofs.getFunction(), ofs.getLocation()); - } + context.current = new MaxRowCountAssert(p.getInput(), a.getMaxRowCount()); } // CSOFF @@ -340,76 +334,73 @@ public IExpression visit(UnresolvedSubQueryExpression expression, Ctx ctx) ILogicalPlan plan = expression.getInput() .accept(ctx.visitor, ctx); - // The current plan is a constant scan, ie. a nested sub query expression with - // out a table source, then we unwrap this and don't create a join - if (ctx.current instanceof ConstantScan) + // We have a non operator function here then we need to make sure that the query + // don't return more than one row since a sub query expression is a scalar expression + if (wrapInAssert(expression)) { - ctx.current = plan; - } - else - { - // We have a non operator function here then we need to make sure that the query - // don't return more than one row since a sub query expression is a scalar expression - // If the plan is a OperatorFunctionScan it always return one row - if (!(plan instanceof OperatorFunctionScan)) - { - boolean addAssert = true; + boolean has1Limit = (plan instanceof Limit l + && l.getLimitExpression() instanceof LiteralIntegerExpression lie + && lie.getValue() <= 1); - // If we have a limit plan with a literal 1 then we don't need an assert - if (plan instanceof Limit) - { - Limit limit = (Limit) plan; - if (limit.getLimitExpression() instanceof LiteralIntegerExpression - && ((LiteralIntegerExpression) limit.getLimitExpression()).getValue() <= 1) - { - addAssert = false; - } - } - - if (addAssert) - { - plan = new MaxRowCountAssert(plan, 1); - } + // If we have a limit plan with a literal 1 then we don't need an assert + if (!has1Limit) + { + plan = new MaxRowCountAssert(plan, 1); } + } - /* - * @formatter:off - * - * if we have a non-correlated sub query then we put the plan as outer in a nested loop (left) because - * then that will only execute once BUT we must make sure that the query never circuit breaks - * the main query by returning zero rows (that will be catastrophic because the main query will be empty) - * so we nest it in another nested loop (left) - * with a constant scan as outer, that way we are guaranteed that we always end up with at least one row. - * - * nested loop (left) - * nested loop (left) <-- Will always return at least 1 row - * constant scan - * sub query plan - * current plan - * - * if we have a correlated sub query we simply put a nested loop with plan as inner since the existing plan will be outer. - * If we are on top it's safe because if that returns 0 rows we will get 0 rows in the main query. if it's a nested - * sub query we are also safe because of previous bullet - * - * nested loop (left) - * current plan - * sub query plan - * - * - * @formatter:on - */ - - boolean correlated = !expression.getOuterReferences() - .isEmpty(); - - // If the sub query is not correlated we put the sub query as the outer in the join - // this because we only want to execute that once since it doesn't change - // We also need to flag for schema switch to stay consistent - ILogicalPlan left = correlated ? ctx.current - : plan; - ILogicalPlan right = correlated ? plan - : ctx.current; - + /* + * @formatter:off + * + * if we have a non-correlated sub query then we put the plan as outer in a nested loop (left) because + * then that will only execute once BUT we must make sure that the query never circuit breaks + * the main query by returning zero rows (that will be catastrophic because the main query will be empty) + * so we nest it in another nested loop (left) + * with a constant scan as outer, that way we are guaranteed that we always end up with at least one row. + * + * nested loop (left) + * nested loop (left) <-- Will always return at least 1 row + * constant scan + * sub query plan + * current plan + * + * if we have a correlated sub query we simply put a nested loop with plan as inner and the existing plan will be outer. + * If we are on top (FROM part) it's safe because if that returns 0 rows we will get 0 rows in the main query. + * + * Top: + * nested loop (left) + * FROM: current plan (0 rows => whole query 0 rows) + * sub query plan (0 rows => null for subquery value) + * + * Nested: + * nested loop (left) + * nested loop (left) (0 rows => whole query 0 rows) + * FROM: current plan + * sub query plan + * sub query plan (0 rows => null for subquery value) + * + * @formatter:on + */ + + boolean correlated = !expression.getOuterReferences() + .isEmpty(); + + // If the sub query is not correlated we put the sub query as the outer in the join + // this because we only want to execute that once since it doesn't change + // We also need to flag for schema switch to stay consistent + ILogicalPlan left = correlated ? ctx.current + : plan; + ILogicalPlan right = correlated ? plan + : ctx.current; + + // If the right plan is a constant scan we don't need to wrap in a left join + // since that won't guard against anything only an extra join for no reason + if (right instanceof ConstantScan) + { + ctx.current = left; + } + else + { // Nest plan in another nested loop to guarantee at least one row if (!correlated) { @@ -445,5 +436,22 @@ public IExpression visit(UnresolvedSubQueryExpression expression, Ctx ctx) // Return an alias expression for the current sub query return builder.build(); } + + private boolean wrapInAssert(UnresolvedSubQueryExpression expression) + { + // Operator function scans always return one row + if (expression.getInput() instanceof OperatorFunctionScan) + { + return false; + } + // Constant scans always return one row + else if (expression.getInput() instanceof Projection p + && p.getInput() instanceof ConstantScan) + { + return false; + } + + return true; + } } } diff --git a/payloadbuilder-core/src/test/java/se/kuseman/payloadbuilder/core/logicalplan/optimization/SubQueryExpressionPushDownTest.java b/payloadbuilder-core/src/test/java/se/kuseman/payloadbuilder/core/logicalplan/optimization/SubQueryExpressionPushDownTest.java index 50ddd54a..404c6c06 100644 --- a/payloadbuilder-core/src/test/java/se/kuseman/payloadbuilder/core/logicalplan/optimization/SubQueryExpressionPushDownTest.java +++ b/payloadbuilder-core/src/test/java/se/kuseman/payloadbuilder/core/logicalplan/optimization/SubQueryExpressionPushDownTest.java @@ -5,6 +5,7 @@ import java.util.ArrayList; import java.util.HashMap; +import java.util.List; import java.util.Random; import java.util.Set; @@ -132,6 +133,133 @@ ORDER BY ISNULL(ar.defaultfront, 0) assertEquals(expected, actual); } + @Test + public void test_operator_function_with_nested_sub_query() + { + String q = """ + + SELECT + ( + SELECT c.name + , c.id + , ( + SELECT t2.* + FROM (c.tbl) t + CROSS APPLY (t.tbl2) t2 + WHERE t2.languagecode = 'sv' + FOR OBJECT_ARRAY + ) fields + FOR OBJECT + ) category + FROM product p + INNER JOIN category c + on c.id = p.categoryId + """; + + ILogicalPlan plan = getColumnResolvedPlan(q); + ILogicalPlan actual = optimize(context, plan); + + TableSourceReference tableProduct = new TableSourceReference(0, TableSourceReference.Type.TABLE, "", QualifiedName.of("product"), "p"); + TableSourceReference tableCategory = new TableSourceReference(1, TableSourceReference.Type.TABLE, "", QualifiedName.of("category"), "c"); + TableSourceReference t = new TableSourceReference(2, TableSourceReference.Type.EXPRESSION, "", QualifiedName.of("c.tbl"), "t"); + TableSourceReference t2 = new TableSourceReference(3, TableSourceReference.Type.EXPRESSION, "", QualifiedName.of("t.tbl2"), "t2"); + + Schema schemaProduct = Schema.of(ast("p", Type.Any, tableProduct)); + Schema schemaCategory = Schema.of(ast("c", Type.Any, tableCategory)); + + Schema schemaT = Schema.of(ast("t", Type.Any, t)); + Schema schemaT2 = Schema.of(ast("t2", Type.Any, t2)); + + //@formatter:off + Schema categorySchema = Schema.of( + nast("name", Type.Any, tableCategory), + nast("id", Type.Any, tableCategory), + col("fields", ResolvedType.table(Schema.of( + new CoreColumn("", ResolvedType.ANY, "t2.*", false, t2, CoreColumn.Type.ASTERISK))), null) + ); + + ILogicalPlan expected = projection( + new Join( + new Join( + tableScan(schemaProduct, tableProduct), + tableScan(schemaCategory, tableCategory), + Join.Type.INNER, + null, + eq(cre("id", tableCategory), cre("categoryId", tableProduct)), + Set.of(), + false, + Schema.EMPTY), + new OperatorFunctionScan( + Schema.of(nast("__expr0", ResolvedType.object(categorySchema), null)), + projection( + new Join( + ConstantScan.INSTANCE, + new OperatorFunctionScan( + Schema.of(nast("__expr1", categorySchema.getColumns().get(2).getType(), null)), + projection( + new Filter( + new Join( + new ExpressionScan(t, schemaT, ocre("tbl", tableCategory), null), + new ExpressionScan(t2, schemaT2, ocre("tbl2", t), null), + Join.Type.INNER, + null, + null, + Set.of(nast("tbl2", Type.Any, t)), + false, + Schema.of( + ast("p", Type.Any, tableProduct), + ast("c", Type.Any, tableCategory), + ast("t", Type.Any, t) + )), + null, + eq(cre("languagecode", t2), new LiteralStringExpression("sv"))), + List.of(new AsteriskExpression(QualifiedName.of("t2"), null, Set.of(t2)))), + "", + "OBJECT_ARRAY", + null), + Join.Type.LEFT, + null, + null, + Set.of(nast("tbl2", Type.Any, t), nast("tbl", Type.Any, tableCategory)), + false, + Schema.of( + ast("p", Type.Any, tableProduct), + ast("c", Type.Any, tableCategory) + )), + List.of(ocre("name", tableCategory), + ocre("id", tableCategory), + new AliasExpression(ce("__expr1", 0, categorySchema.getColumns().get(2).getType()), "fields"))), + "", + "OBJECT", + null), + Join.Type.LEFT, + null, + null, + Set.of( + nast("tbl2", Type.Any, t), + nast("name", Type.Any, tableCategory), + nast("id", Type.Any, tableCategory), + nast("tbl", Type.Any, tableCategory) + ), + false, + Schema.of( + ast("p", Type.Any, tableProduct), + ast("c", Type.Any, tableCategory) + )), + List.of(new AliasExpression(ce("__expr0", ResolvedType.object(categorySchema)), "category"))); + //@formatter:on + + // System.out.println(actual.print(0)); + // System.out.println(expected.print(0)); + + Assertions.assertThat(actual) + .usingRecursiveComparison() + .ignoringFieldsOfTypes(Location.class, Random.class) + .isEqualTo(expected); + + assertEquals(expected, actual); + } + @Test public void test_operator_function_mixed_with_sub_query_scalar_and_correlation() { @@ -193,57 +321,67 @@ public void test_operator_function_mixed_with_sub_query_scalar_and_correlation() asSet(), false, schemaProductArticle), - new OperatorFunctionScan(Schema.of(nast("__expr0", ResolvedType.table(objectArraySchema), null)), - projection( - new Join( - new Join( + projection( + new Join( + ConstantScan.INSTANCE, + new OperatorFunctionScan(Schema.of(nast("__expr1", ResolvedType.table(objectArraySchema), null)), + projection( new Join( - ConstantScan.INSTANCE, + new Join( + new Join( + ConstantScan.INSTANCE, + new MaxRowCountAssert( + projection( + tableScan(schemaArticle, tableArticle), + asList(new AliasExpression(cre("col", tableArticle), "__expr2"))), + 1), + Join.Type.LEFT, + null, + (IExpression) null, + asSet(), + false, + Schema.EMPTY), + new ExpressionScan( + e_bb, + schemaBB, + ocre("bb", tableB, ResolvedType.table(schemaTableB), CoreColumn.Type.POPULATED), + null), + Join.Type.LEFT, + null, + (IExpression) null, + asSet(), + true, + SchemaUtils.joinSchema(SchemaUtils.joinSchema(schemaProductArticle, schemaTableB, "bb"), schemaBB)), new MaxRowCountAssert( - projection( - tableScan(schemaArticle, tableArticle), - asList(new AliasExpression(cre("col", tableArticle), "__expr2"))), + projection( + new Filter( + tableScan(schemaProduct, tableProduct), + null, + eq(ocre("col", tableProductArticle), cre("col4", tableProduct))), + asList(new AliasExpression(cre("col2", tableProduct), "__expr3"))), 1), Join.Type.LEFT, null, (IExpression) null, - asSet(), + asSet(nast("col", Type.Any, tableProductArticle)), false, - Schema.EMPTY), - new ExpressionScan( - e_bb, - schemaBB, - ocre("bb", tableB, ResolvedType.table(schemaTableB), CoreColumn.Type.POPULATED), - null), - Join.Type.LEFT, - null, - (IExpression) null, - asSet(), - true, - SchemaUtils.joinSchema(SchemaUtils.joinSchema(schemaProductArticle, schemaTableB, "bb"), schemaBB)), - new MaxRowCountAssert( - projection( - new Filter( - tableScan(schemaProduct, tableProduct), - null, - eq(ocre("col", tableProductArticle), cre("col4", tableProduct))), - asList(new AliasExpression(cre("col2", tableProduct), "__expr3"))), - 1), - Join.Type.LEFT, - null, - (IExpression) null, - asSet(nast("col", Type.Any, tableProductArticle)), - false, - SchemaUtils.joinSchema( - SchemaUtils.joinSchema( - SchemaUtils.joinSchema(schemaProductArticle, schemaTableB, "bb"), schemaBB), - Schema.of(nast("__expr2", ResolvedType.of(Type.Any), tableArticle)))), - asList(new AliasExpression(cre("__expr2", tableArticle), "val"), - new AliasExpression(cre("__expr3", tableProduct), "val1"), ocre("col5", tableProductArticle)) - ), - "", - "object_array", - null), + SchemaUtils.joinSchema( + SchemaUtils.joinSchema( + SchemaUtils.joinSchema(schemaProductArticle, schemaTableB, "bb"), schemaBB), + Schema.of(nast("__expr2", ResolvedType.of(Type.Any), tableArticle)))), + asList(new AliasExpression(cre("__expr2", tableArticle), "val"), + new AliasExpression(cre("__expr3", tableProduct), "val1"), ocre("col5", tableProductArticle)) + ), + "", + "object_array", + null), + Join.Type.LEFT, + null, + (IExpression) null, + asSet(pop("bb", ResolvedType.table(schemaTableB), tableB), nast("col", Type.Any, tableProductArticle), nast("col5", Type.Any, tableProductArticle)), + false, + SchemaUtils.joinSchema(schemaProductArticle, schemaTableB, "bb")), + asList(new AliasExpression(ce("__expr1", 0, ResolvedType.table(objectArraySchema)), "__expr0"))), Join.Type.LEFT, null, (IExpression) null, @@ -941,11 +1079,11 @@ public void test_nested_sub_queries_without_for_gets_unnested() new Join( new Join( ConstantScan.INSTANCE, - new MaxRowCountAssert( - projection( + projection( + new MaxRowCountAssert( tableScan(schemaB, tableB), - asList(new AliasExpression(add(cre("col1", tableB), intLit(1)), "__expr0"))), - 1), + 1), + asList(new AliasExpression(add(cre("col1", tableB), intLit(1)), "__expr0"))), Join.Type.LEFT, null, (IExpression) null,