From dc758a5f1fb65464208d387b75dd12535605e16f Mon Sep 17 00:00:00 2001 From: Pramod Satya Date: Tue, 11 Feb 2025 12:54:21 -0600 Subject: [PATCH] feat: Capture constant folding errors during expression compilation --- velox/expression/Expr.cpp | 6 ++-- velox/expression/Expr.h | 20 +++++++++++- velox/expression/ExprCompiler.cpp | 51 +++++++++++++++++++------------ velox/expression/ExprCompiler.h | 3 +- 4 files changed, 57 insertions(+), 23 deletions(-) diff --git a/velox/expression/Expr.cpp b/velox/expression/Expr.cpp index 4edf2810db3e..70947d679155 100644 --- a/velox/expression/Expr.cpp +++ b/velox/expression/Expr.cpp @@ -1753,9 +1753,11 @@ std::vector Expr::extractSubfields() const { ExprSet::ExprSet( const std::vector& sources, core::ExecCtx* execCtx, - bool enableConstantFolding) + bool enableConstantFolding, + bool ignoreConstantFoldError) : execCtx_(execCtx) { - exprs_ = compileExpressions(sources, execCtx, this, enableConstantFolding); + exprs_ = compileExpressions( + sources, execCtx, this, enableConstantFolding, ignoreConstantFoldError); std::vector allDistinctFields; for (auto& expr : exprs_) { Expr::mergeFields( diff --git a/velox/expression/Expr.h b/velox/expression/Expr.h index c0cdf0fb85e4..b83b4082187f 100644 --- a/velox/expression/Expr.h +++ b/velox/expression/Expr.h @@ -702,12 +702,30 @@ using ExprPtr = std::shared_ptr; // this ExprSet. If however such an association cannot be made with certainty, // then its advisable to pre-load all lazy vectors to avoid issues associated // with partial loading. +// +// The input typed expressions are compiled in the constructor to exec::Expr +// using the helper function compileExpressions. Constant folding is enabled by +// default during expression compilation and it can be disabled by setting the +// argument enableConstantFolding to false. Constant folding has a subtle +// gotcha: if folding a constant expression deterministically throws, we can't +// throw at expression compilation time yet because we can't guarantee that this +// expression would actually need to be evaluated. +// +// If folding an expression throws an exception, the user can choose whether to +// just ignore it or receive a FailFunction in place of the expression by +// setting argument ignoreConstantFoldError to either true or false +// respectively. By default, we just ignore such exceptions and leave the +// expression as-is. If this expression is hit at execution time and needs to be +// evaluated, it will throw and fail the query anyway. If not, in case this +// expression is never hit at execution time (for instance, if other arguments +// are all null in a function with default null behavior), the query won't fail. class ExprSet { public: explicit ExprSet( const std::vector& source, core::ExecCtx* execCtx, - bool enableConstantFolding = true); + bool enableConstantFolding = true, + bool ignoreConstantFoldError = true); virtual ~ExprSet(); diff --git a/velox/expression/ExprCompiler.cpp b/velox/expression/ExprCompiler.cpp index a678509b4ba6..309eaefc1bba 100644 --- a/velox/expression/ExprCompiler.cpp +++ b/velox/expression/ExprCompiler.cpp @@ -168,7 +168,8 @@ ExprPtr compileExpression( const core::QueryConfig& config, memory::MemoryPool* pool, const std::unordered_set& flatteningCandidates, - bool enableConstantFolding); + bool enableConstantFolding, + bool ignoreConstantFoldError = true); std::vector compileInputs( const TypedExprPtr& expr, @@ -299,7 +300,10 @@ std::shared_ptr compileLambda( config.exprTrackCpuUsage()); } -ExprPtr tryFoldIfConstant(const ExprPtr& expr, Scope* scope) { +ExprPtr tryFoldIfConstant( + const ExprPtr& expr, + Scope* scope, + bool ignoreConstantFoldError) { if (expr->isConstant() && scope->exprSet->execCtx()) { try { auto rowType = ROW({}, {}); @@ -324,18 +328,22 @@ ExprPtr tryFoldIfConstant(const ExprPtr& expr, Scope* scope) { } return resultExpr; } - // Constant folding has a subtle gotcha: if folding a constant expression - // deterministically throws, we can't throw at expression compilation time - // yet because we can't guarantee that this expression would actually need - // to be evaluated. - // - // So, here, if folding an expression throws an exception, we just ignore it - // and leave the expression as-is. If this expression is hit at execution - // time and needs to be evaluated, it will throw and fail the query anyway. - // If not, in case this expression is never hit at execution time (for - // instance, if other arguments are all null in a function with default null - // behavior), the query won't fail. catch (const VeloxUserError&) { + if (!ignoreConstantFoldError) { + // TODO: Map errors to different error codes as per StandardErrorCode + // in presto-spi. + int errorCode = 8; + std::string errorMessage = "ignore failure message"; + auto failTypedExpr = std::make_shared( + UNKNOWN(), + std::vector{ + std::make_shared(INTEGER(), errorCode), + std::make_shared( + VARCHAR(), errorMessage)}, + "fail"); + auto failExprSet = ExprSet({failTypedExpr}, scope->exprSet->execCtx()); + return failExprSet.exprs().front(); + } } } return expr; @@ -372,7 +380,8 @@ ExprPtr compileRewrittenExpression( const core::QueryConfig& config, memory::MemoryPool* pool, const std::unordered_set& flatteningCandidates, - bool enableConstantFolding) { + bool enableConstantFolding, + bool ignoreConstantFoldError) { ExprPtr alreadyCompiled = getAlreadyCompiled(expr.get(), &scope->visited); if (alreadyCompiled) { if (!alreadyCompiled->isMultiplyReferenced()) { @@ -522,7 +531,7 @@ ExprPtr compileRewrittenExpression( // If the expression is constant folding it is redundant. auto folded = enableConstantFolding && !isConstantExpr - ? tryFoldIfConstant(result, scope) + ? tryFoldIfConstant(result, scope, ignoreConstantFoldError) : result; scope->visited[expr.get()] = folded; return folded; @@ -534,7 +543,8 @@ ExprPtr compileExpression( const core::QueryConfig& config, memory::MemoryPool* pool, const std::unordered_set& flatteningCandidates, - bool enableConstantFolding) { + bool enableConstantFolding, + bool ignoreConstantFoldError) { auto rewritten = rewriteExpression(expr); if (rewritten.get() != expr.get()) { scope->rewrittenExpressions.push_back(rewritten); @@ -545,7 +555,8 @@ ExprPtr compileExpression( config, pool, flatteningCandidates, - enableConstantFolding); + enableConstantFolding, + ignoreConstantFoldError); } /// Walk expression tree and collect names of functions used in CallTypedExpr @@ -590,7 +601,8 @@ std::vector> compileExpressions( const std::vector& sources, core::ExecCtx* execCtx, ExprSet* exprSet, - bool enableConstantFolding) { + bool enableConstantFolding, + bool ignoreConstantFoldError) { Scope scope({}, nullptr, exprSet); std::vector> exprs; exprs.reserve(sources.size()); @@ -606,7 +618,8 @@ std::vector> compileExpressions( execCtx->queryCtx()->queryConfig(), execCtx->pool(), flatteningCandidates, - enableConstantFolding)); + enableConstantFolding, + ignoreConstantFoldError)); } return exprs; } diff --git a/velox/expression/ExprCompiler.h b/velox/expression/ExprCompiler.h index a6400834a718..bc08162cb6a4 100644 --- a/velox/expression/ExprCompiler.h +++ b/velox/expression/ExprCompiler.h @@ -29,6 +29,7 @@ std::vector> compileExpressions( const std::vector& sources, core::ExecCtx* execCtx, ExprSet* exprSet, - bool enableConstantFolding = true); + bool enableConstantFolding = true, + bool ignoreConstantFoldError = true); } // namespace facebook::velox::exec