@@ -541,9 +541,6 @@ class JsonParseFunction : public exec::VectorFunction {
541
541
542
542
class JsonExtractFunction : public exec ::VectorFunction {
543
543
public:
544
- JsonExtractFunction (bool extractScalarOnly)
545
- : extractScalarOnly_(extractScalarOnly) {}
546
-
547
544
void apply (
548
545
const SelectivityVector& rows,
549
546
std::vector<VectorPtr>& args,
@@ -564,21 +561,7 @@ class JsonExtractFunction : public exec::VectorFunction {
564
561
context.moveOrCopyResult (localResult, rows, result);
565
562
}
566
563
567
- static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures (
568
- bool extractScalarOnly) {
569
- if (extractScalarOnly) {
570
- return {
571
- exec::FunctionSignatureBuilder ()
572
- .returnType (" varchar" )
573
- .argumentType (" json" )
574
- .argumentType (" varchar" )
575
- .build (),
576
- exec::FunctionSignatureBuilder ()
577
- .returnType (" varchar" )
578
- .argumentType (" varchar" )
579
- .argumentType (" varchar" )
580
- .build ()};
581
- }
564
+ static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures () {
582
565
return {
583
566
exec::FunctionSignatureBuilder ()
584
567
.returnType (" json" )
@@ -612,13 +595,8 @@ class JsonExtractFunction : public exec::VectorFunction {
612
595
auto jsonValue = json->as <ConstantVector<StringView>>()->valueAt (0 );
613
596
auto pathValue = path->as <ConstantVector<StringView>>()->valueAt (0 );
614
597
try {
615
- if (extractScalarOnly_) {
616
- nullResult = processJsonExtractScalar (
617
- jsonValue, pathValue, output) != simdjson::SUCCESS;
618
- } else {
619
- nullResult = processJsonExtract (jsonValue, pathValue, output) !=
620
- simdjson::SUCCESS;
621
- }
598
+ nullResult = processJsonExtract (jsonValue, pathValue, output) !=
599
+ simdjson::SUCCESS;
622
600
} catch (const VeloxException& e) {
623
601
if (!e.isUserError ()) {
624
602
throw ;
@@ -645,39 +623,21 @@ class JsonExtractFunction : public exec::VectorFunction {
645
623
exec::VectorWriter<Json> resultWriter;
646
624
resultWriter.init (*flatResult);
647
625
648
- if (extractScalarOnly_) {
649
- context.applyToSelectedNoThrow (rows, [&](auto row) {
650
- VELOX_DCHECK (!decodedPath->isNullAt (row));
651
- resultWriter.setOffset (row);
652
- std::string output;
653
- if (!decodedJson->isNullAt (row) &&
654
- processJsonExtractScalar (
655
- decodedJson->valueAt <StringView>(row),
656
- decodedPath->valueAt <StringView>(row),
657
- output) == simdjson::SUCCESS) {
658
- resultWriter.current () = output;
659
- resultWriter.commit (true );
660
- } else {
661
- resultWriter.commit (false );
662
- }
663
- });
664
- } else {
665
- context.applyToSelectedNoThrow (rows, [&](auto row) {
666
- VELOX_DCHECK (!decodedPath->isNullAt (row));
667
- resultWriter.setOffset (row);
668
- std::string output;
669
- if (!decodedJson->isNullAt (row) &&
670
- processJsonExtract (
671
- decodedJson->valueAt <StringView>(row),
672
- decodedPath->valueAt <StringView>(row),
673
- output) == simdjson::SUCCESS) {
674
- resultWriter.current () = output;
675
- resultWriter.commit (true );
676
- } else {
677
- resultWriter.commit (false );
678
- }
679
- });
680
- }
626
+ context.applyToSelectedNoThrow (rows, [&](auto row) {
627
+ VELOX_DCHECK (!decodedPath->isNullAt (row));
628
+ resultWriter.setOffset (row);
629
+ std::string output;
630
+ if (!decodedJson->isNullAt (row) &&
631
+ processJsonExtract (
632
+ decodedJson->valueAt <StringView>(row),
633
+ decodedPath->valueAt <StringView>(row),
634
+ output) == simdjson::SUCCESS) {
635
+ resultWriter.current () = output;
636
+ resultWriter.commit (true );
637
+ } else {
638
+ resultWriter.commit (false );
639
+ }
640
+ });
681
641
resultWriter.finish ();
682
642
}
683
643
@@ -750,59 +710,6 @@ class JsonExtractFunction : public exec::VectorFunction {
750
710
return simdjson::SUCCESS;
751
711
}
752
712
753
- FOLLY_ALWAYS_INLINE simdjson::error_code processJsonExtractScalar (
754
- const StringView& json,
755
- const StringView& jsonPath,
756
- std::string& output) const {
757
- bool resultPopulated = false ;
758
- std::optional<std::string> resultStr;
759
- auto consumer = [&resultStr, &resultPopulated](auto & v) {
760
- if (resultPopulated) {
761
- // We should just get a single value, if we see multiple, it's an error
762
- // and we should return null.
763
- resultStr = std::nullopt;
764
- return simdjson::SUCCESS;
765
- }
766
-
767
- resultPopulated = true ;
768
-
769
- SIMDJSON_ASSIGN_OR_RAISE (auto vtype, v.type ());
770
- switch (vtype) {
771
- case simdjson::ondemand::json_type::boolean: {
772
- SIMDJSON_ASSIGN_OR_RAISE (bool vbool, v.get_bool ());
773
- resultStr = vbool ? " true" : " false" ;
774
- break ;
775
- }
776
- case simdjson::ondemand::json_type::string: {
777
- SIMDJSON_ASSIGN_OR_RAISE (resultStr, v.get_string ());
778
- break ;
779
- }
780
- case simdjson::ondemand::json_type::object:
781
- case simdjson::ondemand::json_type::array:
782
- case simdjson::ondemand::json_type::null:
783
- // Do nothing.
784
- break ;
785
- default : {
786
- SIMDJSON_ASSIGN_OR_RAISE (resultStr, simdjson::to_json_string (v));
787
- }
788
- }
789
- return simdjson::SUCCESS;
790
- };
791
-
792
- auto & extractor = SIMDJsonExtractor::getInstance (jsonPath);
793
- bool isDefinitePath = true ;
794
- simdjson::padded_string paddedJson (json.data (), json.size ());
795
- SIMDJSON_TRY (extractor.extract (paddedJson, consumer, isDefinitePath));
796
-
797
- if (resultStr.has_value ()) {
798
- output = std::move (resultStr.value ());
799
- return simdjson::SUCCESS;
800
- } else {
801
- return simdjson::NO_SUCH_FIELD;
802
- }
803
- }
804
-
805
- bool extractScalarOnly_{false };
806
713
JsonParseImpl parser_;
807
714
};
808
715
@@ -856,37 +763,13 @@ VELOX_DECLARE_VECTOR_FUNCTION(
856
763
JsonFormatFunction::signatures (),
857
764
std::make_unique<JsonFormatFunction>());
858
765
859
- VELOX_DECLARE_STATEFUL_VECTOR_FUNCTION (
860
- udf_json_extract_scalar,
861
- JsonExtractFunction::signatures (true ),
862
- [](const std::string& /* name*/ ,
863
- const std::vector<exec::VectorFunctionArg>&,
864
- const velox::core::QueryConfig&) {
865
- return std::make_shared<JsonExtractFunction>(true );
866
- });
867
-
868
- // Only used internally at Meta.
869
- VELOX_DECLARE_STATEFUL_VECTOR_FUNCTION (
870
- udf_json_extract_scalar_varchar_only,
871
- (std::vector<std::shared_ptr<exec::FunctionSignature>>{
872
- facebook::velox::exec::FunctionSignatureBuilder ()
873
- .returnType (" varchar" )
874
- .argumentType (" varchar" )
875
- .argumentType (" varchar" )
876
- .build ()}),
877
- [](const std::string& /* name*/ ,
878
- const std::vector<exec::VectorFunctionArg>&,
879
- const velox::core::QueryConfig&) {
880
- return std::make_shared<JsonExtractFunction>(true );
881
- });
882
-
883
766
VELOX_DECLARE_STATEFUL_VECTOR_FUNCTION (
884
767
udf_json_extract,
885
- JsonExtractFunction::signatures (false ),
768
+ JsonExtractFunction::signatures (),
886
769
[](const std::string& /* name*/ ,
887
770
const std::vector<exec::VectorFunctionArg>&,
888
771
const velox::core::QueryConfig&) {
889
- return std::make_shared<JsonExtractFunction>(false );
772
+ return std::make_shared<JsonExtractFunction>();
890
773
});
891
774
892
775
VELOX_DECLARE_STATEFUL_VECTOR_FUNCTION (
0 commit comments