be/src/exprs/function/function_jsonb.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include <glog/logging.h> |
19 | | |
20 | | #include <algorithm> |
21 | | #include <cstdlib> |
22 | | #include <memory> |
23 | | #include <string> |
24 | | #include <string_view> |
25 | | #include <tuple> |
26 | | #include <type_traits> |
27 | | #include <utility> |
28 | | #include <variant> |
29 | | |
30 | | #include "common/compiler_util.h" // IWYU pragma: keep |
31 | | #include "common/status.h" |
32 | | #include "core/assert_cast.h" |
33 | | #include "core/block/block.h" |
34 | | #include "core/block/column_numbers.h" |
35 | | #include "core/block/column_with_type_and_name.h" |
36 | | #include "core/column/column.h" |
37 | | #include "core/column/column_array.h" |
38 | | #include "core/column/column_const.h" |
39 | | #include "core/column/column_nullable.h" |
40 | | #include "core/column/column_string.h" |
41 | | #include "core/column/column_vector.h" |
42 | | #include "core/custom_allocator.h" |
43 | | #include "core/data_type/data_type.h" |
44 | | #include "core/data_type/data_type_array.h" |
45 | | #include "core/data_type/data_type_jsonb.h" |
46 | | #include "core/data_type/data_type_nullable.h" |
47 | | #include "core/data_type/data_type_string.h" |
48 | | #include "core/data_type/define_primitive_type.h" |
49 | | #include "core/data_type/primitive_type.h" |
50 | | #include "core/string_ref.h" |
51 | | #include "core/types.h" |
52 | | #include "core/value/jsonb_value.h" |
53 | | #include "exec/common/stringop_substring.h" |
54 | | #include "exec/common/template_helpers.hpp" |
55 | | #include "exec/common/util.hpp" |
56 | | #include "exprs/aggregate/aggregate_function.h" |
57 | | #include "exprs/function/function.h" |
58 | | #include "exprs/function/like.h" |
59 | | #include "exprs/function/simple_function_factory.h" |
60 | | #include "exprs/function_context.h" |
61 | | #include "util/jsonb_document.h" |
62 | | #include "util/jsonb_stream.h" |
63 | | #include "util/jsonb_utils.h" |
64 | | #include "util/jsonb_writer.h" |
65 | | #include "util/simd/bits.h" |
66 | | |
67 | | namespace doris { |
68 | | |
69 | | enum class NullalbeMode { NULLABLE = 0, FOLLOW_INPUT }; |
70 | | |
71 | | enum class JsonbParseErrorMode { FAIL = 0, RETURN_NULL, RETURN_VALUE }; |
72 | | |
73 | | // func(string,string) -> json |
74 | | template <NullalbeMode nullable_mode, JsonbParseErrorMode parse_error_handle_mode> |
75 | | class FunctionJsonbParseBase : public IFunction { |
76 | | private: |
77 | | struct FunctionJsonbParseState { |
78 | | StringRef default_value; |
79 | | JsonBinaryValue default_value_parser; |
80 | | bool has_const_default_value = false; |
81 | | bool default_is_null = false; |
82 | | }; |
83 | | |
84 | | public: |
85 | | static constexpr auto name = "json_parse"; |
86 | | static constexpr auto alias = "jsonb_parse"; |
87 | 17 | static FunctionPtr create() { return std::make_shared<FunctionJsonbParseBase>(); }_ZN5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE6createEv Line | Count | Source | 87 | 9 | static FunctionPtr create() { return std::make_shared<FunctionJsonbParseBase>(); } |
_ZN5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE6createEv Line | Count | Source | 87 | 3 | static FunctionPtr create() { return std::make_shared<FunctionJsonbParseBase>(); } |
_ZN5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE6createEv Line | Count | Source | 87 | 5 | static FunctionPtr create() { return std::make_shared<FunctionJsonbParseBase>(); } |
|
88 | | |
89 | 4 | String get_name() const override { |
90 | 4 | String error_mode; |
91 | 4 | switch (parse_error_handle_mode) { |
92 | 1 | case JsonbParseErrorMode::FAIL: |
93 | 1 | break; |
94 | 1 | case JsonbParseErrorMode::RETURN_NULL: |
95 | 1 | error_mode = "_error_to_null"; |
96 | 1 | break; |
97 | 2 | case JsonbParseErrorMode::RETURN_VALUE: |
98 | 2 | error_mode = "_error_to_value"; |
99 | 2 | break; |
100 | 4 | } |
101 | | |
102 | 4 | return name + error_mode; |
103 | 4 | } _ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE8get_nameB5cxx11Ev Line | Count | Source | 89 | 1 | String get_name() const override { | 90 | 1 | String error_mode; | 91 | 1 | switch (parse_error_handle_mode) { | 92 | 1 | case JsonbParseErrorMode::FAIL: | 93 | 1 | break; | 94 | 0 | case JsonbParseErrorMode::RETURN_NULL: | 95 | 0 | error_mode = "_error_to_null"; | 96 | 0 | break; | 97 | 0 | case JsonbParseErrorMode::RETURN_VALUE: | 98 | 0 | error_mode = "_error_to_value"; | 99 | 0 | break; | 100 | 1 | } | 101 | | | 102 | 1 | return name + error_mode; | 103 | 1 | } |
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE8get_nameB5cxx11Ev Line | Count | Source | 89 | 1 | String get_name() const override { | 90 | 1 | String error_mode; | 91 | 1 | switch (parse_error_handle_mode) { | 92 | 0 | case JsonbParseErrorMode::FAIL: | 93 | 0 | break; | 94 | 1 | case JsonbParseErrorMode::RETURN_NULL: | 95 | 1 | error_mode = "_error_to_null"; | 96 | 1 | break; | 97 | 0 | case JsonbParseErrorMode::RETURN_VALUE: | 98 | 0 | error_mode = "_error_to_value"; | 99 | 0 | break; | 100 | 1 | } | 101 | | | 102 | 1 | return name + error_mode; | 103 | 1 | } |
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE8get_nameB5cxx11Ev Line | Count | Source | 89 | 2 | String get_name() const override { | 90 | 2 | String error_mode; | 91 | 2 | switch (parse_error_handle_mode) { | 92 | 0 | case JsonbParseErrorMode::FAIL: | 93 | 0 | break; | 94 | 0 | case JsonbParseErrorMode::RETURN_NULL: | 95 | 0 | error_mode = "_error_to_null"; | 96 | 0 | break; | 97 | 2 | case JsonbParseErrorMode::RETURN_VALUE: | 98 | 2 | error_mode = "_error_to_value"; | 99 | 2 | break; | 100 | 2 | } | 101 | | | 102 | 2 | return name + error_mode; | 103 | 2 | } |
|
104 | | |
105 | 15 | bool is_variadic() const override { |
106 | 15 | return parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE; |
107 | 15 | } _ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE11is_variadicEv Line | Count | Source | 105 | 8 | bool is_variadic() const override { | 106 | 8 | return parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE; | 107 | 8 | } |
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE11is_variadicEv Line | Count | Source | 105 | 2 | bool is_variadic() const override { | 106 | 2 | return parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE; | 107 | 2 | } |
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE11is_variadicEv Line | Count | Source | 105 | 5 | bool is_variadic() const override { | 106 | 5 | return parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE; | 107 | 5 | } |
|
108 | | |
109 | 9 | size_t get_number_of_arguments() const override { |
110 | 9 | switch (parse_error_handle_mode) { |
111 | 7 | case JsonbParseErrorMode::FAIL: |
112 | 7 | return 1; |
113 | 1 | case JsonbParseErrorMode::RETURN_NULL: |
114 | 1 | return 1; |
115 | 1 | case JsonbParseErrorMode::RETURN_VALUE: |
116 | 1 | return 0; |
117 | 9 | } |
118 | 9 | } _ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE23get_number_of_argumentsEv Line | Count | Source | 109 | 7 | size_t get_number_of_arguments() const override { | 110 | 7 | switch (parse_error_handle_mode) { | 111 | 7 | case JsonbParseErrorMode::FAIL: | 112 | 7 | return 1; | 113 | 0 | case JsonbParseErrorMode::RETURN_NULL: | 114 | 0 | return 1; | 115 | 0 | case JsonbParseErrorMode::RETURN_VALUE: | 116 | 0 | return 0; | 117 | 7 | } | 118 | 7 | } |
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE23get_number_of_argumentsEv Line | Count | Source | 109 | 1 | size_t get_number_of_arguments() const override { | 110 | 1 | switch (parse_error_handle_mode) { | 111 | 0 | case JsonbParseErrorMode::FAIL: | 112 | 0 | return 1; | 113 | 1 | case JsonbParseErrorMode::RETURN_NULL: | 114 | 1 | return 1; | 115 | 0 | case JsonbParseErrorMode::RETURN_VALUE: | 116 | 0 | return 0; | 117 | 1 | } | 118 | 1 | } |
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE23get_number_of_argumentsEv Line | Count | Source | 109 | 1 | size_t get_number_of_arguments() const override { | 110 | 1 | switch (parse_error_handle_mode) { | 111 | 0 | case JsonbParseErrorMode::FAIL: | 112 | 0 | return 1; | 113 | 0 | case JsonbParseErrorMode::RETURN_NULL: | 114 | 0 | return 1; | 115 | 1 | case JsonbParseErrorMode::RETURN_VALUE: | 116 | 1 | return 0; | 117 | 1 | } | 118 | 1 | } |
|
119 | | |
120 | 11 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
121 | 11 | bool is_nullable = false; |
122 | 11 | switch (nullable_mode) { |
123 | 1 | case NullalbeMode::NULLABLE: |
124 | 1 | is_nullable = true; |
125 | 1 | break; |
126 | 10 | case NullalbeMode::FOLLOW_INPUT: { |
127 | 14 | for (auto arg : arguments) { |
128 | 14 | is_nullable |= arg->is_nullable(); |
129 | 14 | } |
130 | 10 | break; |
131 | 0 | } |
132 | 11 | } |
133 | | |
134 | 11 | return is_nullable ? make_nullable(std::make_shared<DataTypeJsonb>()) |
135 | 11 | : std::make_shared<DataTypeJsonb>(); |
136 | 11 | } _ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE Line | Count | Source | 120 | 7 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 121 | 7 | bool is_nullable = false; | 122 | 7 | switch (nullable_mode) { | 123 | 0 | case NullalbeMode::NULLABLE: | 124 | 0 | is_nullable = true; | 125 | 0 | break; | 126 | 7 | case NullalbeMode::FOLLOW_INPUT: { | 127 | 7 | for (auto arg : arguments) { | 128 | 7 | is_nullable |= arg->is_nullable(); | 129 | 7 | } | 130 | 7 | break; | 131 | 0 | } | 132 | 7 | } | 133 | | | 134 | 7 | return is_nullable ? make_nullable(std::make_shared<DataTypeJsonb>()) | 135 | 7 | : std::make_shared<DataTypeJsonb>(); | 136 | 7 | } |
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE Line | Count | Source | 120 | 1 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 121 | 1 | bool is_nullable = false; | 122 | 1 | switch (nullable_mode) { | 123 | 1 | case NullalbeMode::NULLABLE: | 124 | 1 | is_nullable = true; | 125 | 1 | break; | 126 | 0 | case NullalbeMode::FOLLOW_INPUT: { | 127 | 0 | for (auto arg : arguments) { | 128 | 0 | is_nullable |= arg->is_nullable(); | 129 | 0 | } | 130 | 0 | break; | 131 | 0 | } | 132 | 1 | } | 133 | | | 134 | 1 | return is_nullable ? make_nullable(std::make_shared<DataTypeJsonb>()) | 135 | 1 | : std::make_shared<DataTypeJsonb>(); | 136 | 1 | } |
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE Line | Count | Source | 120 | 3 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 121 | 3 | bool is_nullable = false; | 122 | 3 | switch (nullable_mode) { | 123 | 0 | case NullalbeMode::NULLABLE: | 124 | 0 | is_nullable = true; | 125 | 0 | break; | 126 | 3 | case NullalbeMode::FOLLOW_INPUT: { | 127 | 7 | for (auto arg : arguments) { | 128 | 7 | is_nullable |= arg->is_nullable(); | 129 | 7 | } | 130 | 3 | break; | 131 | 0 | } | 132 | 3 | } | 133 | | | 134 | 3 | return is_nullable ? make_nullable(std::make_shared<DataTypeJsonb>()) | 135 | 3 | : std::make_shared<DataTypeJsonb>(); | 136 | 3 | } |
|
137 | | |
138 | 21 | bool use_default_implementation_for_nulls() const override { return false; }_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE36use_default_implementation_for_nullsEv Line | Count | Source | 138 | 14 | bool use_default_implementation_for_nulls() const override { return false; } |
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE36use_default_implementation_for_nullsEv Line | Count | Source | 138 | 2 | bool use_default_implementation_for_nulls() const override { return false; } |
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE36use_default_implementation_for_nullsEv Line | Count | Source | 138 | 5 | bool use_default_implementation_for_nulls() const override { return false; } |
|
139 | | |
140 | 20 | Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { |
141 | 20 | if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) { |
142 | 10 | std::shared_ptr<FunctionJsonbParseState> state = |
143 | 10 | std::make_shared<FunctionJsonbParseState>(); |
144 | 10 | context->set_function_state(FunctionContext::FRAGMENT_LOCAL, state); |
145 | 10 | } |
146 | 20 | if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) { |
147 | 4 | if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) { |
148 | 2 | auto* state = reinterpret_cast<FunctionJsonbParseState*>( |
149 | 2 | context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); |
150 | 2 | if (state) { |
151 | 2 | if (context->get_num_args() == 2) { |
152 | 1 | if (context->is_col_constant(1)) { |
153 | 0 | const auto default_value_col = context->get_constant_col(1)->column_ptr; |
154 | 0 | if (default_value_col->is_null_at(0)) { |
155 | 0 | state->default_is_null = true; |
156 | 0 | } else { |
157 | 0 | const auto& default_value = default_value_col->get_data_at(0); |
158 | |
|
159 | 0 | state->default_value = default_value; |
160 | 0 | state->has_const_default_value = true; |
161 | 0 | } |
162 | 0 | } |
163 | 1 | } else if (context->get_num_args() == 1) { |
164 | 0 | RETURN_IF_ERROR( |
165 | 0 | state->default_value_parser.from_json_string(std::string("{}"))); |
166 | 0 | state->default_value = StringRef(state->default_value_parser.value(), |
167 | 0 | state->default_value_parser.size()); |
168 | 0 | state->has_const_default_value = true; |
169 | 0 | } |
170 | 2 | } |
171 | 2 | } |
172 | | |
173 | 4 | if (context->get_num_args() != 1 && context->get_num_args() != 2) { |
174 | 1 | return Status::InvalidArgument( |
175 | 1 | "{} function should have 1 or 2 arguments, " |
176 | 1 | "but got {}", |
177 | 1 | get_name(), context->get_num_args()); |
178 | 1 | } |
179 | 4 | } |
180 | 3 | return Status::OK(); |
181 | 20 | } _ZN5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE4openEPNS_15FunctionContextENS4_18FunctionStateScopeE Line | Count | Source | 140 | 14 | Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { | 141 | 14 | if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) { | 142 | 7 | std::shared_ptr<FunctionJsonbParseState> state = | 143 | 7 | std::make_shared<FunctionJsonbParseState>(); | 144 | 7 | context->set_function_state(FunctionContext::FRAGMENT_LOCAL, state); | 145 | 7 | } | 146 | | if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) { | 147 | | if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) { | 148 | | auto* state = reinterpret_cast<FunctionJsonbParseState*>( | 149 | | context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); | 150 | | if (state) { | 151 | | if (context->get_num_args() == 2) { | 152 | | if (context->is_col_constant(1)) { | 153 | | const auto default_value_col = context->get_constant_col(1)->column_ptr; | 154 | | if (default_value_col->is_null_at(0)) { | 155 | | state->default_is_null = true; | 156 | | } else { | 157 | | const auto& default_value = default_value_col->get_data_at(0); | 158 | | | 159 | | state->default_value = default_value; | 160 | | state->has_const_default_value = true; | 161 | | } | 162 | | } | 163 | | } else if (context->get_num_args() == 1) { | 164 | | RETURN_IF_ERROR( | 165 | | state->default_value_parser.from_json_string(std::string("{}"))); | 166 | | state->default_value = StringRef(state->default_value_parser.value(), | 167 | | state->default_value_parser.size()); | 168 | | state->has_const_default_value = true; | 169 | | } | 170 | | } | 171 | | } | 172 | | | 173 | | if (context->get_num_args() != 1 && context->get_num_args() != 2) { | 174 | | return Status::InvalidArgument( | 175 | | "{} function should have 1 or 2 arguments, " | 176 | | "but got {}", | 177 | | get_name(), 
context->get_num_args()); | 178 | | } | 179 | | } | 180 | 14 | return Status::OK(); | 181 | 14 | } |
_ZN5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE4openEPNS_15FunctionContextENS4_18FunctionStateScopeE Line | Count | Source | 140 | 2 | Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { | 141 | 2 | if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) { | 142 | 1 | std::shared_ptr<FunctionJsonbParseState> state = | 143 | 1 | std::make_shared<FunctionJsonbParseState>(); | 144 | 1 | context->set_function_state(FunctionContext::FRAGMENT_LOCAL, state); | 145 | 1 | } | 146 | | if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) { | 147 | | if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) { | 148 | | auto* state = reinterpret_cast<FunctionJsonbParseState*>( | 149 | | context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); | 150 | | if (state) { | 151 | | if (context->get_num_args() == 2) { | 152 | | if (context->is_col_constant(1)) { | 153 | | const auto default_value_col = context->get_constant_col(1)->column_ptr; | 154 | | if (default_value_col->is_null_at(0)) { | 155 | | state->default_is_null = true; | 156 | | } else { | 157 | | const auto& default_value = default_value_col->get_data_at(0); | 158 | | | 159 | | state->default_value = default_value; | 160 | | state->has_const_default_value = true; | 161 | | } | 162 | | } | 163 | | } else if (context->get_num_args() == 1) { | 164 | | RETURN_IF_ERROR( | 165 | | state->default_value_parser.from_json_string(std::string("{}"))); | 166 | | state->default_value = StringRef(state->default_value_parser.value(), | 167 | | state->default_value_parser.size()); | 168 | | state->has_const_default_value = true; | 169 | | } | 170 | | } | 171 | | } | 172 | | | 173 | | if (context->get_num_args() != 1 && context->get_num_args() != 2) { | 174 | | return Status::InvalidArgument( | 175 | | "{} function should have 1 or 2 arguments, " | 176 | | "but got {}", | 177 | | get_name(), context->get_num_args()); 
| 178 | | } | 179 | | } | 180 | 2 | return Status::OK(); | 181 | 2 | } |
_ZN5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE4openEPNS_15FunctionContextENS4_18FunctionStateScopeE Line | Count | Source | 140 | 4 | Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { | 141 | 4 | if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) { | 142 | 2 | std::shared_ptr<FunctionJsonbParseState> state = | 143 | 2 | std::make_shared<FunctionJsonbParseState>(); | 144 | 2 | context->set_function_state(FunctionContext::FRAGMENT_LOCAL, state); | 145 | 2 | } | 146 | 4 | if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) { | 147 | 4 | if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) { | 148 | 2 | auto* state = reinterpret_cast<FunctionJsonbParseState*>( | 149 | 2 | context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); | 150 | 2 | if (state) { | 151 | 2 | if (context->get_num_args() == 2) { | 152 | 1 | if (context->is_col_constant(1)) { | 153 | 0 | const auto default_value_col = context->get_constant_col(1)->column_ptr; | 154 | 0 | if (default_value_col->is_null_at(0)) { | 155 | 0 | state->default_is_null = true; | 156 | 0 | } else { | 157 | 0 | const auto& default_value = default_value_col->get_data_at(0); | 158 | |
| 159 | 0 | state->default_value = default_value; | 160 | 0 | state->has_const_default_value = true; | 161 | 0 | } | 162 | 0 | } | 163 | 1 | } else if (context->get_num_args() == 1) { | 164 | 0 | RETURN_IF_ERROR( | 165 | 0 | state->default_value_parser.from_json_string(std::string("{}"))); | 166 | 0 | state->default_value = StringRef(state->default_value_parser.value(), | 167 | 0 | state->default_value_parser.size()); | 168 | 0 | state->has_const_default_value = true; | 169 | 0 | } | 170 | 2 | } | 171 | 2 | } | 172 | | | 173 | 4 | if (context->get_num_args() != 1 && context->get_num_args() != 2) { | 174 | 1 | return Status::InvalidArgument( | 175 | 1 | "{} function should have 1 or 2 arguments, " | 176 | 1 | "but got {}", | 177 | 1 | get_name(), context->get_num_args()); | 178 | 1 | } | 179 | 4 | } | 180 | 3 | return Status::OK(); | 181 | 4 | } |
|
182 | | |
183 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
184 | 10 | uint32_t result, size_t input_rows_count) const override { |
185 | 10 | auto&& [col_from, col_from_is_const] = |
186 | 10 | unpack_if_const(block.get_by_position(arguments[0]).column); |
187 | | |
188 | 10 | if (col_from_is_const && col_from->is_null_at(0)) { |
189 | 0 | auto col_str = ColumnString::create(); |
190 | 0 | col_str->insert_default(); |
191 | 0 | auto null_map = ColumnUInt8::create(1, 1); |
192 | 0 | auto nullable_col = ColumnNullable::create(std::move(col_str), std::move(null_map)); |
193 | 0 | block.get_by_position(result).column = |
194 | 0 | ColumnConst::create(std::move(nullable_col), input_rows_count); |
195 | 0 | return Status::OK(); |
196 | 0 | } |
197 | | |
198 | 10 | auto null_map = ColumnUInt8::create(0, 0); |
199 | 10 | bool is_nullable = false; |
200 | | |
201 | 10 | switch (nullable_mode) { |
202 | 1 | case NullalbeMode::NULLABLE: { |
203 | 1 | is_nullable = true; |
204 | 1 | break; |
205 | 0 | } |
206 | 9 | case NullalbeMode::FOLLOW_INPUT: { |
207 | 11 | for (auto arg : arguments) { |
208 | 11 | is_nullable |= block.get_by_position(arg).type->is_nullable(); |
209 | 11 | } |
210 | 9 | break; |
211 | 0 | } |
212 | 10 | } |
213 | | |
214 | 10 | if (is_nullable) { |
215 | 9 | null_map = ColumnUInt8::create(input_rows_count, 0); |
216 | 9 | } |
217 | | |
218 | 8 | const ColumnString* col_from_string = nullptr; |
219 | 10 | if (col_from->is_nullable()) { |
220 | 9 | const auto& nullable_col = assert_cast<const ColumnNullable&>(*col_from); |
221 | | |
222 | 9 | VectorizedUtils::update_null_map(null_map->get_data(), |
223 | 9 | nullable_col.get_null_map_data()); |
224 | 9 | col_from_string = |
225 | 9 | assert_cast<const ColumnString*>(nullable_col.get_nested_column_ptr().get()); |
226 | 9 | } else { |
227 | 1 | col_from_string = assert_cast<const ColumnString*>(col_from.get()); |
228 | 1 | } |
229 | | |
230 | 8 | StringRef constant_default_value; |
231 | 8 | bool default_value_const = false; |
232 | 8 | bool default_value_null_const = false; |
233 | 8 | ColumnPtr default_value_col; |
234 | 8 | JsonBinaryValue default_jsonb_value_parser; |
235 | 8 | const ColumnString* default_value_str_col = nullptr; |
236 | 8 | const NullMap* default_value_nullmap = nullptr; |
237 | 8 | if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) { |
238 | 2 | auto* state = reinterpret_cast<FunctionJsonbParseState*>( |
239 | 2 | context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); |
240 | 2 | if (state && state->has_const_default_value) { |
241 | 0 | constant_default_value = state->default_value; |
242 | 0 | default_value_null_const = state->default_is_null; |
243 | 0 | default_value_const = true; |
244 | 2 | } else if (arguments.size() > 1) { |
245 | 2 | if (block.get_by_position(arguments[1]).type->get_primitive_type() != |
246 | 2 | PrimitiveType::TYPE_JSONB) { |
247 | 1 | return Status::InvalidArgument( |
248 | 1 | "{} second argument should be jsonb type, but got {}", get_name(), |
249 | 1 | block.get_by_position(arguments[1]).type->get_name()); |
250 | 1 | } |
251 | 1 | std::tie(default_value_col, default_value_const) = |
252 | 1 | unpack_if_const(block.get_by_position(arguments[1]).column); |
253 | 1 | if (default_value_const) { |
254 | 0 | const JsonbDocument* default_value_doc = nullptr; |
255 | 0 | if (default_value_col->is_null_at(0)) { |
256 | 0 | default_value_null_const = true; |
257 | 0 | } else { |
258 | 0 | auto data = default_value_col->get_data_at(0); |
259 | 0 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(data.data, data.size, |
260 | 0 | &default_value_doc)); |
261 | 0 | constant_default_value = data; |
262 | 0 | } |
263 | 1 | } else { |
264 | 1 | if (default_value_col->is_nullable()) { |
265 | 1 | const auto& nullable_col = |
266 | 1 | assert_cast<const ColumnNullable&>(*default_value_col); |
267 | 1 | default_value_str_col = assert_cast<const ColumnString*>( |
268 | 1 | nullable_col.get_nested_column_ptr().get()); |
269 | 1 | default_value_nullmap = &(nullable_col.get_null_map_data()); |
270 | 1 | } else { |
271 | 0 | default_value_str_col = |
272 | 0 | assert_cast<const ColumnString*>(default_value_col.get()); |
273 | 0 | } |
274 | 1 | } |
275 | 1 | } else if (arguments.size() == 1) { |
276 | | // parse default value '{}' should always success. |
277 | 0 | RETURN_IF_ERROR(default_jsonb_value_parser.from_json_string(std::string("{}"))); |
278 | 0 | default_value_const = true; |
279 | 0 | constant_default_value.data = default_jsonb_value_parser.value(); |
280 | 0 | constant_default_value.size = default_jsonb_value_parser.size(); |
281 | 0 | } |
282 | 2 | } |
283 | | |
284 | 1 | auto col_to = ColumnString::create(); |
285 | | |
286 | 8 | col_to->reserve(input_rows_count); |
287 | | |
288 | 8 | auto& null_map_data = null_map->get_data(); |
289 | | |
290 | | // parser can be reused for performance |
291 | 8 | JsonBinaryValue jsonb_value; |
292 | | |
293 | 79 | for (size_t i = 0; i < input_rows_count; ++i) { |
294 | 69 | if (is_nullable && null_map_data[i]) { |
295 | 3 | col_to->insert_default(); |
296 | 3 | continue; |
297 | 3 | } |
298 | | |
299 | 66 | auto index = index_check_const(i, col_from_is_const); |
300 | 66 | const auto& val = col_from_string->get_data_at(index); |
301 | 66 | auto st = jsonb_value.from_json_string(val.data, val.size); |
302 | 66 | if (st.ok()) { |
303 | | // insert jsonb format data |
304 | 48 | col_to->insert_data(jsonb_value.value(), jsonb_value.size()); |
305 | 48 | } else { |
306 | 18 | if constexpr (parse_error_handle_mode == JsonbParseErrorMode::FAIL) { |
307 | 6 | return Status::InvalidArgument( |
308 | 6 | "Parse json document failed at row {}, error: {}", i, st.to_string()); |
309 | 6 | } else if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_NULL) { |
310 | 6 | null_map_data[i] = 1; |
311 | 6 | col_to->insert_default(); |
312 | 6 | } else { |
313 | 6 | if (default_value_const) { |
314 | 0 | if (default_value_null_const) { |
315 | 0 | null_map_data[i] = 1; |
316 | 0 | col_to->insert_default(); |
317 | 0 | } else { |
318 | 0 | col_to->insert_data(constant_default_value.data, |
319 | 0 | constant_default_value.size); |
320 | 0 | } |
321 | 6 | } else { |
322 | 6 | if (default_value_nullmap && (*default_value_nullmap)[i]) { |
323 | 0 | null_map_data[i] = 1; |
324 | 0 | col_to->insert_default(); |
325 | 0 | continue; |
326 | 0 | } |
327 | 6 | auto value = default_value_str_col->get_data_at(i); |
328 | 6 | col_to->insert_data(value.data, value.size); |
329 | 6 | } |
330 | 6 | } |
331 | 18 | } |
332 | 66 | } |
333 | | |
334 | 10 | if (is_nullable) { |
335 | 3 | block.replace_by_position( |
336 | 3 | result, ColumnNullable::create(std::move(col_to), std::move(null_map))); |
337 | 7 | } else { |
338 | 7 | block.replace_by_position(result, std::move(col_to)); |
339 | 7 | } |
340 | | |
341 | 2 | return Status::OK(); |
342 | 2 | } _ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 184 | 7 | uint32_t result, size_t input_rows_count) const override { | 185 | 7 | auto&& [col_from, col_from_is_const] = | 186 | 7 | unpack_if_const(block.get_by_position(arguments[0]).column); | 187 | | | 188 | 7 | if (col_from_is_const && col_from->is_null_at(0)) { | 189 | 0 | auto col_str = ColumnString::create(); | 190 | 0 | col_str->insert_default(); | 191 | 0 | auto null_map = ColumnUInt8::create(1, 1); | 192 | 0 | auto nullable_col = ColumnNullable::create(std::move(col_str), std::move(null_map)); | 193 | 0 | block.get_by_position(result).column = | 194 | 0 | ColumnConst::create(std::move(nullable_col), input_rows_count); | 195 | 0 | return Status::OK(); | 196 | 0 | } | 197 | | | 198 | 7 | auto null_map = ColumnUInt8::create(0, 0); | 199 | 7 | bool is_nullable = false; | 200 | | | 201 | 7 | switch (nullable_mode) { | 202 | 0 | case NullalbeMode::NULLABLE: { | 203 | 0 | is_nullable = true; | 204 | 0 | break; | 205 | 0 | } | 206 | 7 | case NullalbeMode::FOLLOW_INPUT: { | 207 | 7 | for (auto arg : arguments) { | 208 | 7 | is_nullable |= block.get_by_position(arg).type->is_nullable(); | 209 | 7 | } | 210 | 7 | break; | 211 | 0 | } | 212 | 7 | } | 213 | | | 214 | 7 | if (is_nullable) { | 215 | 7 | null_map = ColumnUInt8::create(input_rows_count, 0); | 216 | 7 | } | 217 | | | 218 | 7 | const ColumnString* col_from_string = nullptr; | 219 | 7 | if (col_from->is_nullable()) { | 220 | 7 | const auto& nullable_col = assert_cast<const ColumnNullable&>(*col_from); | 221 | | | 222 | 7 | VectorizedUtils::update_null_map(null_map->get_data(), | 223 | 7 | nullable_col.get_null_map_data()); | 224 | 7 | col_from_string = | 225 | 7 | assert_cast<const ColumnString*>(nullable_col.get_nested_column_ptr().get()); | 226 | 7 | } else { | 227 | 0 | col_from_string = assert_cast<const 
ColumnString*>(col_from.get()); | 228 | 0 | } | 229 | | | 230 | 7 | StringRef constant_default_value; | 231 | 7 | bool default_value_const = false; | 232 | 7 | bool default_value_null_const = false; | 233 | 7 | ColumnPtr default_value_col; | 234 | 7 | JsonBinaryValue default_jsonb_value_parser; | 235 | 7 | const ColumnString* default_value_str_col = nullptr; | 236 | 7 | const NullMap* default_value_nullmap = nullptr; | 237 | | if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) { | 238 | | auto* state = reinterpret_cast<FunctionJsonbParseState*>( | 239 | | context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); | 240 | | if (state && state->has_const_default_value) { | 241 | | constant_default_value = state->default_value; | 242 | | default_value_null_const = state->default_is_null; | 243 | | default_value_const = true; | 244 | | } else if (arguments.size() > 1) { | 245 | | if (block.get_by_position(arguments[1]).type->get_primitive_type() != | 246 | | PrimitiveType::TYPE_JSONB) { | 247 | | return Status::InvalidArgument( | 248 | | "{} second argument should be jsonb type, but got {}", get_name(), | 249 | | block.get_by_position(arguments[1]).type->get_name()); | 250 | | } | 251 | | std::tie(default_value_col, default_value_const) = | 252 | | unpack_if_const(block.get_by_position(arguments[1]).column); | 253 | | if (default_value_const) { | 254 | | const JsonbDocument* default_value_doc = nullptr; | 255 | | if (default_value_col->is_null_at(0)) { | 256 | | default_value_null_const = true; | 257 | | } else { | 258 | | auto data = default_value_col->get_data_at(0); | 259 | | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(data.data, data.size, | 260 | | &default_value_doc)); | 261 | | constant_default_value = data; | 262 | | } | 263 | | } else { | 264 | | if (default_value_col->is_nullable()) { | 265 | | const auto& nullable_col = | 266 | | assert_cast<const ColumnNullable&>(*default_value_col); | 267 | | default_value_str_col = 
assert_cast<const ColumnString*>( | 268 | | nullable_col.get_nested_column_ptr().get()); | 269 | | default_value_nullmap = &(nullable_col.get_null_map_data()); | 270 | | } else { | 271 | | default_value_str_col = | 272 | | assert_cast<const ColumnString*>(default_value_col.get()); | 273 | | } | 274 | | } | 275 | | } else if (arguments.size() == 1) { | 276 | | // parse default value '{}' should always success. | 277 | | RETURN_IF_ERROR(default_jsonb_value_parser.from_json_string(std::string("{}"))); | 278 | | default_value_const = true; | 279 | | constant_default_value.data = default_jsonb_value_parser.value(); | 280 | | constant_default_value.size = default_jsonb_value_parser.size(); | 281 | | } | 282 | | } | 283 | | | 284 | 7 | auto col_to = ColumnString::create(); | 285 | | | 286 | 7 | col_to->reserve(input_rows_count); | 287 | | | 288 | 7 | auto& null_map_data = null_map->get_data(); | 289 | | | 290 | | // parser can be reused for performance | 291 | 7 | JsonBinaryValue jsonb_value; | 292 | | | 293 | 30 | for (size_t i = 0; i < input_rows_count; ++i) { | 294 | 23 | if (is_nullable && null_map_data[i]) { | 295 | 1 | col_to->insert_default(); | 296 | 1 | continue; | 297 | 1 | } | 298 | | | 299 | 22 | auto index = index_check_const(i, col_from_is_const); | 300 | 22 | const auto& val = col_from_string->get_data_at(index); | 301 | 22 | auto st = jsonb_value.from_json_string(val.data, val.size); | 302 | 22 | if (st.ok()) { | 303 | | // insert jsonb format data | 304 | 16 | col_to->insert_data(jsonb_value.value(), jsonb_value.size()); | 305 | 16 | } else { | 306 | 6 | if constexpr (parse_error_handle_mode == JsonbParseErrorMode::FAIL) { | 307 | 6 | return Status::InvalidArgument( | 308 | 6 | "Parse json document failed at row {}, error: {}", i, st.to_string()); | 309 | | } else if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_NULL) { | 310 | | null_map_data[i] = 1; | 311 | | col_to->insert_default(); | 312 | | } else { | 313 | | if 
(default_value_const) { | 314 | | if (default_value_null_const) { | 315 | | null_map_data[i] = 1; | 316 | | col_to->insert_default(); | 317 | | } else { | 318 | | col_to->insert_data(constant_default_value.data, | 319 | | constant_default_value.size); | 320 | | } | 321 | | } else { | 322 | | if (default_value_nullmap && (*default_value_nullmap)[i]) { | 323 | | null_map_data[i] = 1; | 324 | | col_to->insert_default(); | 325 | | continue; | 326 | | } | 327 | | auto value = default_value_str_col->get_data_at(i); | 328 | | col_to->insert_data(value.data, value.size); | 329 | | } | 330 | | } | 331 | 6 | } | 332 | 22 | } | 333 | | | 334 | 7 | if (is_nullable) { | 335 | 1 | block.replace_by_position( | 336 | 1 | result, ColumnNullable::create(std::move(col_to), std::move(null_map))); | 337 | 6 | } else { | 338 | 6 | block.replace_by_position(result, std::move(col_to)); | 339 | 6 | } | 340 | | | 341 | 7 | return Status::OK(); | 342 | 7 | } |
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 184 | 1 | uint32_t result, size_t input_rows_count) const override { | 185 | 1 | auto&& [col_from, col_from_is_const] = | 186 | 1 | unpack_if_const(block.get_by_position(arguments[0]).column); | 187 | | | 188 | 1 | if (col_from_is_const && col_from->is_null_at(0)) { | 189 | 0 | auto col_str = ColumnString::create(); | 190 | 0 | col_str->insert_default(); | 191 | 0 | auto null_map = ColumnUInt8::create(1, 1); | 192 | 0 | auto nullable_col = ColumnNullable::create(std::move(col_str), std::move(null_map)); | 193 | 0 | block.get_by_position(result).column = | 194 | 0 | ColumnConst::create(std::move(nullable_col), input_rows_count); | 195 | 0 | return Status::OK(); | 196 | 0 | } | 197 | | | 198 | 1 | auto null_map = ColumnUInt8::create(0, 0); | 199 | 1 | bool is_nullable = false; | 200 | | | 201 | 1 | switch (nullable_mode) { | 202 | 1 | case NullalbeMode::NULLABLE: { | 203 | 1 | is_nullable = true; | 204 | 1 | break; | 205 | 0 | } | 206 | 0 | case NullalbeMode::FOLLOW_INPUT: { | 207 | 0 | for (auto arg : arguments) { | 208 | 0 | is_nullable |= block.get_by_position(arg).type->is_nullable(); | 209 | 0 | } | 210 | 0 | break; | 211 | 0 | } | 212 | 1 | } | 213 | | | 214 | 1 | if (is_nullable) { | 215 | 1 | null_map = ColumnUInt8::create(input_rows_count, 0); | 216 | 1 | } | 217 | | | 218 | 1 | const ColumnString* col_from_string = nullptr; | 219 | 1 | if (col_from->is_nullable()) { | 220 | 1 | const auto& nullable_col = assert_cast<const ColumnNullable&>(*col_from); | 221 | | | 222 | 1 | VectorizedUtils::update_null_map(null_map->get_data(), | 223 | 1 | nullable_col.get_null_map_data()); | 224 | 1 | col_from_string = | 225 | 1 | assert_cast<const ColumnString*>(nullable_col.get_nested_column_ptr().get()); | 226 | 1 | } else { | 227 | 0 | col_from_string = assert_cast<const 
ColumnString*>(col_from.get()); | 228 | 0 | } | 229 | | | 230 | 1 | StringRef constant_default_value; | 231 | 1 | bool default_value_const = false; | 232 | 1 | bool default_value_null_const = false; | 233 | 1 | ColumnPtr default_value_col; | 234 | 1 | JsonBinaryValue default_jsonb_value_parser; | 235 | 1 | const ColumnString* default_value_str_col = nullptr; | 236 | 1 | const NullMap* default_value_nullmap = nullptr; | 237 | | if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) { | 238 | | auto* state = reinterpret_cast<FunctionJsonbParseState*>( | 239 | | context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); | 240 | | if (state && state->has_const_default_value) { | 241 | | constant_default_value = state->default_value; | 242 | | default_value_null_const = state->default_is_null; | 243 | | default_value_const = true; | 244 | | } else if (arguments.size() > 1) { | 245 | | if (block.get_by_position(arguments[1]).type->get_primitive_type() != | 246 | | PrimitiveType::TYPE_JSONB) { | 247 | | return Status::InvalidArgument( | 248 | | "{} second argument should be jsonb type, but got {}", get_name(), | 249 | | block.get_by_position(arguments[1]).type->get_name()); | 250 | | } | 251 | | std::tie(default_value_col, default_value_const) = | 252 | | unpack_if_const(block.get_by_position(arguments[1]).column); | 253 | | if (default_value_const) { | 254 | | const JsonbDocument* default_value_doc = nullptr; | 255 | | if (default_value_col->is_null_at(0)) { | 256 | | default_value_null_const = true; | 257 | | } else { | 258 | | auto data = default_value_col->get_data_at(0); | 259 | | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(data.data, data.size, | 260 | | &default_value_doc)); | 261 | | constant_default_value = data; | 262 | | } | 263 | | } else { | 264 | | if (default_value_col->is_nullable()) { | 265 | | const auto& nullable_col = | 266 | | assert_cast<const ColumnNullable&>(*default_value_col); | 267 | | default_value_str_col = 
assert_cast<const ColumnString*>( | 268 | | nullable_col.get_nested_column_ptr().get()); | 269 | | default_value_nullmap = &(nullable_col.get_null_map_data()); | 270 | | } else { | 271 | | default_value_str_col = | 272 | | assert_cast<const ColumnString*>(default_value_col.get()); | 273 | | } | 274 | | } | 275 | | } else if (arguments.size() == 1) { | 276 | | // parse default value '{}' should always success. | 277 | | RETURN_IF_ERROR(default_jsonb_value_parser.from_json_string(std::string("{}"))); | 278 | | default_value_const = true; | 279 | | constant_default_value.data = default_jsonb_value_parser.value(); | 280 | | constant_default_value.size = default_jsonb_value_parser.size(); | 281 | | } | 282 | | } | 283 | | | 284 | 1 | auto col_to = ColumnString::create(); | 285 | | | 286 | 1 | col_to->reserve(input_rows_count); | 287 | | | 288 | 1 | auto& null_map_data = null_map->get_data(); | 289 | | | 290 | | // parser can be reused for performance | 291 | 1 | JsonBinaryValue jsonb_value; | 292 | | | 293 | 24 | for (size_t i = 0; i < input_rows_count; ++i) { | 294 | 23 | if (is_nullable && null_map_data[i]) { | 295 | 1 | col_to->insert_default(); | 296 | 1 | continue; | 297 | 1 | } | 298 | | | 299 | 22 | auto index = index_check_const(i, col_from_is_const); | 300 | 22 | const auto& val = col_from_string->get_data_at(index); | 301 | 22 | auto st = jsonb_value.from_json_string(val.data, val.size); | 302 | 22 | if (st.ok()) { | 303 | | // insert jsonb format data | 304 | 16 | col_to->insert_data(jsonb_value.value(), jsonb_value.size()); | 305 | 16 | } else { | 306 | | if constexpr (parse_error_handle_mode == JsonbParseErrorMode::FAIL) { | 307 | | return Status::InvalidArgument( | 308 | | "Parse json document failed at row {}, error: {}", i, st.to_string()); | 309 | 6 | } else if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_NULL) { | 310 | 6 | null_map_data[i] = 1; | 311 | 6 | col_to->insert_default(); | 312 | | } else { | 313 | | if 
(default_value_const) { | 314 | | if (default_value_null_const) { | 315 | | null_map_data[i] = 1; | 316 | | col_to->insert_default(); | 317 | | } else { | 318 | | col_to->insert_data(constant_default_value.data, | 319 | | constant_default_value.size); | 320 | | } | 321 | | } else { | 322 | | if (default_value_nullmap && (*default_value_nullmap)[i]) { | 323 | | null_map_data[i] = 1; | 324 | | col_to->insert_default(); | 325 | | continue; | 326 | | } | 327 | | auto value = default_value_str_col->get_data_at(i); | 328 | | col_to->insert_data(value.data, value.size); | 329 | | } | 330 | | } | 331 | 6 | } | 332 | 22 | } | 333 | | | 334 | 1 | if (is_nullable) { | 335 | 1 | block.replace_by_position( | 336 | 1 | result, ColumnNullable::create(std::move(col_to), std::move(null_map))); | 337 | 1 | } else { | 338 | 0 | block.replace_by_position(result, std::move(col_to)); | 339 | 0 | } | 340 | | | 341 | 1 | return Status::OK(); | 342 | 1 | } |
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 184 | 2 | uint32_t result, size_t input_rows_count) const override { | 185 | 2 | auto&& [col_from, col_from_is_const] = | 186 | 2 | unpack_if_const(block.get_by_position(arguments[0]).column); | 187 | | | 188 | 2 | if (col_from_is_const && col_from->is_null_at(0)) { | 189 | 0 | auto col_str = ColumnString::create(); | 190 | 0 | col_str->insert_default(); | 191 | 0 | auto null_map = ColumnUInt8::create(1, 1); | 192 | 0 | auto nullable_col = ColumnNullable::create(std::move(col_str), std::move(null_map)); | 193 | 0 | block.get_by_position(result).column = | 194 | 0 | ColumnConst::create(std::move(nullable_col), input_rows_count); | 195 | 0 | return Status::OK(); | 196 | 0 | } | 197 | | | 198 | 2 | auto null_map = ColumnUInt8::create(0, 0); | 199 | 2 | bool is_nullable = false; | 200 | | | 201 | 2 | switch (nullable_mode) { | 202 | 0 | case NullalbeMode::NULLABLE: { | 203 | 0 | is_nullable = true; | 204 | 0 | break; | 205 | 0 | } | 206 | 2 | case NullalbeMode::FOLLOW_INPUT: { | 207 | 4 | for (auto arg : arguments) { | 208 | 4 | is_nullable |= block.get_by_position(arg).type->is_nullable(); | 209 | 4 | } | 210 | 2 | break; | 211 | 0 | } | 212 | 2 | } | 213 | | | 214 | 2 | if (is_nullable) { | 215 | 1 | null_map = ColumnUInt8::create(input_rows_count, 0); | 216 | 1 | } | 217 | | | 218 | 2 | const ColumnString* col_from_string = nullptr; | 219 | 2 | if (col_from->is_nullable()) { | 220 | 1 | const auto& nullable_col = assert_cast<const ColumnNullable&>(*col_from); | 221 | | | 222 | 1 | VectorizedUtils::update_null_map(null_map->get_data(), | 223 | 1 | nullable_col.get_null_map_data()); | 224 | 1 | col_from_string = | 225 | 1 | assert_cast<const ColumnString*>(nullable_col.get_nested_column_ptr().get()); | 226 | 1 | } else { | 227 | 1 | col_from_string = assert_cast<const 
ColumnString*>(col_from.get()); | 228 | 1 | } | 229 | | | 230 | 2 | StringRef constant_default_value; | 231 | 2 | bool default_value_const = false; | 232 | 2 | bool default_value_null_const = false; | 233 | 2 | ColumnPtr default_value_col; | 234 | 2 | JsonBinaryValue default_jsonb_value_parser; | 235 | 2 | const ColumnString* default_value_str_col = nullptr; | 236 | 2 | const NullMap* default_value_nullmap = nullptr; | 237 | 2 | if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) { | 238 | 2 | auto* state = reinterpret_cast<FunctionJsonbParseState*>( | 239 | 2 | context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); | 240 | 2 | if (state && state->has_const_default_value) { | 241 | 0 | constant_default_value = state->default_value; | 242 | 0 | default_value_null_const = state->default_is_null; | 243 | 0 | default_value_const = true; | 244 | 2 | } else if (arguments.size() > 1) { | 245 | 2 | if (block.get_by_position(arguments[1]).type->get_primitive_type() != | 246 | 2 | PrimitiveType::TYPE_JSONB) { | 247 | 1 | return Status::InvalidArgument( | 248 | 1 | "{} second argument should be jsonb type, but got {}", get_name(), | 249 | 1 | block.get_by_position(arguments[1]).type->get_name()); | 250 | 1 | } | 251 | 1 | std::tie(default_value_col, default_value_const) = | 252 | 1 | unpack_if_const(block.get_by_position(arguments[1]).column); | 253 | 1 | if (default_value_const) { | 254 | 0 | const JsonbDocument* default_value_doc = nullptr; | 255 | 0 | if (default_value_col->is_null_at(0)) { | 256 | 0 | default_value_null_const = true; | 257 | 0 | } else { | 258 | 0 | auto data = default_value_col->get_data_at(0); | 259 | 0 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(data.data, data.size, | 260 | 0 | &default_value_doc)); | 261 | 0 | constant_default_value = data; | 262 | 0 | } | 263 | 1 | } else { | 264 | 1 | if (default_value_col->is_nullable()) { | 265 | 1 | const auto& nullable_col = | 266 | 1 | assert_cast<const 
ColumnNullable&>(*default_value_col); | 267 | 1 | default_value_str_col = assert_cast<const ColumnString*>( | 268 | 1 | nullable_col.get_nested_column_ptr().get()); | 269 | 1 | default_value_nullmap = &(nullable_col.get_null_map_data()); | 270 | 1 | } else { | 271 | 0 | default_value_str_col = | 272 | 0 | assert_cast<const ColumnString*>(default_value_col.get()); | 273 | 0 | } | 274 | 1 | } | 275 | 1 | } else if (arguments.size() == 1) { | 276 | | // parse default value '{}' should always success. | 277 | 0 | RETURN_IF_ERROR(default_jsonb_value_parser.from_json_string(std::string("{}"))); | 278 | 0 | default_value_const = true; | 279 | 0 | constant_default_value.data = default_jsonb_value_parser.value(); | 280 | 0 | constant_default_value.size = default_jsonb_value_parser.size(); | 281 | 0 | } | 282 | 2 | } | 283 | | | 284 | 1 | auto col_to = ColumnString::create(); | 285 | | | 286 | 2 | col_to->reserve(input_rows_count); | 287 | | | 288 | 2 | auto& null_map_data = null_map->get_data(); | 289 | | | 290 | | // parser can be reused for performance | 291 | 2 | JsonBinaryValue jsonb_value; | 292 | | | 293 | 25 | for (size_t i = 0; i < input_rows_count; ++i) { | 294 | 23 | if (is_nullable && null_map_data[i]) { | 295 | 1 | col_to->insert_default(); | 296 | 1 | continue; | 297 | 1 | } | 298 | | | 299 | 22 | auto index = index_check_const(i, col_from_is_const); | 300 | 22 | const auto& val = col_from_string->get_data_at(index); | 301 | 22 | auto st = jsonb_value.from_json_string(val.data, val.size); | 302 | 22 | if (st.ok()) { | 303 | | // insert jsonb format data | 304 | 16 | col_to->insert_data(jsonb_value.value(), jsonb_value.size()); | 305 | 16 | } else { | 306 | | if constexpr (parse_error_handle_mode == JsonbParseErrorMode::FAIL) { | 307 | | return Status::InvalidArgument( | 308 | | "Parse json document failed at row {}, error: {}", i, st.to_string()); | 309 | | } else if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_NULL) { | 310 | | 
null_map_data[i] = 1; | 311 | | col_to->insert_default(); | 312 | 6 | } else { | 313 | 6 | if (default_value_const) { | 314 | 0 | if (default_value_null_const) { | 315 | 0 | null_map_data[i] = 1; | 316 | 0 | col_to->insert_default(); | 317 | 0 | } else { | 318 | 0 | col_to->insert_data(constant_default_value.data, | 319 | 0 | constant_default_value.size); | 320 | 0 | } | 321 | 6 | } else { | 322 | 6 | if (default_value_nullmap && (*default_value_nullmap)[i]) { | 323 | 0 | null_map_data[i] = 1; | 324 | 0 | col_to->insert_default(); | 325 | 0 | continue; | 326 | 0 | } | 327 | 6 | auto value = default_value_str_col->get_data_at(i); | 328 | 6 | col_to->insert_data(value.data, value.size); | 329 | 6 | } | 330 | 6 | } | 331 | 6 | } | 332 | 22 | } | 333 | | | 334 | 2 | if (is_nullable) { | 335 | 1 | block.replace_by_position( | 336 | 1 | result, ColumnNullable::create(std::move(col_to), std::move(null_map))); | 337 | 1 | } else { | 338 | 1 | block.replace_by_position(result, std::move(col_to)); | 339 | 1 | } | 340 | | | 341 | 2 | return Status::OK(); | 342 | 2 | } |
|
343 | | }; |
344 | | |
// Concrete instantiations of FunctionJsonbParseBase. The first template argument
// selects how the nullability of the result is decided, the second selects what
// happens to a row whose input string cannot be parsed as JSON.

// FOLLOW_INPUT + FAIL: the result is nullable only if some argument type is
// nullable; the first unparsable row aborts execution with
// Status::InvalidArgument("Parse json document failed at row ...").
using FunctionJsonbParse =
        FunctionJsonbParseBase<NullalbeMode::FOLLOW_INPUT, JsonbParseErrorMode::FAIL>;
// NULLABLE + RETURN_NULL: the result is always nullable; an unparsable row is
// marked NULL in the result null map instead of failing.
using FunctionJsonbParseErrorNull =
        FunctionJsonbParseBase<NullalbeMode::NULLABLE, JsonbParseErrorMode::RETURN_NULL>;
// FOLLOW_INPUT + RETURN_VALUE: an unparsable row is replaced by a default value.
// The default is taken from the second (jsonb) argument when present, and is the
// parsed document '{}' when the function is called with a single argument; a NULL
// default marks the row NULL.
using FunctionJsonbParseErrorValue =
        FunctionJsonbParseBase<NullalbeMode::FOLLOW_INPUT, JsonbParseErrorMode::RETURN_VALUE>;
352 | | |
353 | | // func(jsonb, [varchar, varchar, ...]) -> nullable(type) |
354 | | template <typename Impl> |
355 | | class FunctionJsonbExtract : public IFunction { |
356 | | public: |
357 | | static constexpr auto name = Impl::name; |
358 | | static constexpr auto alias = Impl::alias; |
359 | 13 | static FunctionPtr create() { return std::make_shared<FunctionJsonbExtract>(); }_ZN5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE6createEv Line | Count | Source | 359 | 2 | static FunctionPtr create() { return std::make_shared<FunctionJsonbExtract>(); } |
_ZN5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE6createEv Line | Count | Source | 359 | 2 | static FunctionPtr create() { return std::make_shared<FunctionJsonbExtract>(); } |
_ZN5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE6createEv Line | Count | Source | 359 | 7 | static FunctionPtr create() { return std::make_shared<FunctionJsonbExtract>(); } |
_ZN5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE6createEv Line | Count | Source | 359 | 2 | static FunctionPtr create() { return std::make_shared<FunctionJsonbExtract>(); } |
|
360 | 0 | String get_name() const override { return name; }Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE8get_nameB5cxx11Ev |
361 | 10 | bool is_variadic() const override { return true; }_ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE11is_variadicEv Line | Count | Source | 361 | 1 | bool is_variadic() const override { return true; } |
_ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE11is_variadicEv Line | Count | Source | 361 | 1 | bool is_variadic() const override { return true; } |
_ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE11is_variadicEv Line | Count | Source | 361 | 7 | bool is_variadic() const override { return true; } |
_ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE11is_variadicEv Line | Count | Source | 361 | 1 | bool is_variadic() const override { return true; } |
|
362 | 1 | size_t get_number_of_arguments() const override { return 0; }Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE23get_number_of_argumentsEv Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE23get_number_of_argumentsEv _ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE23get_number_of_argumentsEv Line | Count | Source | 362 | 1 | size_t get_number_of_arguments() const override { return 0; } |
Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE23get_number_of_argumentsEv |
363 | 10 | bool use_default_implementation_for_nulls() const override { return false; }Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE36use_default_implementation_for_nullsEv Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE36use_default_implementation_for_nullsEv _ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE36use_default_implementation_for_nullsEv Line | Count | Source | 363 | 10 | bool use_default_implementation_for_nulls() const override { return false; } |
Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE36use_default_implementation_for_nullsEv |
364 | 5 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
365 | 5 | return make_nullable(std::make_shared<typename Impl::ReturnType>()); |
366 | 5 | } Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE _ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 364 | 5 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 365 | 5 | return make_nullable(std::make_shared<typename Impl::ReturnType>()); | 366 | 5 | } |
Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE |
367 | 4 | DataTypes get_variadic_argument_types_impl() const override { |
368 | | if constexpr (HasGetVariadicArgumentTypesImpl<Impl>) { |
369 | | return Impl::get_variadic_argument_types_impl(); |
370 | 4 | } else { |
371 | 4 | return {}; |
372 | 4 | } |
373 | 4 | } _ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE32get_variadic_argument_types_implEv Line | Count | Source | 367 | 1 | DataTypes get_variadic_argument_types_impl() const override { | 368 | | if constexpr (HasGetVariadicArgumentTypesImpl<Impl>) { | 369 | | return Impl::get_variadic_argument_types_impl(); | 370 | 1 | } else { | 371 | 1 | return {}; | 372 | 1 | } | 373 | 1 | } |
_ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE32get_variadic_argument_types_implEv Line | Count | Source | 367 | 1 | DataTypes get_variadic_argument_types_impl() const override { | 368 | | if constexpr (HasGetVariadicArgumentTypesImpl<Impl>) { | 369 | | return Impl::get_variadic_argument_types_impl(); | 370 | 1 | } else { | 371 | 1 | return {}; | 372 | 1 | } | 373 | 1 | } |
_ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE32get_variadic_argument_types_implEv Line | Count | Source | 367 | 1 | DataTypes get_variadic_argument_types_impl() const override { | 368 | | if constexpr (HasGetVariadicArgumentTypesImpl<Impl>) { | 369 | | return Impl::get_variadic_argument_types_impl(); | 370 | 1 | } else { | 371 | 1 | return {}; | 372 | 1 | } | 373 | 1 | } |
_ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE32get_variadic_argument_types_implEv Line | Count | Source | 367 | 1 | DataTypes get_variadic_argument_types_impl() const override { | 368 | | if constexpr (HasGetVariadicArgumentTypesImpl<Impl>) { | 369 | | return Impl::get_variadic_argument_types_impl(); | 370 | 1 | } else { | 371 | 1 | return {}; | 372 | 1 | } | 373 | 1 | } |
|
374 | | |
375 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
376 | 5 | uint32_t result, size_t input_rows_count) const override { |
377 | 5 | DCHECK_GE(arguments.size(), 2); |
378 | | |
379 | 5 | ColumnPtr jsonb_data_column; |
380 | 5 | bool jsonb_data_const = false; |
381 | 5 | const NullMap* data_null_map = nullptr; |
382 | | |
383 | 5 | if (block.get_by_position(arguments[0]).type->get_primitive_type() != |
384 | 5 | PrimitiveType::TYPE_JSONB) { |
385 | 1 | return Status::InvalidArgument( |
386 | 1 | "jsonb_extract first argument should be json type, but got {}", |
387 | 1 | block.get_by_position(arguments[0]).type->get_name()); |
388 | 1 | } |
389 | | |
390 | | // prepare jsonb data column |
391 | 4 | std::tie(jsonb_data_column, jsonb_data_const) = |
392 | 4 | unpack_if_const(block.get_by_position(arguments[0]).column); |
393 | 4 | if (jsonb_data_column->is_nullable()) { |
394 | 4 | const auto& nullable_column = assert_cast<const ColumnNullable&>(*jsonb_data_column); |
395 | 4 | jsonb_data_column = nullable_column.get_nested_column_ptr(); |
396 | 4 | data_null_map = &nullable_column.get_null_map_data(); |
397 | 4 | } |
398 | 4 | const auto& ldata = assert_cast<const ColumnString*>(jsonb_data_column.get())->get_chars(); |
399 | 4 | const auto& loffsets = |
400 | 4 | assert_cast<const ColumnString*>(jsonb_data_column.get())->get_offsets(); |
401 | | |
402 | | // prepare parse path column prepare |
403 | 4 | std::vector<const ColumnString*> jsonb_path_columns; |
404 | 4 | std::vector<bool> path_const(arguments.size() - 1); |
405 | 4 | std::vector<const NullMap*> path_null_maps(arguments.size() - 1, nullptr); |
406 | 8 | for (int i = 0; i < arguments.size() - 1; ++i) { |
407 | 4 | ColumnPtr path_column; |
408 | 4 | bool is_const = false; |
409 | 4 | std::tie(path_column, is_const) = |
410 | 4 | unpack_if_const(block.get_by_position(arguments[i + 1]).column); |
411 | 4 | path_const[i] = is_const; |
412 | 4 | if (path_column->is_nullable()) { |
413 | 4 | const auto& nullable_column = assert_cast<const ColumnNullable&>(*path_column); |
414 | 4 | path_column = nullable_column.get_nested_column_ptr(); |
415 | 4 | path_null_maps[i] = &nullable_column.get_null_map_data(); |
416 | 4 | } |
417 | 4 | jsonb_path_columns.push_back(assert_cast<const ColumnString*>(path_column.get())); |
418 | 4 | } |
419 | | |
420 | 4 | auto null_map = ColumnUInt8::create(input_rows_count, 0); |
421 | 4 | auto res = Impl::ColumnType::create(); |
422 | | |
423 | | // execute Impl |
424 | | if constexpr (std::is_same_v<typename Impl::ReturnType, DataTypeString> || |
425 | 4 | std::is_same_v<typename Impl::ReturnType, DataTypeJsonb>) { |
426 | 4 | auto& res_data = res->get_chars(); |
427 | 4 | auto& res_offsets = res->get_offsets(); |
428 | 4 | RETURN_IF_ERROR(Impl::vector_vector_v2( |
429 | 4 | context, ldata, loffsets, data_null_map, jsonb_data_const, jsonb_path_columns, |
430 | 4 | path_null_maps, path_const, res_data, res_offsets, null_map->get_data())); |
431 | 4 | } else { |
432 | | // not support other extract type for now (e.g. int, double, ...) |
433 | 0 | DCHECK_EQ(jsonb_path_columns.size(), 1); |
434 | 0 | const auto& rdata = jsonb_path_columns[0]->get_chars(); |
435 | 0 | const auto& roffsets = jsonb_path_columns[0]->get_offsets(); |
436 | |
|
437 | 0 | auto create_all_null_result = [&]() { |
438 | 0 | res = Impl::ColumnType::create(); |
439 | 0 | res->insert_default(); |
440 | 0 | auto nullable_column = |
441 | 0 | ColumnNullable::create(std::move(res), ColumnUInt8::create(1, 1)); |
442 | 0 | auto const_column = |
443 | 0 | ColumnConst::create(std::move(nullable_column), input_rows_count); |
444 | 0 | block.get_by_position(result).column = std::move(const_column); |
445 | 0 | return Status::OK(); |
446 | 0 | }; |
447 | |
|
448 | 0 | if (jsonb_data_const) { |
449 | 0 | if (data_null_map && (*data_null_map)[0]) { |
450 | 0 | return create_all_null_result(); |
451 | 0 | } |
452 | | |
453 | 0 | RETURN_IF_ERROR(Impl::scalar_vector(context, jsonb_data_column->get_data_at(0), |
454 | 0 | rdata, roffsets, path_null_maps[0], |
455 | 0 | res->get_data(), null_map->get_data())); |
456 | 0 | } else if (path_const[0]) { |
457 | 0 | if (path_null_maps[0] && (*path_null_maps[0])[0]) { |
458 | 0 | return create_all_null_result(); |
459 | 0 | } |
460 | 0 | RETURN_IF_ERROR(Impl::vector_scalar(context, ldata, loffsets, data_null_map, |
461 | 0 | jsonb_path_columns[0]->get_data_at(0), |
462 | 0 | res->get_data(), null_map->get_data())); |
463 | 0 | } else { |
464 | 0 | RETURN_IF_ERROR(Impl::vector_vector(context, ldata, loffsets, data_null_map, rdata, |
465 | 0 | roffsets, path_null_maps[0], res->get_data(), |
466 | 0 | null_map->get_data())); |
467 | 0 | } |
468 | 0 | } |
469 | | |
470 | 4 | block.get_by_position(result).column = |
471 | 4 | ColumnNullable::create(std::move(res), std::move(null_map)); |
472 | 4 | return Status::OK(); |
473 | 5 | } Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm _ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 376 | 5 | uint32_t result, size_t input_rows_count) const override { | 377 | 5 | DCHECK_GE(arguments.size(), 2); | 378 | | | 379 | 5 | ColumnPtr jsonb_data_column; | 380 | 5 | bool jsonb_data_const = false; | 381 | 5 | const NullMap* data_null_map = nullptr; | 382 | | | 383 | 5 | if (block.get_by_position(arguments[0]).type->get_primitive_type() != | 384 | 5 | PrimitiveType::TYPE_JSONB) { | 385 | 1 | return Status::InvalidArgument( | 386 | 1 | "jsonb_extract first argument should be json type, but got {}", | 387 | 1 | block.get_by_position(arguments[0]).type->get_name()); | 388 | 1 | } | 389 | | | 390 | | // prepare jsonb data column | 391 | 4 | std::tie(jsonb_data_column, jsonb_data_const) = | 392 | 4 | unpack_if_const(block.get_by_position(arguments[0]).column); | 393 | 4 | if (jsonb_data_column->is_nullable()) { | 394 | 4 | const auto& nullable_column = assert_cast<const ColumnNullable&>(*jsonb_data_column); | 395 | 4 | jsonb_data_column = nullable_column.get_nested_column_ptr(); | 396 | 4 | data_null_map = &nullable_column.get_null_map_data(); | 397 | 4 | } | 398 | 4 | const auto& ldata = assert_cast<const ColumnString*>(jsonb_data_column.get())->get_chars(); | 399 | 4 | const auto& loffsets = | 400 | 4 | assert_cast<const ColumnString*>(jsonb_data_column.get())->get_offsets(); | 401 | | | 402 | | // prepare parse path column prepare | 403 | 4 | std::vector<const ColumnString*> jsonb_path_columns; | 404 | 4 | std::vector<bool> path_const(arguments.size() - 1); | 405 | 4 | std::vector<const NullMap*> 
path_null_maps(arguments.size() - 1, nullptr); | 406 | 8 | for (int i = 0; i < arguments.size() - 1; ++i) { | 407 | 4 | ColumnPtr path_column; | 408 | 4 | bool is_const = false; | 409 | 4 | std::tie(path_column, is_const) = | 410 | 4 | unpack_if_const(block.get_by_position(arguments[i + 1]).column); | 411 | 4 | path_const[i] = is_const; | 412 | 4 | if (path_column->is_nullable()) { | 413 | 4 | const auto& nullable_column = assert_cast<const ColumnNullable&>(*path_column); | 414 | 4 | path_column = nullable_column.get_nested_column_ptr(); | 415 | 4 | path_null_maps[i] = &nullable_column.get_null_map_data(); | 416 | 4 | } | 417 | 4 | jsonb_path_columns.push_back(assert_cast<const ColumnString*>(path_column.get())); | 418 | 4 | } | 419 | | | 420 | 4 | auto null_map = ColumnUInt8::create(input_rows_count, 0); | 421 | 4 | auto res = Impl::ColumnType::create(); | 422 | | | 423 | | // execute Impl | 424 | | if constexpr (std::is_same_v<typename Impl::ReturnType, DataTypeString> || | 425 | 4 | std::is_same_v<typename Impl::ReturnType, DataTypeJsonb>) { | 426 | 4 | auto& res_data = res->get_chars(); | 427 | 4 | auto& res_offsets = res->get_offsets(); | 428 | 4 | RETURN_IF_ERROR(Impl::vector_vector_v2( | 429 | 4 | context, ldata, loffsets, data_null_map, jsonb_data_const, jsonb_path_columns, | 430 | 4 | path_null_maps, path_const, res_data, res_offsets, null_map->get_data())); | 431 | | } else { | 432 | | // not support other extract type for now (e.g. int, double, ...) 
| 433 | | DCHECK_EQ(jsonb_path_columns.size(), 1); | 434 | | const auto& rdata = jsonb_path_columns[0]->get_chars(); | 435 | | const auto& roffsets = jsonb_path_columns[0]->get_offsets(); | 436 | | | 437 | | auto create_all_null_result = [&]() { | 438 | | res = Impl::ColumnType::create(); | 439 | | res->insert_default(); | 440 | | auto nullable_column = | 441 | | ColumnNullable::create(std::move(res), ColumnUInt8::create(1, 1)); | 442 | | auto const_column = | 443 | | ColumnConst::create(std::move(nullable_column), input_rows_count); | 444 | | block.get_by_position(result).column = std::move(const_column); | 445 | | return Status::OK(); | 446 | | }; | 447 | | | 448 | | if (jsonb_data_const) { | 449 | | if (data_null_map && (*data_null_map)[0]) { | 450 | | return create_all_null_result(); | 451 | | } | 452 | | | 453 | | RETURN_IF_ERROR(Impl::scalar_vector(context, jsonb_data_column->get_data_at(0), | 454 | | rdata, roffsets, path_null_maps[0], | 455 | | res->get_data(), null_map->get_data())); | 456 | | } else if (path_const[0]) { | 457 | | if (path_null_maps[0] && (*path_null_maps[0])[0]) { | 458 | | return create_all_null_result(); | 459 | | } | 460 | | RETURN_IF_ERROR(Impl::vector_scalar(context, ldata, loffsets, data_null_map, | 461 | | jsonb_path_columns[0]->get_data_at(0), | 462 | | res->get_data(), null_map->get_data())); | 463 | | } else { | 464 | | RETURN_IF_ERROR(Impl::vector_vector(context, ldata, loffsets, data_null_map, rdata, | 465 | | roffsets, path_null_maps[0], res->get_data(), | 466 | | null_map->get_data())); | 467 | | } | 468 | | } | 469 | | | 470 | 4 | block.get_by_position(result).column = | 471 | 4 | ColumnNullable::create(std::move(res), std::move(null_map)); | 472 | 4 | return Status::OK(); | 473 | 5 | } |
Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm |
474 | | }; |
475 | | |
476 | | class FunctionJsonbKeys : public IFunction { |
477 | | public: |
478 | | static constexpr auto name = "json_keys"; |
479 | | static constexpr auto alias = "jsonb_keys"; |
480 | 2 | static FunctionPtr create() { return std::make_shared<FunctionJsonbKeys>(); } |
481 | 0 | String get_name() const override { return name; } |
482 | 1 | bool is_variadic() const override { return true; } |
483 | 0 | size_t get_number_of_arguments() const override { return 0; } |
484 | | |
485 | 0 | bool use_default_implementation_for_nulls() const override { return false; } |
486 | | |
487 | 0 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
488 | 0 | return make_nullable( |
489 | 0 | std::make_shared<DataTypeArray>(make_nullable(std::make_shared<DataTypeString>()))); |
490 | 0 | } |
491 | | |
492 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
493 | 0 | uint32_t result, size_t input_rows_count) const override { |
494 | 0 | DCHECK_GE(arguments.size(), 1); |
495 | 0 | DCHECK(arguments.size() == 1 || arguments.size() == 2) |
496 | 0 | << "json_keys should have 1 or 2 arguments, but got " << arguments.size(); |
497 | |
|
498 | 0 | const NullMap* data_null_map = nullptr; |
499 | 0 | const ColumnString* col_from_string = nullptr; |
500 | | // prepare jsonb data column |
501 | 0 | auto&& [jsonb_data_column, json_data_const] = |
502 | 0 | unpack_if_const(block.get_by_position(arguments[0]).column); |
503 | 0 | if (jsonb_data_column->is_nullable()) { |
504 | 0 | const auto* nullable = check_and_get_column<ColumnNullable>(jsonb_data_column.get()); |
505 | 0 | col_from_string = |
506 | 0 | assert_cast<const ColumnString*>(nullable->get_nested_column_ptr().get()); |
507 | 0 | data_null_map = &nullable->get_null_map_data(); |
508 | 0 | } else { |
509 | 0 | col_from_string = assert_cast<const ColumnString*>(jsonb_data_column.get()); |
510 | 0 | } |
511 | | |
512 | | // prepare parse path column prepare, maybe we do not have path column |
513 | 0 | ColumnPtr jsonb_path_column = nullptr; |
514 | 0 | const ColumnString* jsonb_path_col = nullptr; |
515 | 0 | bool path_const = false; |
516 | 0 | const NullMap* path_null_map = nullptr; |
517 | 0 | if (arguments.size() == 2) { |
518 | | // we have should have a ColumnString for path |
519 | 0 | std::tie(jsonb_path_column, path_const) = |
520 | 0 | unpack_if_const(block.get_by_position(arguments[1]).column); |
521 | 0 | if (jsonb_path_column->is_nullable()) { |
522 | 0 | const auto* nullable = |
523 | 0 | check_and_get_column<ColumnNullable>(jsonb_path_column.get()); |
524 | 0 | jsonb_path_column = nullable->get_nested_column_ptr(); |
525 | 0 | path_null_map = &nullable->get_null_map_data(); |
526 | 0 | } |
527 | 0 | jsonb_path_col = check_and_get_column<ColumnString>(jsonb_path_column.get()); |
528 | 0 | } |
529 | |
|
530 | 0 | auto null_map = ColumnUInt8::create(input_rows_count, 0); |
531 | 0 | NullMap& res_null_map = null_map->get_data(); |
532 | |
|
533 | 0 | auto dst_arr = ColumnArray::create( |
534 | 0 | ColumnNullable::create(ColumnString::create(), ColumnUInt8::create()), |
535 | 0 | ColumnArray::ColumnOffsets::create()); |
536 | 0 | auto& dst_nested_column = assert_cast<ColumnNullable&>(dst_arr->get_data()); |
537 | |
|
538 | 0 | Status st = std::visit( |
539 | 0 | [&](auto data_const, auto has_path, auto path_const) { |
540 | 0 | return inner_loop_impl<data_const, has_path, path_const>( |
541 | 0 | input_rows_count, *dst_arr, dst_nested_column, res_null_map, |
542 | 0 | *col_from_string, data_null_map, jsonb_path_col, path_null_map); |
543 | 0 | }, Unexecuted instantiation: _ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESG_SG_EEDaSA_SB_SC_ Unexecuted instantiation: _ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESG_SF_IbLb1EEEEDaSA_SB_SC_ Unexecuted instantiation: _ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESF_IbLb1EESG_EEDaSA_SB_SC_ Unexecuted instantiation: _ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESF_IbLb1EESH_EEDaSA_SB_SC_ Unexecuted instantiation: _ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESF_IbLb0EESH_EEDaSA_SB_SC_ Unexecuted instantiation: _ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESF_IbLb0EESG_EEDaSA_SB_SC_ Unexecuted instantiation: _ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESG_SF_IbLb0EEEEDaSA_SB_SC_ Unexecuted instantiation: _ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESG_SG_EEDaSA_SB_SC_ |
544 | 0 | make_bool_variant(json_data_const), make_bool_variant(jsonb_path_column), |
545 | 0 | make_bool_variant(path_const)); |
546 | 0 | if (!st.ok()) { |
547 | 0 | return st; |
548 | 0 | } |
549 | 0 | block.get_by_position(result).column = |
550 | 0 | ColumnNullable::create(std::move(dst_arr), std::move(null_map)); |
551 | 0 | return st; |
552 | 0 | } |
553 | | |
554 | | private: |
555 | | template <bool JSONB_DATA_CONST, bool JSONB_PATH_PARAM, bool JSON_PATH_CONST> |
556 | | static ALWAYS_INLINE Status inner_loop_impl(size_t input_rows_count, ColumnArray& dst_arr, |
557 | | ColumnNullable& dst_nested_column, |
558 | | NullMap& res_null_map, |
559 | | const ColumnString& col_from_string, |
560 | | const NullMap* jsonb_data_nullmap, |
561 | | const ColumnString* jsonb_path_column, |
562 | 0 | const NullMap* path_null_map) { |
563 | | // if path is const, we just need to parse it once |
564 | 0 | JsonbPath const_path; |
565 | 0 | if constexpr (JSONB_PATH_PARAM && JSON_PATH_CONST) { |
566 | 0 | StringRef r_raw_ref = jsonb_path_column->get_data_at(0); |
567 | 0 | if (!const_path.seek(r_raw_ref.data, r_raw_ref.size)) { |
568 | 0 | return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}", |
569 | 0 | r_raw_ref.to_string()); |
570 | 0 | } |
571 | | |
572 | 0 | if (const_path.is_wildcard()) { |
573 | 0 | return Status::InvalidJsonPath( |
574 | 0 | "In this situation, path expressions may not contain the * and ** tokens " |
575 | 0 | "or an array range."); |
576 | 0 | } |
577 | 0 | } |
578 | | |
579 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { |
580 | 0 | auto index = index_check_const(i, JSONB_DATA_CONST); |
581 | | // if jsonb data is null or path column is null , we should return null |
582 | 0 | if (jsonb_data_nullmap && (*jsonb_data_nullmap)[index]) { |
583 | 0 | res_null_map[i] = 1; |
584 | 0 | dst_arr.insert_default(); |
585 | 0 | continue; |
586 | 0 | } |
587 | 0 | if constexpr (JSONB_PATH_PARAM && !JSON_PATH_CONST) { |
588 | 0 | if (path_null_map && (*path_null_map)[i]) { |
589 | 0 | res_null_map[i] = 1; |
590 | 0 | dst_arr.insert_default(); |
591 | 0 | continue; |
592 | 0 | } |
593 | 0 | } |
594 | | |
595 | 0 | auto json_data = col_from_string.get_data_at(index); |
596 | 0 | const JsonbDocument* doc = nullptr; |
597 | 0 | auto st = JsonbDocument::checkAndCreateDocument(json_data.data, json_data.size, &doc); |
598 | 0 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { |
599 | 0 | dst_arr.clear(); |
600 | 0 | return Status::InvalidArgument("jsonb data is invalid"); |
601 | 0 | } |
602 | 0 | const JsonbValue* obj_val; |
603 | 0 | JsonbFindResult find_result; |
604 | 0 | if constexpr (JSONB_PATH_PARAM) { |
605 | 0 | if constexpr (!JSON_PATH_CONST) { |
606 | 0 | auto data = jsonb_path_column->get_data_at(i); |
607 | 0 | JsonbPath path; |
608 | 0 | if (!path.seek(data.data, data.size)) { |
609 | 0 | return Status::InvalidArgument( |
610 | 0 | "Json path error: Invalid Json Path for value: {} at row: {}", |
611 | 0 | std::string_view(data.data, data.size), i); |
612 | 0 | } |
613 | | |
614 | 0 | if (path.is_wildcard()) { |
615 | 0 | return Status::InvalidJsonPath( |
616 | 0 | "In this situation, path expressions may not contain the * and ** " |
617 | 0 | "tokens " |
618 | 0 | "or an array range. at row: {}", |
619 | 0 | i); |
620 | 0 | } |
621 | 0 | find_result = doc->getValue()->findValue(path); |
622 | 0 | } else { |
623 | 0 | find_result = doc->getValue()->findValue(const_path); |
624 | 0 | } |
625 | 0 | obj_val = find_result.value; |
626 | 0 | } else { |
627 | 0 | obj_val = doc->getValue(); |
628 | 0 | } |
629 | | |
630 | 0 | if (!obj_val || !obj_val->isObject()) { |
631 | | // if jsonb data is not object we should return null |
632 | 0 | res_null_map[i] = 1; |
633 | 0 | dst_arr.insert_default(); |
634 | 0 | continue; |
635 | 0 | } |
636 | 0 | const auto* obj = obj_val->unpack<ObjectVal>(); |
637 | 0 | for (const auto& it : *obj) { |
638 | 0 | dst_nested_column.insert_data(it.getKeyStr(), it.klen()); |
639 | 0 | } |
640 | 0 | dst_arr.get_offsets().push_back(dst_nested_column.size()); |
641 | 0 | } //for |
642 | 0 | return Status::OK(); |
643 | 0 | } Unexecuted instantiation: _ZN5doris17FunctionJsonbKeys15inner_loop_implILb0ELb0ELb0EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_ Unexecuted instantiation: _ZN5doris17FunctionJsonbKeys15inner_loop_implILb0ELb0ELb1EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_ Unexecuted instantiation: _ZN5doris17FunctionJsonbKeys15inner_loop_implILb0ELb1ELb0EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_ Unexecuted instantiation: _ZN5doris17FunctionJsonbKeys15inner_loop_implILb0ELb1ELb1EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_ Unexecuted instantiation: _ZN5doris17FunctionJsonbKeys15inner_loop_implILb1ELb0ELb0EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_ Unexecuted instantiation: _ZN5doris17FunctionJsonbKeys15inner_loop_implILb1ELb0ELb1EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_ Unexecuted instantiation: _ZN5doris17FunctionJsonbKeys15inner_loop_implILb1ELb1ELb0EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_ Unexecuted instantiation: 
_ZN5doris17FunctionJsonbKeys15inner_loop_implILb1ELb1ELb1EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_ |
644 | | }; |
645 | | |
646 | | class FunctionJsonbExtractPath : public IFunction { |
647 | | public: |
648 | | static constexpr auto name = "json_exists_path"; |
649 | | static constexpr auto alias = "jsonb_exists_path"; |
650 | | using ColumnType = ColumnUInt8; |
651 | | using Container = typename ColumnType::Container; |
652 | 2 | static FunctionPtr create() { return std::make_shared<FunctionJsonbExtractPath>(); } |
653 | 1 | String get_name() const override { return name; } |
654 | 0 | size_t get_number_of_arguments() const override { return 2; } |
655 | 0 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
656 | | // it only needs to indicate existence and does not need to return nullable values. |
657 | 0 | const auto nullable = std::ranges::any_of( |
658 | 0 | arguments, [](const DataTypePtr& type) { return type->is_nullable(); }); |
659 | 0 | if (nullable) { |
660 | 0 | return make_nullable(std::make_shared<DataTypeUInt8>()); |
661 | 0 | } else { |
662 | 0 | return std::make_shared<DataTypeUInt8>(); |
663 | 0 | } |
664 | 0 | } |
665 | | |
666 | 0 | bool use_default_implementation_for_nulls() const override { return false; } |
667 | | |
668 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
669 | 0 | uint32_t result, size_t input_rows_count) const override { |
670 | | // prepare jsonb data column |
671 | 0 | auto&& [jsonb_data_column, jsonb_data_const] = |
672 | 0 | unpack_if_const(block.get_by_position(arguments[0]).column); |
673 | |
|
674 | 0 | const NullMap* data_null_map = nullptr; |
675 | 0 | const ColumnString* data_col = nullptr; |
676 | 0 | if (jsonb_data_column->is_nullable()) { |
677 | 0 | const auto* nullable = assert_cast<const ColumnNullable*>(jsonb_data_column.get()); |
678 | 0 | data_col = assert_cast<const ColumnString*>(nullable->get_nested_column_ptr().get()); |
679 | 0 | data_null_map = &nullable->get_null_map_data(); |
680 | 0 | } else { |
681 | 0 | data_col = assert_cast<const ColumnString*>(jsonb_data_column.get()); |
682 | 0 | } |
683 | |
|
684 | 0 | const auto& ldata = data_col->get_chars(); |
685 | 0 | const auto& loffsets = data_col->get_offsets(); |
686 | | |
687 | | // prepare parse path column prepare |
688 | 0 | auto&& [path_column, path_const] = |
689 | 0 | unpack_if_const(block.get_by_position(arguments[1]).column); |
690 | 0 | const ColumnString* path_col = nullptr; |
691 | 0 | const NullMap* path_null_map = nullptr; |
692 | 0 | if (path_column->is_nullable()) { |
693 | 0 | const auto* nullable = assert_cast<const ColumnNullable*>(path_column.get()); |
694 | 0 | path_col = assert_cast<const ColumnString*>(nullable->get_nested_column_ptr().get()); |
695 | 0 | path_null_map = &nullable->get_null_map_data(); |
696 | 0 | } else { |
697 | 0 | path_col = assert_cast<const ColumnString*>(path_column.get()); |
698 | 0 | } |
699 | |
|
700 | 0 | DCHECK(!(jsonb_data_const && path_const)) |
701 | 0 | << "jsonb_data_const and path_const should not be both const"; |
702 | |
|
703 | 0 | auto create_all_null_result = [&]() { |
704 | 0 | auto res = ColumnType::create(); |
705 | 0 | res->insert_default(); |
706 | 0 | auto nullable_column = |
707 | 0 | ColumnNullable::create(std::move(res), ColumnUInt8::create(1, 1)); |
708 | 0 | auto const_column = ColumnConst::create(std::move(nullable_column), input_rows_count); |
709 | 0 | block.get_by_position(result).column = std::move(const_column); |
710 | 0 | return Status::OK(); |
711 | 0 | }; |
712 | |
|
713 | 0 | MutableColumnPtr result_null_map_column; |
714 | 0 | NullMap* result_null_map = nullptr; |
715 | 0 | if (data_null_map || path_null_map) { |
716 | 0 | result_null_map_column = ColumnUInt8::create(input_rows_count, 0); |
717 | 0 | result_null_map = &assert_cast<ColumnUInt8&>(*result_null_map_column).get_data(); |
718 | |
|
719 | 0 | if (data_null_map) { |
720 | 0 | VectorizedUtils::update_null_map(*result_null_map, *data_null_map, |
721 | 0 | jsonb_data_const); |
722 | 0 | } |
723 | |
|
724 | 0 | if (path_null_map) { |
725 | 0 | VectorizedUtils::update_null_map(*result_null_map, *path_null_map, path_const); |
726 | 0 | } |
727 | |
|
728 | 0 | if (!simd::contain_zero(result_null_map->data(), input_rows_count)) { |
729 | 0 | return create_all_null_result(); |
730 | 0 | } |
731 | 0 | } |
732 | | |
733 | 0 | auto res = ColumnType::create(); |
734 | |
|
735 | 0 | bool is_invalid_json_path = false; |
736 | |
|
737 | 0 | const auto& rdata = path_col->get_chars(); |
738 | 0 | const auto& roffsets = path_col->get_offsets(); |
739 | 0 | if (jsonb_data_const) { |
740 | 0 | if (data_null_map && (*data_null_map)[0]) { |
741 | 0 | return create_all_null_result(); |
742 | 0 | } |
743 | 0 | scalar_vector(context, data_col->get_data_at(0), rdata, roffsets, res->get_data(), |
744 | 0 | result_null_map, is_invalid_json_path); |
745 | 0 | } else if (path_const) { |
746 | 0 | if (path_null_map && (*path_null_map)[0]) { |
747 | 0 | return create_all_null_result(); |
748 | 0 | } |
749 | 0 | vector_scalar(context, ldata, loffsets, path_col->get_data_at(0), res->get_data(), |
750 | 0 | result_null_map, is_invalid_json_path); |
751 | 0 | } else { |
752 | 0 | vector_vector(context, ldata, loffsets, rdata, roffsets, res->get_data(), |
753 | 0 | result_null_map, is_invalid_json_path); |
754 | 0 | } |
755 | 0 | if (is_invalid_json_path) { |
756 | 0 | return Status::InvalidArgument( |
757 | 0 | "Json path error: Invalid Json Path for value: {}", |
758 | 0 | std::string_view(reinterpret_cast<const char*>(rdata.data()), rdata.size())); |
759 | 0 | } |
760 | | |
761 | 0 | if (result_null_map) { |
762 | 0 | auto nullabel_col = |
763 | 0 | ColumnNullable::create(std::move(res), std::move(result_null_map_column)); |
764 | 0 | block.get_by_position(result).column = std::move(nullabel_col); |
765 | 0 | } else { |
766 | 0 | block.get_by_position(result).column = std::move(res); |
767 | 0 | } |
768 | 0 | return Status::OK(); |
769 | 0 | } |
770 | | |
771 | | private: |
772 | | static ALWAYS_INLINE void inner_loop_impl(size_t i, Container& res, const char* l_raw_str, |
773 | 0 | size_t l_str_size, JsonbPath& path) { |
774 | | // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory |
775 | 0 | const JsonbDocument* doc = nullptr; |
776 | 0 | auto st = JsonbDocument::checkAndCreateDocument(l_raw_str, l_str_size, &doc); |
777 | 0 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { |
778 | 0 | return; |
779 | 0 | } |
780 | | |
781 | | // value is NOT necessary to be deleted since JsonbValue will not allocate memory |
782 | 0 | auto result = doc->getValue()->findValue(path); |
783 | |
|
784 | 0 | if (result.value) { |
785 | 0 | res[i] = 1; |
786 | 0 | } |
787 | 0 | } |
788 | | static void vector_vector(FunctionContext* context, const ColumnString::Chars& ldata, |
789 | | const ColumnString::Offsets& loffsets, |
790 | | const ColumnString::Chars& rdata, |
791 | | const ColumnString::Offsets& roffsets, Container& res, |
792 | 0 | const NullMap* result_null_map, bool& is_invalid_json_path) { |
793 | 0 | const size_t size = loffsets.size(); |
794 | 0 | res.resize_fill(size, 0); |
795 | |
|
796 | 0 | for (size_t i = 0; i < size; i++) { |
797 | 0 | if (result_null_map && (*result_null_map)[i]) { |
798 | 0 | continue; |
799 | 0 | } |
800 | | |
801 | 0 | const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]); |
802 | 0 | int l_str_size = loffsets[i] - loffsets[i - 1]; |
803 | |
|
804 | 0 | const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]); |
805 | 0 | int r_str_size = roffsets[i] - roffsets[i - 1]; |
806 | |
|
807 | 0 | JsonbPath path; |
808 | 0 | if (!path.seek(r_raw_str, r_str_size)) { |
809 | 0 | is_invalid_json_path = true; |
810 | 0 | return; |
811 | 0 | } |
812 | | |
813 | 0 | inner_loop_impl(i, res, l_raw_str, l_str_size, path); |
814 | 0 | } |
815 | 0 | } |
816 | | static void scalar_vector(FunctionContext* context, const StringRef& ldata, |
817 | | const ColumnString::Chars& rdata, |
818 | | const ColumnString::Offsets& roffsets, Container& res, |
819 | 0 | const NullMap* result_null_map, bool& is_invalid_json_path) { |
820 | 0 | const size_t size = roffsets.size(); |
821 | 0 | res.resize_fill(size, 0); |
822 | |
|
823 | 0 | for (size_t i = 0; i < size; i++) { |
824 | 0 | if (result_null_map && (*result_null_map)[i]) { |
825 | 0 | continue; |
826 | 0 | } |
827 | 0 | const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]); |
828 | 0 | int r_str_size = roffsets[i] - roffsets[i - 1]; |
829 | |
|
830 | 0 | JsonbPath path; |
831 | 0 | if (!path.seek(r_raw_str, r_str_size)) { |
832 | 0 | is_invalid_json_path = true; |
833 | 0 | return; |
834 | 0 | } |
835 | | |
836 | 0 | inner_loop_impl(i, res, ldata.data, ldata.size, path); |
837 | 0 | } |
838 | 0 | } |
839 | | static void vector_scalar(FunctionContext* context, const ColumnString::Chars& ldata, |
840 | | const ColumnString::Offsets& loffsets, const StringRef& rdata, |
841 | | Container& res, const NullMap* result_null_map, |
842 | 0 | bool& is_invalid_json_path) { |
843 | 0 | const size_t size = loffsets.size(); |
844 | 0 | res.resize_fill(size, 0); |
845 | |
|
846 | 0 | JsonbPath path; |
847 | 0 | if (!path.seek(rdata.data, rdata.size)) { |
848 | 0 | is_invalid_json_path = true; |
849 | 0 | return; |
850 | 0 | } |
851 | | |
852 | 0 | for (size_t i = 0; i < size; i++) { |
853 | 0 | if (result_null_map && (*result_null_map)[i]) { |
854 | 0 | continue; |
855 | 0 | } |
856 | 0 | const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]); |
857 | 0 | int l_str_size = loffsets[i] - loffsets[i - 1]; |
858 | |
|
859 | 0 | inner_loop_impl(i, res, l_raw_str, l_str_size, path); |
860 | 0 | } |
861 | 0 | } |
862 | | }; |
863 | | |
864 | | template <typename ValueType> |
865 | | struct JsonbExtractStringImpl { |
866 | | using ReturnType = typename ValueType::ReturnType; |
867 | | using ColumnType = typename ValueType::ColumnType; |
868 | | |
869 | | private: |
870 | | static ALWAYS_INLINE void inner_loop_impl(JsonbWriter* writer, size_t i, |
871 | | ColumnString::Chars& res_data, |
872 | | ColumnString::Offsets& res_offsets, NullMap& null_map, |
873 | | std::unique_ptr<JsonbToJson>& formater, |
874 | 90 | const char* l_raw, size_t l_size, JsonbPath& path) { |
875 | | // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory |
876 | 90 | const JsonbDocument* doc = nullptr; |
877 | 90 | auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc); |
878 | 90 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { |
879 | 0 | StringOP::push_null_string(i, res_data, res_offsets, null_map); |
880 | 0 | return; |
881 | 0 | } |
882 | | |
883 | | // value is NOT necessary to be deleted since JsonbValue will not allocate memory |
884 | 90 | auto find_result = doc->getValue()->findValue(path); |
885 | | |
886 | 90 | if (UNLIKELY(!find_result.value)) { |
887 | 46 | StringOP::push_null_string(i, res_data, res_offsets, null_map); |
888 | 46 | return; |
889 | 46 | } |
890 | | |
891 | 44 | if constexpr (ValueType::only_get_type) { |
892 | 0 | StringOP::push_value_string(std::string_view(find_result.value->typeName()), i, |
893 | 0 | res_data, res_offsets); |
894 | 0 | return; |
895 | 44 | } else { |
896 | 44 | static_assert(std::is_same_v<DataTypeJsonb, ReturnType>); |
897 | 44 | if constexpr (ValueType::no_quotes) { |
898 | 0 | if (find_result.value->isString()) { |
899 | 0 | const auto* str_value = find_result.value->unpack<JsonbStringVal>(); |
900 | 0 | const auto* blob = str_value->getBlob(); |
901 | 0 | if (str_value->length() > 1 && blob[0] == '"' && |
902 | 0 | blob[str_value->length() - 1] == '"') { |
903 | 0 | writer->writeStartString(); |
904 | 0 | writer->writeString(blob + 1, str_value->length() - 2); |
905 | 0 | writer->writeEndString(); |
906 | 0 | StringOP::push_value_string( |
907 | 0 | std::string_view(writer->getOutput()->getBuffer(), |
908 | 0 | writer->getOutput()->getSize()), |
909 | 0 | i, res_data, res_offsets); |
910 | 0 | return; |
911 | 0 | } |
912 | 0 | } |
913 | 0 | } |
914 | 0 | writer->writeValueSimple(find_result.value); |
915 | 44 | StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(), |
916 | 44 | writer->getOutput()->getSize()), |
917 | 44 | i, res_data, res_offsets); |
918 | 44 | } |
919 | 44 | } Unexecuted instantiation: _ZN5doris22JsonbExtractStringImplINS_13JsonbTypeTypeEE15inner_loop_implEPNS_12JsonbWriterTINS_14JsonbOutStreamEEEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEESC_RSt10unique_ptrINS_11JsonbToJsonESt14default_deleteISG_EEPKcmRNS_9JsonbPathE _ZN5doris22JsonbExtractStringImplINS_13JsonbTypeJsonEE15inner_loop_implEPNS_12JsonbWriterTINS_14JsonbOutStreamEEEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEESC_RSt10unique_ptrINS_11JsonbToJsonESt14default_deleteISG_EEPKcmRNS_9JsonbPathE Line | Count | Source | 874 | 90 | const char* l_raw, size_t l_size, JsonbPath& path) { | 875 | | // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory | 876 | 90 | const JsonbDocument* doc = nullptr; | 877 | 90 | auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc); | 878 | 90 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { | 879 | 0 | StringOP::push_null_string(i, res_data, res_offsets, null_map); | 880 | 0 | return; | 881 | 0 | } | 882 | | | 883 | | // value is NOT necessary to be deleted since JsonbValue will not allocate memory | 884 | 90 | auto find_result = doc->getValue()->findValue(path); | 885 | | | 886 | 90 | if (UNLIKELY(!find_result.value)) { | 887 | 46 | StringOP::push_null_string(i, res_data, res_offsets, null_map); | 888 | 46 | return; | 889 | 46 | } | 890 | | | 891 | | if constexpr (ValueType::only_get_type) { | 892 | | StringOP::push_value_string(std::string_view(find_result.value->typeName()), i, | 893 | | res_data, res_offsets); | 894 | | return; | 895 | 44 | } else { | 896 | 44 | static_assert(std::is_same_v<DataTypeJsonb, ReturnType>); | 897 | | if constexpr (ValueType::no_quotes) { | 898 | | if (find_result.value->isString()) { | 899 | | const auto* str_value = find_result.value->unpack<JsonbStringVal>(); | 900 | | const auto* 
blob = str_value->getBlob(); | 901 | | if (str_value->length() > 1 && blob[0] == '"' && | 902 | | blob[str_value->length() - 1] == '"') { | 903 | | writer->writeStartString(); | 904 | | writer->writeString(blob + 1, str_value->length() - 2); | 905 | | writer->writeEndString(); | 906 | | StringOP::push_value_string( | 907 | | std::string_view(writer->getOutput()->getBuffer(), | 908 | | writer->getOutput()->getSize()), | 909 | | i, res_data, res_offsets); | 910 | | return; | 911 | | } | 912 | | } | 913 | | } | 914 | 44 | writer->writeValueSimple(find_result.value); | 915 | 44 | StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(), | 916 | 44 | writer->getOutput()->getSize()), | 917 | 44 | i, res_data, res_offsets); | 918 | 44 | } | 919 | 44 | } |
Unexecuted instantiation: _ZN5doris22JsonbExtractStringImplINS_21JsonbTypeJsonNoQuotesEE15inner_loop_implEPNS_12JsonbWriterTINS_14JsonbOutStreamEEEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEESC_RSt10unique_ptrINS_11JsonbToJsonESt14default_deleteISG_EEPKcmRNS_9JsonbPathE |
920 | | |
921 | | public: |
922 | | // for jsonb_extract_string |
923 | | static Status vector_vector_v2( |
924 | | FunctionContext* context, const ColumnString::Chars& ldata, |
925 | | const ColumnString::Offsets& loffsets, const NullMap* l_null_map, |
926 | | const bool& json_data_const, |
927 | | const std::vector<const ColumnString*>& rdata_columns, // here we can support more paths |
928 | | const std::vector<const NullMap*>& r_null_maps, const std::vector<bool>& path_const, |
929 | 4 | ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets, NullMap& null_map) { |
930 | 4 | const size_t input_rows_count = null_map.size(); |
931 | 4 | res_offsets.resize(input_rows_count); |
932 | | |
933 | 4 | auto writer = std::make_unique<JsonbWriter>(); |
934 | 4 | std::unique_ptr<JsonbToJson> formater; |
935 | | |
936 | | // reusable json path list, especially for const path |
937 | 4 | std::vector<JsonbPath> json_path_list; |
938 | 4 | json_path_list.resize(rdata_columns.size()); |
939 | | |
940 | | // lambda function to parse json path for row i and path pi |
941 | 90 | auto parse_json_path = [&](size_t i, size_t pi) -> Status { |
942 | 90 | const auto index = index_check_const(i, path_const[pi]); |
943 | | |
944 | 90 | const ColumnString* path_col = rdata_columns[pi]; |
945 | 90 | const ColumnString::Chars& rdata = path_col->get_chars(); |
946 | 90 | const ColumnString::Offsets& roffsets = path_col->get_offsets(); |
947 | 90 | size_t r_off = roffsets[index - 1]; |
948 | 90 | size_t r_size = roffsets[index] - r_off; |
949 | 90 | const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]); |
950 | | |
951 | 90 | JsonbPath path; |
952 | 90 | if (!path.seek(r_raw, r_size)) { |
953 | 0 | return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}", |
954 | 0 | std::string_view(r_raw, r_size)); |
955 | 0 | } |
956 | | |
957 | 90 | json_path_list[pi] = std::move(path); |
958 | | |
959 | 90 | return Status::OK(); |
960 | 90 | }; Unexecuted instantiation: _ZZN5doris22JsonbExtractStringImplINS_13JsonbTypeTypeEE16vector_vector_v2EPNS_15FunctionContextERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS5_IjLm4096ES8_Lm16ELm15EEEPSA_RKbRKSt6vectorIPKNS_9ColumnStrIjEESaISM_EERKSI_ISF_SaISF_EERKSI_IbSaIbEERS9_RSC_SZ_ENKUlmmE_clEmm _ZZN5doris22JsonbExtractStringImplINS_13JsonbTypeJsonEE16vector_vector_v2EPNS_15FunctionContextERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS5_IjLm4096ES8_Lm16ELm15EEEPSA_RKbRKSt6vectorIPKNS_9ColumnStrIjEESaISM_EERKSI_ISF_SaISF_EERKSI_IbSaIbEERS9_RSC_SZ_ENKUlmmE_clEmm Line | Count | Source | 941 | 90 | auto parse_json_path = [&](size_t i, size_t pi) -> Status { | 942 | 90 | const auto index = index_check_const(i, path_const[pi]); | 943 | | | 944 | 90 | const ColumnString* path_col = rdata_columns[pi]; | 945 | 90 | const ColumnString::Chars& rdata = path_col->get_chars(); | 946 | 90 | const ColumnString::Offsets& roffsets = path_col->get_offsets(); | 947 | 90 | size_t r_off = roffsets[index - 1]; | 948 | 90 | size_t r_size = roffsets[index] - r_off; | 949 | 90 | const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]); | 950 | | | 951 | 90 | JsonbPath path; | 952 | 90 | if (!path.seek(r_raw, r_size)) { | 953 | 0 | return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}", | 954 | 0 | std::string_view(r_raw, r_size)); | 955 | 0 | } | 956 | | | 957 | 90 | json_path_list[pi] = std::move(path); | 958 | | | 959 | 90 | return Status::OK(); | 960 | 90 | }; |
Unexecuted instantiation: _ZZN5doris22JsonbExtractStringImplINS_21JsonbTypeJsonNoQuotesEE16vector_vector_v2EPNS_15FunctionContextERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS5_IjLm4096ES8_Lm16ELm15EEEPSA_RKbRKSt6vectorIPKNS_9ColumnStrIjEESaISM_EERKSI_ISF_SaISF_EERKSI_IbSaIbEERS9_RSC_SZ_ENKUlmmE_clEmm |
961 | | |
962 | 8 | for (size_t pi = 0; pi < rdata_columns.size(); pi++) { |
963 | 4 | if (path_const[pi]) { |
964 | 0 | if (r_null_maps[pi] && (*r_null_maps[pi])[0]) { |
965 | 0 | continue; |
966 | 0 | } |
967 | 0 | RETURN_IF_ERROR(parse_json_path(0, pi)); |
968 | 0 | } |
969 | 4 | } |
970 | | |
971 | 4 | res_data.reserve(ldata.size()); |
972 | 98 | for (size_t i = 0; i < input_rows_count; ++i) { |
973 | 94 | if (null_map[i]) { |
974 | 0 | continue; |
975 | 0 | } |
976 | | |
977 | 94 | const auto data_index = index_check_const(i, json_data_const); |
978 | 94 | if (l_null_map && (*l_null_map)[data_index]) { |
979 | 4 | StringOP::push_null_string(i, res_data, res_offsets, null_map); |
980 | 4 | continue; |
981 | 4 | } |
982 | | |
983 | 90 | size_t l_off = loffsets[data_index - 1]; |
984 | 90 | size_t l_size = loffsets[data_index] - l_off; |
985 | 90 | const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]); |
986 | 90 | if (rdata_columns.size() == 1) { // just return origin value |
987 | 90 | const auto path_index = index_check_const(i, path_const[0]); |
988 | 90 | if (r_null_maps[0] && (*r_null_maps[0])[path_index]) { |
989 | 0 | StringOP::push_null_string(i, res_data, res_offsets, null_map); |
990 | 0 | continue; |
991 | 0 | } |
992 | | |
993 | 90 | if (!path_const[0]) { |
994 | 90 | RETURN_IF_ERROR(parse_json_path(i, 0)); |
995 | 90 | } |
996 | | |
997 | 90 | writer->reset(); |
998 | 90 | inner_loop_impl(writer.get(), i, res_data, res_offsets, null_map, formater, l_raw, |
999 | 90 | l_size, json_path_list[0]); |
1000 | 90 | } else { // will make array string to user |
1001 | 0 | writer->reset(); |
1002 | 0 | bool has_value = false; |
1003 | | |
1004 | | // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory |
1005 | 0 | const JsonbDocument* doc = nullptr; |
1006 | 0 | auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc); |
1007 | |
|
1008 | 0 | for (size_t pi = 0; pi < rdata_columns.size(); ++pi) { |
1009 | 0 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { |
1010 | 0 | continue; |
1011 | 0 | } |
1012 | | |
1013 | 0 | const auto path_index = index_check_const(i, path_const[pi]); |
1014 | 0 | if (r_null_maps[pi] && (*r_null_maps[pi])[path_index]) { |
1015 | 0 | StringOP::push_null_string(i, res_data, res_offsets, null_map); |
1016 | 0 | break; |
1017 | 0 | } |
1018 | | |
1019 | 0 | if (!path_const[pi]) { |
1020 | 0 | RETURN_IF_ERROR(parse_json_path(i, pi)); |
1021 | 0 | } |
1022 | | |
1023 | 0 | auto find_result = doc->getValue()->findValue(json_path_list[pi]); |
1024 | |
|
1025 | 0 | if (find_result.value) { |
1026 | 0 | if (!has_value) { |
1027 | 0 | has_value = true; |
1028 | 0 | writer->writeStartArray(); |
1029 | 0 | } |
1030 | 0 | if (find_result.value->isArray() && find_result.is_wildcard) { |
1031 | | // To avoid getting results of nested array like [[1, 2, 3], [4, 5, 6]], |
1032 | | // if value is array, we should write all items in array, instead of write the array itself. |
1033 | | // finaly we will get results like [1, 2, 3, 4, 5, 6] |
1034 | 0 | for (const auto& item : *find_result.value->unpack<ArrayVal>()) { |
1035 | 0 | writer->writeValue(&item); |
1036 | 0 | } |
1037 | 0 | } else { |
1038 | 0 | writer->writeValue(find_result.value); |
1039 | 0 | } |
1040 | 0 | } |
1041 | 0 | } |
1042 | 0 | if (has_value) { |
1043 | 0 | writer->writeEndArray(); |
1044 | 0 | StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(), |
1045 | 0 | writer->getOutput()->getSize()), |
1046 | 0 | i, res_data, res_offsets); |
1047 | 0 | } else { |
1048 | 0 | StringOP::push_null_string(i, res_data, res_offsets, null_map); |
1049 | 0 | } |
1050 | 0 | } |
1051 | 90 | } //for |
1052 | 4 | return Status::OK(); |
1053 | 4 | } Unexecuted instantiation: _ZN5doris22JsonbExtractStringImplINS_13JsonbTypeTypeEE16vector_vector_v2EPNS_15FunctionContextERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS5_IjLm4096ES8_Lm16ELm15EEEPSA_RKbRKSt6vectorIPKNS_9ColumnStrIjEESaISM_EERKSI_ISF_SaISF_EERKSI_IbSaIbEERS9_RSC_SZ_ _ZN5doris22JsonbExtractStringImplINS_13JsonbTypeJsonEE16vector_vector_v2EPNS_15FunctionContextERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS5_IjLm4096ES8_Lm16ELm15EEEPSA_RKbRKSt6vectorIPKNS_9ColumnStrIjEESaISM_EERKSI_ISF_SaISF_EERKSI_IbSaIbEERS9_RSC_SZ_ Line | Count | Source | 929 | 4 | ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets, NullMap& null_map) { | 930 | 4 | const size_t input_rows_count = null_map.size(); | 931 | 4 | res_offsets.resize(input_rows_count); | 932 | | | 933 | 4 | auto writer = std::make_unique<JsonbWriter>(); | 934 | 4 | std::unique_ptr<JsonbToJson> formater; | 935 | | | 936 | | // reuseable json path list, espacially for const path | 937 | 4 | std::vector<JsonbPath> json_path_list; | 938 | 4 | json_path_list.resize(rdata_columns.size()); | 939 | | | 940 | | // lambda function to parse json path for row i and path pi | 941 | 4 | auto parse_json_path = [&](size_t i, size_t pi) -> Status { | 942 | 4 | const auto index = index_check_const(i, path_const[pi]); | 943 | | | 944 | 4 | const ColumnString* path_col = rdata_columns[pi]; | 945 | 4 | const ColumnString::Chars& rdata = path_col->get_chars(); | 946 | 4 | const ColumnString::Offsets& roffsets = path_col->get_offsets(); | 947 | 4 | size_t r_off = roffsets[index - 1]; | 948 | 4 | size_t r_size = roffsets[index] - r_off; | 949 | 4 | const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]); | 950 | | | 951 | 4 | JsonbPath path; | 952 | 4 | if (!path.seek(r_raw, r_size)) { | 953 | 4 | return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}", | 954 | 4 | 
std::string_view(r_raw, r_size)); | 955 | 4 | } | 956 | | | 957 | 4 | json_path_list[pi] = std::move(path); | 958 | | | 959 | 4 | return Status::OK(); | 960 | 4 | }; | 961 | | | 962 | 8 | for (size_t pi = 0; pi < rdata_columns.size(); pi++) { | 963 | 4 | if (path_const[pi]) { | 964 | 0 | if (r_null_maps[pi] && (*r_null_maps[pi])[0]) { | 965 | 0 | continue; | 966 | 0 | } | 967 | 0 | RETURN_IF_ERROR(parse_json_path(0, pi)); | 968 | 0 | } | 969 | 4 | } | 970 | | | 971 | 4 | res_data.reserve(ldata.size()); | 972 | 98 | for (size_t i = 0; i < input_rows_count; ++i) { | 973 | 94 | if (null_map[i]) { | 974 | 0 | continue; | 975 | 0 | } | 976 | | | 977 | 94 | const auto data_index = index_check_const(i, json_data_const); | 978 | 94 | if (l_null_map && (*l_null_map)[data_index]) { | 979 | 4 | StringOP::push_null_string(i, res_data, res_offsets, null_map); | 980 | 4 | continue; | 981 | 4 | } | 982 | | | 983 | 90 | size_t l_off = loffsets[data_index - 1]; | 984 | 90 | size_t l_size = loffsets[data_index] - l_off; | 985 | 90 | const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]); | 986 | 90 | if (rdata_columns.size() == 1) { // just return origin value | 987 | 90 | const auto path_index = index_check_const(i, path_const[0]); | 988 | 90 | if (r_null_maps[0] && (*r_null_maps[0])[path_index]) { | 989 | 0 | StringOP::push_null_string(i, res_data, res_offsets, null_map); | 990 | 0 | continue; | 991 | 0 | } | 992 | | | 993 | 90 | if (!path_const[0]) { | 994 | 90 | RETURN_IF_ERROR(parse_json_path(i, 0)); | 995 | 90 | } | 996 | | | 997 | 90 | writer->reset(); | 998 | 90 | inner_loop_impl(writer.get(), i, res_data, res_offsets, null_map, formater, l_raw, | 999 | 90 | l_size, json_path_list[0]); | 1000 | 90 | } else { // will make array string to user | 1001 | 0 | writer->reset(); | 1002 | 0 | bool has_value = false; | 1003 | | | 1004 | | // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory | 1005 | 0 | const JsonbDocument* doc = nullptr; | 1006 
| 0 | auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc); | 1007 | |
| 1008 | 0 | for (size_t pi = 0; pi < rdata_columns.size(); ++pi) { | 1009 | 0 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { | 1010 | 0 | continue; | 1011 | 0 | } | 1012 | | | 1013 | 0 | const auto path_index = index_check_const(i, path_const[pi]); | 1014 | 0 | if (r_null_maps[pi] && (*r_null_maps[pi])[path_index]) { | 1015 | 0 | StringOP::push_null_string(i, res_data, res_offsets, null_map); | 1016 | 0 | break; | 1017 | 0 | } | 1018 | | | 1019 | 0 | if (!path_const[pi]) { | 1020 | 0 | RETURN_IF_ERROR(parse_json_path(i, pi)); | 1021 | 0 | } | 1022 | | | 1023 | 0 | auto find_result = doc->getValue()->findValue(json_path_list[pi]); | 1024 | |
| 1025 | 0 | if (find_result.value) { | 1026 | 0 | if (!has_value) { | 1027 | 0 | has_value = true; | 1028 | 0 | writer->writeStartArray(); | 1029 | 0 | } | 1030 | 0 | if (find_result.value->isArray() && find_result.is_wildcard) { | 1031 | | // To avoid getting results of nested array like [[1, 2, 3], [4, 5, 6]], | 1032 | | // if value is array, we should write all items in array, instead of write the array itself. | 1033 | | // finaly we will get results like [1, 2, 3, 4, 5, 6] | 1034 | 0 | for (const auto& item : *find_result.value->unpack<ArrayVal>()) { | 1035 | 0 | writer->writeValue(&item); | 1036 | 0 | } | 1037 | 0 | } else { | 1038 | 0 | writer->writeValue(find_result.value); | 1039 | 0 | } | 1040 | 0 | } | 1041 | 0 | } | 1042 | 0 | if (has_value) { | 1043 | 0 | writer->writeEndArray(); | 1044 | 0 | StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(), | 1045 | 0 | writer->getOutput()->getSize()), | 1046 | 0 | i, res_data, res_offsets); | 1047 | 0 | } else { | 1048 | 0 | StringOP::push_null_string(i, res_data, res_offsets, null_map); | 1049 | 0 | } | 1050 | 0 | } | 1051 | 90 | } //for | 1052 | 4 | return Status::OK(); | 1053 | 4 | } |
Unexecuted instantiation: _ZN5doris22JsonbExtractStringImplINS_21JsonbTypeJsonNoQuotesEE16vector_vector_v2EPNS_15FunctionContextERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS5_IjLm4096ES8_Lm16ELm15EEEPSA_RKbRKSt6vectorIPKNS_9ColumnStrIjEESaISM_EERKSI_ISF_SaISF_EERKSI_IbSaIbEERS9_RSC_SZ_ |
1054 | | |
1055 | | static Status vector_vector(FunctionContext* context, const ColumnString::Chars& ldata, |
1056 | | const ColumnString::Offsets& loffsets, const NullMap* l_null_map, |
1057 | | const ColumnString::Chars& rdata, |
1058 | | const ColumnString::Offsets& roffsets, const NullMap* r_null_map, |
1059 | | ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets, |
1060 | | NullMap& null_map) { |
1061 | | size_t input_rows_count = loffsets.size(); |
1062 | | res_offsets.resize(input_rows_count); |
1063 | | |
1064 | | std::unique_ptr<JsonbToJson> formater; |
1065 | | |
1066 | | JsonbWriter writer; |
1067 | | for (size_t i = 0; i < input_rows_count; ++i) { |
1068 | | if (l_null_map && (*l_null_map)[i]) { |
1069 | | StringOP::push_null_string(i, res_data, res_offsets, null_map); |
1070 | | continue; |
1071 | | } |
1072 | | |
1073 | | if (r_null_map && (*r_null_map)[i]) { |
1074 | | StringOP::push_null_string(i, res_data, res_offsets, null_map); |
1075 | | continue; |
1076 | | } |
1077 | | |
1078 | | int l_size = loffsets[i] - loffsets[i - 1]; |
1079 | | const char* l_raw = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]); |
1080 | | |
1081 | | int r_size = roffsets[i] - roffsets[i - 1]; |
1082 | | const char* r_raw = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]); |
1083 | | |
1084 | | JsonbPath path; |
1085 | | if (!path.seek(r_raw, r_size)) { |
1086 | | return Status::InvalidArgument( |
1087 | | "Json path error: Invalid Json Path for value: {} at row: {}", |
1088 | | std::string_view(r_raw, r_size), i); |
1089 | | } |
1090 | | |
1091 | | writer.reset(); |
1092 | | inner_loop_impl(&writer, i, res_data, res_offsets, null_map, formater, l_raw, l_size, |
1093 | | path); |
1094 | | } //for |
1095 | | return Status::OK(); |
1096 | | } //function |
1097 | | |
1098 | | static Status vector_scalar(FunctionContext* context, const ColumnString::Chars& ldata, |
1099 | | const ColumnString::Offsets& loffsets, const NullMap* l_null_map, |
1100 | | const StringRef& rdata, ColumnString::Chars& res_data, |
1101 | | ColumnString::Offsets& res_offsets, NullMap& null_map) { |
1102 | | size_t input_rows_count = loffsets.size(); |
1103 | | res_offsets.resize(input_rows_count); |
1104 | | |
1105 | | std::unique_ptr<JsonbToJson> formater; |
1106 | | |
1107 | | JsonbPath path; |
1108 | | if (!path.seek(rdata.data, rdata.size)) { |
1109 | | return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}", |
1110 | | std::string_view(rdata.data, rdata.size)); |
1111 | | } |
1112 | | |
1113 | | JsonbWriter writer; |
1114 | | for (size_t i = 0; i < input_rows_count; ++i) { |
1115 | | if (l_null_map && (*l_null_map)[i]) { |
1116 | | StringOP::push_null_string(i, res_data, res_offsets, null_map); |
1117 | | continue; |
1118 | | } |
1119 | | |
1120 | | int l_size = loffsets[i] - loffsets[i - 1]; |
1121 | | const char* l_raw = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]); |
1122 | | |
1123 | | writer.reset(); |
1124 | | inner_loop_impl(&writer, i, res_data, res_offsets, null_map, formater, l_raw, l_size, |
1125 | | path); |
1126 | | } //for |
1127 | | return Status::OK(); |
1128 | | } //function |
1129 | | |
1130 | | static Status scalar_vector(FunctionContext* context, const StringRef& ldata, |
1131 | | const ColumnString::Chars& rdata, |
1132 | | const ColumnString::Offsets& roffsets, const NullMap* r_null_map, |
1133 | | ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets, |
1134 | | NullMap& null_map) { |
1135 | | size_t input_rows_count = roffsets.size(); |
1136 | | res_offsets.resize(input_rows_count); |
1137 | | |
1138 | | std::unique_ptr<JsonbToJson> formater; |
1139 | | |
1140 | | JsonbWriter writer; |
1141 | | |
1142 | | for (size_t i = 0; i < input_rows_count; ++i) { |
1143 | | if (r_null_map && (*r_null_map)[i]) { |
1144 | | StringOP::push_null_string(i, res_data, res_offsets, null_map); |
1145 | | continue; |
1146 | | } |
1147 | | |
1148 | | int r_size = roffsets[i] - roffsets[i - 1]; |
1149 | | const char* r_raw = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]); |
1150 | | |
1151 | | JsonbPath path; |
1152 | | if (!path.seek(r_raw, r_size)) { |
1153 | | return Status::InvalidArgument( |
1154 | | "Json path error: Invalid Json Path for value: {} at row: {}", |
1155 | | std::string_view(r_raw, r_size), i); |
1156 | | } |
1157 | | |
1158 | | writer.reset(); |
1159 | | inner_loop_impl(&writer, i, res_data, res_offsets, null_map, formater, ldata.data, |
1160 | | ldata.size, path); |
1161 | | } //for |
1162 | | return Status::OK(); |
1163 | | } //function |
1164 | | }; |
1165 | | |
1166 | | struct JsonbExtractIsnull { |
1167 | | static constexpr auto name = "json_extract_isnull"; |
1168 | | static constexpr auto alias = "jsonb_extract_isnull"; |
1169 | | |
1170 | | using ReturnType = DataTypeUInt8; |
1171 | | using ColumnType = ColumnUInt8; |
1172 | | using Container = typename ColumnType::Container; |
1173 | | |
1174 | | private: |
1175 | | static ALWAYS_INLINE void inner_loop_impl(size_t i, Container& res, NullMap& null_map, |
1176 | | const char* l_raw_str, size_t l_str_size, |
1177 | 0 | JsonbPath& path) { |
1178 | 0 | if (null_map[i]) { |
1179 | 0 | res[i] = 0; |
1180 | 0 | return; |
1181 | 0 | } |
1182 | | |
1183 | | // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory |
1184 | 0 | const JsonbDocument* doc = nullptr; |
1185 | 0 | auto st = JsonbDocument::checkAndCreateDocument(l_raw_str, l_str_size, &doc); |
1186 | 0 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { |
1187 | 0 | null_map[i] = 1; |
1188 | 0 | res[i] = 0; |
1189 | 0 | return; |
1190 | 0 | } |
1191 | | |
1192 | | // value is NOT necessary to be deleted since JsonbValue will not allocate memory |
1193 | 0 | auto find_result = doc->getValue()->findValue(path); |
1194 | 0 | const auto* value = find_result.value; |
1195 | |
|
1196 | 0 | if (UNLIKELY(!value)) { |
1197 | 0 | null_map[i] = 1; |
1198 | 0 | res[i] = 0; |
1199 | 0 | return; |
1200 | 0 | } |
1201 | | |
1202 | 0 | res[i] = value->isNull(); |
1203 | 0 | } |
1204 | | |
1205 | | public: |
1206 | | // for jsonb_extract_int/int64/double |
1207 | | static Status vector_vector(FunctionContext* context, const ColumnString::Chars& ldata, |
1208 | | const ColumnString::Offsets& loffsets, const NullMap* l_null_map, |
1209 | | const ColumnString::Chars& rdata, |
1210 | | const ColumnString::Offsets& roffsets, const NullMap* r_null_map, |
1211 | 0 | Container& res, NullMap& null_map) { |
1212 | 0 | size_t size = loffsets.size(); |
1213 | 0 | res.resize(size); |
1214 | |
|
1215 | 0 | for (size_t i = 0; i < loffsets.size(); i++) { |
1216 | 0 | if ((l_null_map && (*l_null_map)[i]) || (r_null_map && (*r_null_map)[i])) { |
1217 | 0 | res[i] = 0; |
1218 | 0 | null_map[i] = 1; |
1219 | 0 | continue; |
1220 | 0 | } |
1221 | | |
1222 | 0 | const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]); |
1223 | 0 | int l_str_size = loffsets[i] - loffsets[i - 1]; |
1224 | |
|
1225 | 0 | const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]); |
1226 | 0 | int r_str_size = roffsets[i] - roffsets[i - 1]; |
1227 | |
|
1228 | 0 | JsonbPath path; |
1229 | 0 | if (!path.seek(r_raw_str, r_str_size)) { |
1230 | 0 | return Status::InvalidArgument( |
1231 | 0 | "Json path error: Invalid Json Path for value: {} at row: {}", |
1232 | 0 | std::string_view(r_raw_str, r_str_size), i); |
1233 | 0 | } |
1234 | | |
1235 | 0 | inner_loop_impl(i, res, null_map, l_raw_str, l_str_size, path); |
1236 | 0 | } //for |
1237 | 0 | return Status::OK(); |
1238 | 0 | } //function |
1239 | | |
1240 | | static Status scalar_vector(FunctionContext* context, const StringRef& ldata, |
1241 | | const ColumnString::Chars& rdata, |
1242 | | const ColumnString::Offsets& roffsets, const NullMap* r_null_map, |
1243 | 0 | Container& res, NullMap& null_map) { |
1244 | 0 | size_t size = roffsets.size(); |
1245 | 0 | res.resize(size); |
1246 | |
|
1247 | 0 | for (size_t i = 0; i < size; i++) { |
1248 | 0 | if (r_null_map && (*r_null_map)[i]) { |
1249 | 0 | res[i] = 0; |
1250 | 0 | null_map[i] = 1; |
1251 | 0 | continue; |
1252 | 0 | } |
1253 | | |
1254 | 0 | const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]); |
1255 | 0 | int r_str_size = roffsets[i] - roffsets[i - 1]; |
1256 | |
|
1257 | 0 | JsonbPath path; |
1258 | 0 | if (!path.seek(r_raw_str, r_str_size)) { |
1259 | 0 | return Status::InvalidArgument( |
1260 | 0 | "Json path error: Invalid Json Path for value: {} at row: {}", |
1261 | 0 | std::string_view(r_raw_str, r_str_size), i); |
1262 | 0 | } |
1263 | | |
1264 | 0 | inner_loop_impl(i, res, null_map, ldata.data, ldata.size, path); |
1265 | 0 | } //for |
1266 | 0 | return Status::OK(); |
1267 | 0 | } //function |
1268 | | |
1269 | | static Status vector_scalar(FunctionContext* context, const ColumnString::Chars& ldata, |
1270 | | const ColumnString::Offsets& loffsets, const NullMap* l_null_map, |
1271 | 0 | const StringRef& rdata, Container& res, NullMap& null_map) { |
1272 | 0 | size_t size = loffsets.size(); |
1273 | 0 | res.resize(size); |
1274 | |
|
1275 | 0 | JsonbPath path; |
1276 | 0 | if (!path.seek(rdata.data, rdata.size)) { |
1277 | 0 | return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}", |
1278 | 0 | std::string_view(rdata.data, rdata.size)); |
1279 | 0 | } |
1280 | | |
1281 | 0 | for (size_t i = 0; i < loffsets.size(); i++) { |
1282 | 0 | if (l_null_map && (*l_null_map)[i]) { |
1283 | 0 | res[i] = 0; |
1284 | 0 | null_map[i] = 1; |
1285 | 0 | continue; |
1286 | 0 | } |
1287 | | |
1288 | 0 | const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]); |
1289 | 0 | int l_str_size = loffsets[i] - loffsets[i - 1]; |
1290 | |
|
1291 | 0 | inner_loop_impl(i, res, null_map, l_raw_str, l_str_size, path); |
1292 | 0 | } //for |
1293 | 0 | return Status::OK(); |
1294 | 0 | } //function |
1295 | | }; |
1296 | | |
1297 | | struct JsonbTypeJson { |
1298 | | using T = std::string; |
1299 | | using ReturnType = DataTypeJsonb; |
1300 | | using ColumnType = ColumnString; |
1301 | | static const bool only_get_type = false; |
1302 | | static const bool no_quotes = false; |
1303 | | }; |
1304 | | |
1305 | | struct JsonbTypeJsonNoQuotes { |
1306 | | using T = std::string; |
1307 | | using ReturnType = DataTypeJsonb; |
1308 | | using ColumnType = ColumnString; |
1309 | | static const bool only_get_type = false; |
1310 | | static const bool no_quotes = true; |
1311 | | }; |
1312 | | |
1313 | | struct JsonbTypeType { |
1314 | | using T = std::string; |
1315 | | using ReturnType = DataTypeString; |
1316 | | using ColumnType = ColumnString; |
1317 | | static const bool only_get_type = true; |
1318 | | static const bool no_quotes = false; |
1319 | | }; |
1320 | | |
1321 | | struct JsonbExtractJsonb : public JsonbExtractStringImpl<JsonbTypeJson> { |
1322 | | static constexpr auto name = "jsonb_extract"; |
1323 | | static constexpr auto alias = "json_extract"; |
1324 | | }; |
1325 | | |
1326 | | struct JsonbExtractJsonbNoQuotes : public JsonbExtractStringImpl<JsonbTypeJsonNoQuotes> { |
1327 | | static constexpr auto name = "jsonb_extract_no_quotes"; |
1328 | | static constexpr auto alias = "json_extract_no_quotes"; |
1329 | | }; |
1330 | | |
1331 | | struct JsonbTypeImpl : public JsonbExtractStringImpl<JsonbTypeType> { |
1332 | | static constexpr auto name = "json_type"; |
1333 | | static constexpr auto alias = "jsonb_type"; |
1334 | | }; |
1335 | | |
1336 | | using FunctionJsonbExists = FunctionJsonbExtractPath; |
1337 | | using FunctionJsonbType = FunctionJsonbExtract<JsonbTypeImpl>; |
1338 | | |
1339 | | using FunctionJsonbExtractIsnull = FunctionJsonbExtract<JsonbExtractIsnull>; |
1340 | | using FunctionJsonbExtractJsonb = FunctionJsonbExtract<JsonbExtractJsonb>; |
1341 | | using FunctionJsonbExtractJsonbNoQuotes = FunctionJsonbExtract<JsonbExtractJsonbNoQuotes>; |
1342 | | |
1343 | | template <typename Impl> |
1344 | | class FunctionJsonbLength : public IFunction { |
1345 | | public: |
1346 | | static constexpr auto name = "json_length"; |
1347 | 1 | String get_name() const override { return name; } |
1348 | 4 | static FunctionPtr create() { return std::make_shared<FunctionJsonbLength<Impl>>(); } |
1349 | | |
1350 | 2 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
1351 | 2 | return make_nullable(std::make_shared<DataTypeInt32>()); |
1352 | 2 | } |
1353 | 3 | DataTypes get_variadic_argument_types_impl() const override { |
1354 | 3 | return Impl::get_variadic_argument_types(); |
1355 | 3 | } |
1356 | 2 | size_t get_number_of_arguments() const override { |
1357 | 2 | return get_variadic_argument_types_impl().size(); |
1358 | 2 | } |
1359 | | |
1360 | 4 | bool use_default_implementation_for_nulls() const override { return false; } |
1361 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
1362 | 2 | uint32_t result, size_t input_rows_count) const override { |
1363 | 2 | return Impl::execute_impl(context, block, arguments, result, input_rows_count); |
1364 | 2 | } |
1365 | | }; |
1366 | | |
1367 | | struct JsonbLengthUtil { |
1368 | | static Status jsonb_length_execute(FunctionContext* context, Block& block, |
1369 | | const ColumnNumbers& arguments, uint32_t result, |
1370 | 2 | size_t input_rows_count) { |
1371 | 2 | DCHECK_GE(arguments.size(), 2); |
1372 | 2 | ColumnPtr jsonb_data_column; |
1373 | 2 | bool jsonb_data_const = false; |
1374 | | // prepare jsonb data column |
1375 | 2 | std::tie(jsonb_data_column, jsonb_data_const) = |
1376 | 2 | unpack_if_const(block.get_by_position(arguments[0]).column); |
1377 | 2 | ColumnPtr path_column; |
1378 | 2 | bool is_const = false; |
1379 | 2 | std::tie(path_column, is_const) = |
1380 | 2 | unpack_if_const(block.get_by_position(arguments[1]).column); |
1381 | | |
1382 | 2 | auto null_map = ColumnUInt8::create(input_rows_count, 0); |
1383 | 2 | auto return_type = block.get_data_type(result); |
1384 | 2 | MutableColumnPtr res = return_type->create_column(); |
1385 | | |
1386 | 2 | JsonbPath path; |
1387 | 2 | if (is_const) { |
1388 | 1 | if (path_column->is_null_at(0)) { |
1389 | 2 | for (size_t i = 0; i < input_rows_count; ++i) { |
1390 | 1 | null_map->get_data()[i] = 1; |
1391 | 1 | res->insert_data(nullptr, 0); |
1392 | 1 | } |
1393 | | |
1394 | 1 | block.replace_by_position( |
1395 | 1 | result, ColumnNullable::create(std::move(res), std::move(null_map))); |
1396 | 1 | return Status::OK(); |
1397 | 1 | } |
1398 | | |
1399 | 0 | auto path_value = path_column->get_data_at(0); |
1400 | 0 | if (!path.seek(path_value.data, path_value.size)) { |
1401 | 0 | return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}", |
1402 | 0 | std::string_view(path_value.data, path_value.size)); |
1403 | 0 | } |
1404 | 0 | } |
1405 | | |
1406 | 5 | for (size_t i = 0; i < input_rows_count; ++i) { |
1407 | 4 | if (jsonb_data_column->is_null_at(i) || path_column->is_null_at(i) || |
1408 | 4 | (jsonb_data_column->get_data_at(i).size == 0)) { |
1409 | 1 | null_map->get_data()[i] = 1; |
1410 | 1 | res->insert_data(nullptr, 0); |
1411 | 1 | continue; |
1412 | 1 | } |
1413 | 3 | if (!is_const) { |
1414 | 3 | auto path_value = path_column->get_data_at(i); |
1415 | 3 | path.clean(); |
1416 | 3 | if (!path.seek(path_value.data, path_value.size)) { |
1417 | 0 | return Status::InvalidArgument( |
1418 | 0 | "Json path error: Invalid Json Path for value: {}", |
1419 | 0 | std::string_view(reinterpret_cast<const char*>(path_value.data), |
1420 | 0 | path_value.size)); |
1421 | 0 | } |
1422 | 3 | } |
1423 | 3 | auto jsonb_value = jsonb_data_column->get_data_at(i); |
1424 | | // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory |
1425 | 3 | const JsonbDocument* doc = nullptr; |
1426 | 3 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(jsonb_value.data, |
1427 | 3 | jsonb_value.size, &doc)); |
1428 | 3 | auto find_result = doc->getValue()->findValue(path); |
1429 | 3 | const auto* value = find_result.value; |
1430 | 3 | if (UNLIKELY(!value)) { |
1431 | 0 | null_map->get_data()[i] = 1; |
1432 | 0 | res->insert_data(nullptr, 0); |
1433 | 0 | continue; |
1434 | 0 | } |
1435 | 3 | auto length = value->numElements(); |
1436 | 3 | res->insert_data(const_cast<const char*>((char*)&length), 0); |
1437 | 3 | } |
1438 | 1 | block.replace_by_position(result, |
1439 | 1 | ColumnNullable::create(std::move(res), std::move(null_map))); |
1440 | 1 | return Status::OK(); |
1441 | 1 | } |
1442 | | }; |
1443 | | |
1444 | | struct JsonbLengthAndPathImpl { |
1445 | 3 | static DataTypes get_variadic_argument_types() { |
1446 | 3 | return {std::make_shared<DataTypeJsonb>(), std::make_shared<DataTypeString>()}; |
1447 | 3 | } |
1448 | | |
1449 | | static Status execute_impl(FunctionContext* context, Block& block, |
1450 | | const ColumnNumbers& arguments, uint32_t result, |
1451 | 2 | size_t input_rows_count) { |
1452 | 2 | return JsonbLengthUtil::jsonb_length_execute(context, block, arguments, result, |
1453 | 2 | input_rows_count); |
1454 | 2 | } |
1455 | | }; |
1456 | | |
1457 | | template <typename Impl> |
1458 | | class FunctionJsonbContains : public IFunction { |
1459 | | public: |
1460 | | static constexpr auto name = "json_contains"; |
1461 | 1 | String get_name() const override { return name; } |
1462 | 4 | static FunctionPtr create() { return std::make_shared<FunctionJsonbContains<Impl>>(); } |
1463 | | |
1464 | 2 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
1465 | 2 | return make_nullable(std::make_shared<DataTypeUInt8>()); |
1466 | 2 | } |
1467 | 3 | DataTypes get_variadic_argument_types_impl() const override { |
1468 | 3 | return Impl::get_variadic_argument_types(); |
1469 | 3 | } |
1470 | 2 | size_t get_number_of_arguments() const override { |
1471 | 2 | return get_variadic_argument_types_impl().size(); |
1472 | 2 | } |
1473 | | |
1474 | 4 | bool use_default_implementation_for_nulls() const override { return false; } |
1475 | | |
1476 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
1477 | 2 | uint32_t result, size_t input_rows_count) const override { |
1478 | 2 | return Impl::execute_impl(context, block, arguments, result, input_rows_count); |
1479 | 2 | } |
1480 | | }; |
1481 | | |
1482 | | struct JsonbContainsUtil { |
1483 | | static Status jsonb_contains_execute(FunctionContext* context, Block& block, |
1484 | | const ColumnNumbers& arguments, uint32_t result, |
1485 | 2 | size_t input_rows_count) { |
1486 | 2 | DCHECK_GE(arguments.size(), 3); |
1487 | | |
1488 | 2 | auto jsonb_data1_column = block.get_by_position(arguments[0]).column; |
1489 | 2 | auto jsonb_data2_column = block.get_by_position(arguments[1]).column; |
1490 | | |
1491 | 2 | ColumnPtr path_column; |
1492 | 2 | bool is_const = false; |
1493 | 2 | std::tie(path_column, is_const) = |
1494 | 2 | unpack_if_const(block.get_by_position(arguments[2]).column); |
1495 | | |
1496 | 2 | auto null_map = ColumnUInt8::create(input_rows_count, 0); |
1497 | 2 | auto return_type = block.get_data_type(result); |
1498 | 2 | MutableColumnPtr res = return_type->create_column(); |
1499 | | |
1500 | 2 | JsonbPath path; |
1501 | 2 | if (is_const) { |
1502 | 1 | if (path_column->is_null_at(0)) { |
1503 | 2 | for (size_t i = 0; i < input_rows_count; ++i) { |
1504 | 1 | null_map->get_data()[i] = 1; |
1505 | 1 | res->insert_data(nullptr, 0); |
1506 | 1 | } |
1507 | | |
1508 | 1 | block.replace_by_position( |
1509 | 1 | result, ColumnNullable::create(std::move(res), std::move(null_map))); |
1510 | 1 | return Status::OK(); |
1511 | 1 | } |
1512 | | |
1513 | 0 | auto path_value = path_column->get_data_at(0); |
1514 | 0 | if (!path.seek(path_value.data, path_value.size)) { |
1515 | 0 | return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}", |
1516 | 0 | std::string_view(path_value.data, path_value.size)); |
1517 | 0 | } |
1518 | 0 | } |
1519 | | |
1520 | 5 | for (size_t i = 0; i < input_rows_count; ++i) { |
1521 | 4 | if (jsonb_data1_column->is_null_at(i) || jsonb_data2_column->is_null_at(i) || |
1522 | 4 | path_column->is_null_at(i)) { |
1523 | 1 | null_map->get_data()[i] = 1; |
1524 | 1 | res->insert_data(nullptr, 0); |
1525 | 1 | continue; |
1526 | 1 | } |
1527 | | |
1528 | 3 | if (!is_const) { |
1529 | 3 | auto path_value = path_column->get_data_at(i); |
1530 | 3 | path.clean(); |
1531 | 3 | if (!path.seek(path_value.data, path_value.size)) { |
1532 | 0 | return Status::InvalidArgument( |
1533 | 0 | "Json path error: Invalid Json Path for value: {}", |
1534 | 0 | std::string_view(path_value.data, path_value.size)); |
1535 | 0 | } |
1536 | 3 | } |
1537 | | |
1538 | 3 | auto jsonb_value1 = jsonb_data1_column->get_data_at(i); |
1539 | 3 | auto jsonb_value2 = jsonb_data2_column->get_data_at(i); |
1540 | | |
1541 | 3 | if (jsonb_value1.size == 0 || jsonb_value2.size == 0) { |
1542 | 1 | null_map->get_data()[i] = 1; |
1543 | 1 | res->insert_data(nullptr, 0); |
1544 | 1 | continue; |
1545 | 1 | } |
1546 | | // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory |
1547 | 2 | const JsonbDocument* doc1 = nullptr; |
1548 | 2 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(jsonb_value1.data, |
1549 | 2 | jsonb_value1.size, &doc1)); |
1550 | 2 | const JsonbDocument* doc2 = nullptr; |
1551 | 2 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(jsonb_value2.data, |
1552 | 2 | jsonb_value2.size, &doc2)); |
1553 | | |
1554 | 2 | auto find_result = doc1->getValue()->findValue(path); |
1555 | 2 | const auto* value1 = find_result.value; |
1556 | 2 | const JsonbValue* value2 = doc2->getValue(); |
1557 | 2 | if (!value1 || !value2) { |
1558 | 0 | null_map->get_data()[i] = 1; |
1559 | 0 | res->insert_data(nullptr, 0); |
1560 | 0 | continue; |
1561 | 0 | } |
1562 | 2 | auto contains_value = value1->contains(value2); |
1563 | 2 | res->insert_data(const_cast<const char*>((char*)&contains_value), 0); |
1564 | 2 | } |
1565 | | |
1566 | 1 | block.replace_by_position(result, |
1567 | 1 | ColumnNullable::create(std::move(res), std::move(null_map))); |
1568 | 1 | return Status::OK(); |
1569 | 1 | } |
1570 | | }; |
1571 | | |
1572 | | template <bool ignore_null> |
1573 | | class FunctionJsonbArray : public IFunction { |
1574 | | public: |
1575 | | static constexpr auto name = "json_array"; |
1576 | | static constexpr auto alias = "jsonb_array"; |
1577 | | |
1578 | 6 | static FunctionPtr create() { return std::make_shared<FunctionJsonbArray>(); }_ZN5doris18FunctionJsonbArrayILb0EE6createEv Line | Count | Source | 1578 | 3 | static FunctionPtr create() { return std::make_shared<FunctionJsonbArray>(); } |
_ZN5doris18FunctionJsonbArrayILb1EE6createEv Line | Count | Source | 1578 | 3 | static FunctionPtr create() { return std::make_shared<FunctionJsonbArray>(); } |
|
1579 | | |
1580 | 0 | String get_name() const override { return name; }Unexecuted instantiation: _ZNK5doris18FunctionJsonbArrayILb0EE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris18FunctionJsonbArrayILb1EE8get_nameB5cxx11Ev |
1581 | | |
1582 | 0 | size_t get_number_of_arguments() const override { return 0; }Unexecuted instantiation: _ZNK5doris18FunctionJsonbArrayILb0EE23get_number_of_argumentsEv Unexecuted instantiation: _ZNK5doris18FunctionJsonbArrayILb1EE23get_number_of_argumentsEv |
1583 | 4 | bool is_variadic() const override { return true; }_ZNK5doris18FunctionJsonbArrayILb0EE11is_variadicEv Line | Count | Source | 1583 | 2 | bool is_variadic() const override { return true; } |
_ZNK5doris18FunctionJsonbArrayILb1EE11is_variadicEv Line | Count | Source | 1583 | 2 | bool is_variadic() const override { return true; } |
|
1584 | | |
1585 | 4 | bool use_default_implementation_for_nulls() const override { return false; }_ZNK5doris18FunctionJsonbArrayILb0EE36use_default_implementation_for_nullsEv Line | Count | Source | 1585 | 2 | bool use_default_implementation_for_nulls() const override { return false; } |
_ZNK5doris18FunctionJsonbArrayILb1EE36use_default_implementation_for_nullsEv Line | Count | Source | 1585 | 2 | bool use_default_implementation_for_nulls() const override { return false; } |
|
1586 | | |
1587 | 2 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
1588 | 2 | return std::make_shared<DataTypeJsonb>(); |
1589 | 2 | } _ZNK5doris18FunctionJsonbArrayILb0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS6_EE Line | Count | Source | 1587 | 1 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 1588 | 1 | return std::make_shared<DataTypeJsonb>(); | 1589 | 1 | } |
_ZNK5doris18FunctionJsonbArrayILb1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS6_EE Line | Count | Source | 1587 | 1 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 1588 | 1 | return std::make_shared<DataTypeJsonb>(); | 1589 | 1 | } |
|
1590 | | |
1591 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
1592 | 2 | uint32_t result, size_t input_rows_count) const override { |
1593 | 2 | auto return_data_type = std::make_shared<DataTypeJsonb>(); |
1594 | 2 | auto column = return_data_type->create_column(); |
1595 | 2 | column->reserve(input_rows_count); |
1596 | | |
1597 | 2 | JsonbWriter writer; |
1598 | 23 | for (size_t i = 0; i < input_rows_count; ++i) { |
1599 | 20 | writer.writeStartArray(); |
1600 | 20 | for (auto argument : arguments) { |
1601 | 20 | auto&& [arg_column, is_const] = |
1602 | 20 | unpack_if_const(block.get_by_position(argument).column); |
1603 | 20 | if (arg_column->is_nullable()) { |
1604 | 20 | const auto& nullable_column = |
1605 | 20 | assert_cast<const ColumnNullable&, TypeCheckOnRelease::DISABLE>( |
1606 | 20 | *arg_column); |
1607 | 20 | const auto& null_map = nullable_column.get_null_map_data(); |
1608 | 20 | const auto& nested_column = nullable_column.get_nested_column(); |
1609 | 20 | const auto& jsonb_column = |
1610 | 20 | assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>( |
1611 | 20 | nested_column); |
1612 | | |
1613 | 20 | auto index = index_check_const(i, is_const); |
1614 | 20 | if (null_map[index]) { |
1615 | 2 | if constexpr (ignore_null) { |
1616 | 1 | continue; |
1617 | 1 | } else { |
1618 | 1 | writer.writeNull(); |
1619 | 1 | } |
1620 | 18 | } else { |
1621 | 18 | auto jsonb_binary = jsonb_column.get_data_at(index); |
1622 | 18 | const JsonbDocument* doc = nullptr; |
1623 | 18 | auto st = JsonbDocument::checkAndCreateDocument(jsonb_binary.data, |
1624 | 18 | jsonb_binary.size, &doc); |
1625 | 18 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { |
1626 | 0 | if constexpr (ignore_null) { |
1627 | 0 | continue; |
1628 | 0 | } else { |
1629 | 0 | writer.writeNull(); |
1630 | 0 | } |
1631 | 18 | } else { |
1632 | 18 | writer.writeValue(doc->getValue()); |
1633 | 18 | } |
1634 | 18 | } |
1635 | 20 | } else { |
1636 | 0 | const auto& jsonb_column = |
1637 | 0 | assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>( |
1638 | 0 | *arg_column); |
1639 | |
|
1640 | 0 | auto index = index_check_const(i, is_const); |
1641 | 0 | auto jsonb_binary = jsonb_column.get_data_at(index); |
1642 | 0 | const JsonbDocument* doc = nullptr; |
1643 | 0 | auto st = JsonbDocument::checkAndCreateDocument(jsonb_binary.data, |
1644 | 0 | jsonb_binary.size, &doc); |
1645 | 0 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { |
1646 | 0 | if constexpr (ignore_null) { |
1647 | 0 | continue; |
1648 | 0 | } else { |
1649 | 0 | writer.writeNull(); |
1650 | 0 | } |
1651 | 0 | } else { |
1652 | 0 | writer.writeValue(doc->getValue()); |
1653 | 0 | } |
1654 | 0 | } |
1655 | 20 | } |
1656 | 11 | writer.writeEndArray(); |
1657 | 11 | column->insert_data(writer.getOutput()->getBuffer(), writer.getOutput()->getSize()); |
1658 | 11 | writer.reset(); |
1659 | 11 | } |
1660 | | |
1661 | 2 | block.get_by_position(result).column = std::move(column); |
1662 | 2 | return Status::OK(); |
1663 | 2 | } _ZNK5doris18FunctionJsonbArrayILb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 1592 | 1 | uint32_t result, size_t input_rows_count) const override { | 1593 | 1 | auto return_data_type = std::make_shared<DataTypeJsonb>(); | 1594 | 1 | auto column = return_data_type->create_column(); | 1595 | 1 | column->reserve(input_rows_count); | 1596 | | | 1597 | 1 | JsonbWriter writer; | 1598 | 11 | for (size_t i = 0; i < input_rows_count; ++i) { | 1599 | 10 | writer.writeStartArray(); | 1600 | 10 | for (auto argument : arguments) { | 1601 | 10 | auto&& [arg_column, is_const] = | 1602 | 10 | unpack_if_const(block.get_by_position(argument).column); | 1603 | 10 | if (arg_column->is_nullable()) { | 1604 | 10 | const auto& nullable_column = | 1605 | 10 | assert_cast<const ColumnNullable&, TypeCheckOnRelease::DISABLE>( | 1606 | 10 | *arg_column); | 1607 | 10 | const auto& null_map = nullable_column.get_null_map_data(); | 1608 | 10 | const auto& nested_column = nullable_column.get_nested_column(); | 1609 | 10 | const auto& jsonb_column = | 1610 | 10 | assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>( | 1611 | 10 | nested_column); | 1612 | | | 1613 | 10 | auto index = index_check_const(i, is_const); | 1614 | 10 | if (null_map[index]) { | 1615 | | if constexpr (ignore_null) { | 1616 | | continue; | 1617 | 1 | } else { | 1618 | 1 | writer.writeNull(); | 1619 | 1 | } | 1620 | 9 | } else { | 1621 | 9 | auto jsonb_binary = jsonb_column.get_data_at(index); | 1622 | 9 | const JsonbDocument* doc = nullptr; | 1623 | 9 | auto st = JsonbDocument::checkAndCreateDocument(jsonb_binary.data, | 1624 | 9 | jsonb_binary.size, &doc); | 1625 | 9 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { | 1626 | | if constexpr (ignore_null) { | 1627 | | continue; | 1628 | 0 | } else { | 1629 | 0 | writer.writeNull(); | 1630 | 0 | } | 1631 | 9 | } else { | 1632 | 9 | writer.writeValue(doc->getValue()); | 1633 | 9 | } | 1634 | 9 | 
} | 1635 | 10 | } else { | 1636 | 0 | const auto& jsonb_column = | 1637 | 0 | assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>( | 1638 | 0 | *arg_column); | 1639 | |
| 1640 | 0 | auto index = index_check_const(i, is_const); | 1641 | 0 | auto jsonb_binary = jsonb_column.get_data_at(index); | 1642 | 0 | const JsonbDocument* doc = nullptr; | 1643 | 0 | auto st = JsonbDocument::checkAndCreateDocument(jsonb_binary.data, | 1644 | 0 | jsonb_binary.size, &doc); | 1645 | 0 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { | 1646 | | if constexpr (ignore_null) { | 1647 | | continue; | 1648 | 0 | } else { | 1649 | 0 | writer.writeNull(); | 1650 | 0 | } | 1651 | 0 | } else { | 1652 | 0 | writer.writeValue(doc->getValue()); | 1653 | 0 | } | 1654 | 0 | } | 1655 | 10 | } | 1656 | 10 | writer.writeEndArray(); | 1657 | 10 | column->insert_data(writer.getOutput()->getBuffer(), writer.getOutput()->getSize()); | 1658 | 10 | writer.reset(); | 1659 | 10 | } | 1660 | | | 1661 | 1 | block.get_by_position(result).column = std::move(column); | 1662 | 1 | return Status::OK(); | 1663 | 1 | } |
_ZNK5doris18FunctionJsonbArrayILb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 1592 | 1 | uint32_t result, size_t input_rows_count) const override { | 1593 | 1 | auto return_data_type = std::make_shared<DataTypeJsonb>(); | 1594 | 1 | auto column = return_data_type->create_column(); | 1595 | 1 | column->reserve(input_rows_count); | 1596 | | | 1597 | 1 | JsonbWriter writer; | 1598 | 12 | for (size_t i = 0; i < input_rows_count; ++i) { | 1599 | 10 | writer.writeStartArray(); | 1600 | 10 | for (auto argument : arguments) { | 1601 | 10 | auto&& [arg_column, is_const] = | 1602 | 10 | unpack_if_const(block.get_by_position(argument).column); | 1603 | 10 | if (arg_column->is_nullable()) { | 1604 | 10 | const auto& nullable_column = | 1605 | 10 | assert_cast<const ColumnNullable&, TypeCheckOnRelease::DISABLE>( | 1606 | 10 | *arg_column); | 1607 | 10 | const auto& null_map = nullable_column.get_null_map_data(); | 1608 | 10 | const auto& nested_column = nullable_column.get_nested_column(); | 1609 | 10 | const auto& jsonb_column = | 1610 | 10 | assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>( | 1611 | 10 | nested_column); | 1612 | | | 1613 | 10 | auto index = index_check_const(i, is_const); | 1614 | 10 | if (null_map[index]) { | 1615 | 1 | if constexpr (ignore_null) { | 1616 | 1 | continue; | 1617 | | } else { | 1618 | | writer.writeNull(); | 1619 | | } | 1620 | 9 | } else { | 1621 | 9 | auto jsonb_binary = jsonb_column.get_data_at(index); | 1622 | 9 | const JsonbDocument* doc = nullptr; | 1623 | 9 | auto st = JsonbDocument::checkAndCreateDocument(jsonb_binary.data, | 1624 | 9 | jsonb_binary.size, &doc); | 1625 | 9 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { | 1626 | 0 | if constexpr (ignore_null) { | 1627 | 0 | continue; | 1628 | | } else { | 1629 | | writer.writeNull(); | 1630 | | } | 1631 | 9 | } else { | 1632 | 9 | writer.writeValue(doc->getValue()); | 1633 | 9 | } | 1634 | 9 | } | 1635 | 10 | } 
else { | 1636 | 0 | const auto& jsonb_column = | 1637 | 0 | assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>( | 1638 | 0 | *arg_column); | 1639 | |
| 1640 | 0 | auto index = index_check_const(i, is_const); | 1641 | 0 | auto jsonb_binary = jsonb_column.get_data_at(index); | 1642 | 0 | const JsonbDocument* doc = nullptr; | 1643 | 0 | auto st = JsonbDocument::checkAndCreateDocument(jsonb_binary.data, | 1644 | 0 | jsonb_binary.size, &doc); | 1645 | 0 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { | 1646 | 0 | if constexpr (ignore_null) { | 1647 | 0 | continue; | 1648 | | } else { | 1649 | | writer.writeNull(); | 1650 | | } | 1651 | 0 | } else { | 1652 | 0 | writer.writeValue(doc->getValue()); | 1653 | 0 | } | 1654 | 0 | } | 1655 | 10 | } | 1656 | 11 | writer.writeEndArray(); | 1657 | 11 | column->insert_data(writer.getOutput()->getBuffer(), writer.getOutput()->getSize()); | 1658 | 11 | writer.reset(); | 1659 | 11 | } | 1660 | | | 1661 | 2 | block.get_by_position(result).column = std::move(column); | 1662 | 2 | return Status::OK(); | 1663 | 1 | } |
|
1664 | | }; |
1665 | | |
1666 | | class FunctionJsonbObject : public IFunction { |
1667 | | public: |
1668 | | static constexpr auto name = "json_object"; |
1669 | | static constexpr auto alias = "jsonb_object"; |
1670 | | |
1671 | 2 | static FunctionPtr create() { return std::make_shared<FunctionJsonbObject>(); } |
1672 | | |
1673 | 0 | String get_name() const override { return name; } |
1674 | | |
1675 | 0 | size_t get_number_of_arguments() const override { return 0; } |
1676 | 1 | bool is_variadic() const override { return true; } |
1677 | | |
1678 | 0 | bool use_default_implementation_for_nulls() const override { return false; } |
1679 | | |
1680 | 0 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
1681 | 0 | return std::make_shared<DataTypeJsonb>(); |
1682 | 0 | } |
1683 | | |
1684 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
1685 | 0 | uint32_t result, size_t input_rows_count) const override { |
1686 | 0 | if (arguments.size() % 2 != 0) { |
1687 | 0 | return Status::InvalidArgument( |
1688 | 0 | "JSON object must have an even number of arguments, but got: {}", |
1689 | 0 | arguments.size()); |
1690 | 0 | } |
1691 | | |
1692 | 0 | auto return_data_type = std::make_shared<DataTypeJsonb>(); |
1693 | |
|
1694 | 0 | auto write_key = [](JsonbWriter& writer, const ColumnString& key_col, const bool is_const, |
1695 | 0 | const NullMap* null_map, const size_t arg_index, const size_t row_idx) { |
1696 | 0 | auto index = index_check_const(row_idx, is_const); |
1697 | 0 | if (null_map && (*null_map)[index]) { |
1698 | 0 | return Status::InvalidArgument( |
1699 | 0 | "JSON documents may not contain NULL member name(argument " |
1700 | 0 | "index: " |
1701 | 0 | "{}, row index: {})", |
1702 | 0 | row_idx, arg_index); |
1703 | 0 | } |
1704 | | |
1705 | 0 | auto key_string = key_col.get_data_at(index); |
1706 | 0 | if (key_string.size > 255) { |
1707 | 0 | return Status::InvalidArgument( |
1708 | 0 | "JSON object keys(argument index: {}) must be less than 256 " |
1709 | 0 | "bytes, but got size: {}", |
1710 | 0 | arg_index, key_string.size); |
1711 | 0 | } |
1712 | 0 | writer.writeKey(key_string.data, static_cast<uint8_t>(key_string.size)); |
1713 | 0 | return Status::OK(); |
1714 | 0 | }; |
1715 | |
|
1716 | 0 | auto write_value = [](JsonbWriter& writer, const ColumnString& value_col, |
1717 | 0 | const bool is_const, const NullMap* null_map, const size_t arg_index, |
1718 | 0 | const size_t row_idx) { |
1719 | 0 | auto index = index_check_const(row_idx, is_const); |
1720 | 0 | if (null_map && (*null_map)[index]) { |
1721 | 0 | writer.writeNull(); |
1722 | 0 | return Status::OK(); |
1723 | 0 | } |
1724 | | |
1725 | 0 | auto value_string = value_col.get_data_at(index); |
1726 | 0 | const JsonbDocument* doc = nullptr; |
1727 | 0 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(value_string.data, |
1728 | 0 | value_string.size, &doc)); |
1729 | 0 | writer.writeValue(doc->getValue()); |
1730 | 0 | return Status::OK(); |
1731 | 0 | }; |
1732 | |
|
1733 | 0 | for (size_t arg_idx = 0; arg_idx != arguments.size(); arg_idx += 2) { |
1734 | 0 | auto key_argument = arguments[arg_idx]; |
1735 | 0 | auto value_argument = arguments[arg_idx + 1]; |
1736 | |
|
1737 | 0 | auto& key_data_type = block.get_by_position(key_argument).type; |
1738 | 0 | auto& value_data_type = block.get_by_position(value_argument).type; |
1739 | 0 | if (!is_string_type(key_data_type->get_primitive_type())) { |
1740 | 0 | return Status::InvalidArgument( |
1741 | 0 | "JSON object key(argument index: {}) must be String, but got type: " |
1742 | 0 | "{}(primitive type: {})", |
1743 | 0 | arg_idx, key_data_type->get_name(), |
1744 | 0 | static_cast<int>(key_data_type->get_primitive_type())); |
1745 | 0 | } |
1746 | | |
1747 | 0 | if (value_data_type->get_primitive_type() != PrimitiveType::TYPE_JSONB) { |
1748 | 0 | return Status::InvalidArgument( |
1749 | 0 | "JSON object value(argument index: {}) must be JSON, but got type: {}", |
1750 | 0 | arg_idx, value_data_type->get_name()); |
1751 | 0 | } |
1752 | 0 | } |
1753 | | |
1754 | 0 | auto column = return_data_type->create_column(); |
1755 | 0 | column->reserve(input_rows_count); |
1756 | |
|
1757 | 0 | JsonbWriter writer; |
1758 | 0 | for (size_t i = 0; i != input_rows_count; ++i) { |
1759 | 0 | writer.writeStartObject(); |
1760 | 0 | for (size_t arg_idx = 0; arg_idx != arguments.size(); arg_idx += 2) { |
1761 | 0 | auto key_argument = arguments[arg_idx]; |
1762 | 0 | auto value_argument = arguments[arg_idx + 1]; |
1763 | 0 | auto&& [key_column, key_const] = |
1764 | 0 | unpack_if_const(block.get_by_position(key_argument).column); |
1765 | 0 | auto&& [value_column, value_const] = |
1766 | 0 | unpack_if_const(block.get_by_position(value_argument).column); |
1767 | |
|
1768 | 0 | if (key_column->is_nullable()) { |
1769 | 0 | const auto& nullable_column = |
1770 | 0 | assert_cast<const ColumnNullable&, TypeCheckOnRelease::DISABLE>( |
1771 | 0 | *key_column); |
1772 | 0 | const auto& null_map = nullable_column.get_null_map_data(); |
1773 | 0 | const auto& nested_column = nullable_column.get_nested_column(); |
1774 | 0 | const auto& key_arg_column = |
1775 | 0 | assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>( |
1776 | 0 | nested_column); |
1777 | |
|
1778 | 0 | RETURN_IF_ERROR( |
1779 | 0 | write_key(writer, key_arg_column, key_const, &null_map, arg_idx, i)); |
1780 | 0 | } else { |
1781 | 0 | const auto& key_arg_column = |
1782 | 0 | assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>( |
1783 | 0 | *key_column); |
1784 | 0 | RETURN_IF_ERROR( |
1785 | 0 | write_key(writer, key_arg_column, key_const, nullptr, arg_idx, i)); |
1786 | 0 | } |
1787 | | |
1788 | 0 | if (value_column->is_nullable()) { |
1789 | 0 | const auto& nullable_column = |
1790 | 0 | assert_cast<const ColumnNullable&, TypeCheckOnRelease::DISABLE>( |
1791 | 0 | *value_column); |
1792 | 0 | const auto& null_map = nullable_column.get_null_map_data(); |
1793 | 0 | const auto& nested_column = nullable_column.get_nested_column(); |
1794 | 0 | const auto& value_arg_column = |
1795 | 0 | assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>( |
1796 | 0 | nested_column); |
1797 | |
|
1798 | 0 | RETURN_IF_ERROR(write_value(writer, value_arg_column, value_const, &null_map, |
1799 | 0 | arg_idx + 1, i)); |
1800 | 0 | } else { |
1801 | 0 | const auto& value_arg_column = |
1802 | 0 | assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>( |
1803 | 0 | *value_column); |
1804 | 0 | RETURN_IF_ERROR(write_value(writer, value_arg_column, value_const, nullptr, |
1805 | 0 | arg_idx + 1, i)); |
1806 | 0 | } |
1807 | 0 | } |
1808 | | |
1809 | 0 | writer.writeEndObject(); |
1810 | 0 | column->insert_data(writer.getOutput()->getBuffer(), writer.getOutput()->getSize()); |
1811 | 0 | writer.reset(); |
1812 | 0 | } |
1813 | | |
1814 | 0 | block.get_by_position(result).column = std::move(column); |
1815 | 0 | return Status::OK(); |
1816 | 0 | } |
1817 | | }; |
1818 | | |
/// Selects which of the three JSONB modification functions a
/// FunctionJsonbModify instantiation implements.
enum class JsonbModifyType {
    Insert,  // registered as jsonb_insert / json_insert
    Set,     // registered as jsonb_set / json_set
    Replace, // registered as jsonb_replace / json_replace
};

/// Maps a JsonbModifyType to the SQL function name and its alias.
/// The primary template is the fallback for values without a specialization.
template <JsonbModifyType modify_type>
struct JsonbModifyName {
    static constexpr const char* name = "jsonb_modify";
    static constexpr const char* alias = "json_modify";
};

template <>
struct JsonbModifyName<JsonbModifyType::Insert> {
    static constexpr const char* name = "jsonb_insert";
    static constexpr const char* alias = "json_insert";
};

template <>
struct JsonbModifyName<JsonbModifyType::Set> {
    static constexpr const char* name = "jsonb_set";
    static constexpr const char* alias = "json_set";
};

template <>
struct JsonbModifyName<JsonbModifyType::Replace> {
    static constexpr const char* name = "jsonb_replace";
    static constexpr const char* alias = "json_replace";
};
1842 | | |
1843 | | template <JsonbModifyType modify_type> |
1844 | | class FunctionJsonbModify : public IFunction { |
1845 | | public: |
1846 | | static constexpr auto name = JsonbModifyName<modify_type>::name; |
1847 | | static constexpr auto alias = JsonbModifyName<modify_type>::alias; |
1848 | | |
1849 | 6 | static FunctionPtr create() { return std::make_shared<FunctionJsonbModify<modify_type>>(); }_ZN5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE6createEv Line | Count | Source | 1849 | 2 | static FunctionPtr create() { return std::make_shared<FunctionJsonbModify<modify_type>>(); } |
_ZN5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE6createEv Line | Count | Source | 1849 | 2 | static FunctionPtr create() { return std::make_shared<FunctionJsonbModify<modify_type>>(); } |
_ZN5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE6createEv Line | Count | Source | 1849 | 2 | static FunctionPtr create() { return std::make_shared<FunctionJsonbModify<modify_type>>(); } |
|
1850 | | |
1851 | 0 | String get_name() const override { return name; }Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE8get_nameB5cxx11Ev |
1852 | | |
1853 | 0 | size_t get_number_of_arguments() const override { return 0; }Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE23get_number_of_argumentsEv Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE23get_number_of_argumentsEv Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE23get_number_of_argumentsEv |
1854 | 3 | bool is_variadic() const override { return true; }_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE11is_variadicEv Line | Count | Source | 1854 | 1 | bool is_variadic() const override { return true; } |
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE11is_variadicEv Line | Count | Source | 1854 | 1 | bool is_variadic() const override { return true; } |
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE11is_variadicEv Line | Count | Source | 1854 | 1 | bool is_variadic() const override { return true; } |
|
1855 | | |
1856 | 0 | bool use_default_implementation_for_nulls() const override { return false; }Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE36use_default_implementation_for_nullsEv Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE36use_default_implementation_for_nullsEv Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE36use_default_implementation_for_nullsEv |
1857 | | |
1858 | 0 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
1859 | 0 | return make_nullable(std::make_shared<DataTypeJsonb>()); |
1860 | 0 | } Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE |
1861 | | |
1862 | | Status create_all_null_result(const DataTypePtr& return_data_type, Block& block, |
1863 | 0 | uint32_t result, size_t input_rows_count) const { |
1864 | 0 | auto result_column = return_data_type->create_column(); |
1865 | 0 | result_column->insert_default(); |
1866 | 0 | auto const_column = ColumnConst::create(std::move(result_column), input_rows_count); |
1867 | 0 | block.get_by_position(result).column = std::move(const_column); |
1868 | 0 | return Status::OK(); |
1869 | 0 | } Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE22create_all_null_resultERKSt10shared_ptrIKNS_9IDataTypeEERNS_5BlockEjm Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE22create_all_null_resultERKSt10shared_ptrIKNS_9IDataTypeEERNS_5BlockEjm Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE22create_all_null_resultERKSt10shared_ptrIKNS_9IDataTypeEERNS_5BlockEjm |
1870 | | |
1871 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
1872 | 0 | uint32_t result, size_t input_rows_count) const override { |
1873 | 0 | if (arguments.size() % 2 != 1 || arguments.size() < 3) { |
1874 | 0 | return Status::InvalidArgument( |
1875 | 0 | "Function {} must have an odd number of arguments and more than 2 arguments, " |
1876 | 0 | "but got: {}", |
1877 | 0 | name, arguments.size()); |
1878 | 0 | } |
1879 | | |
1880 | 0 | const size_t keys_count = (arguments.size() - 1) / 2; |
1881 | |
|
1882 | 0 | auto return_data_type = make_nullable(std::make_shared<DataTypeJsonb>()); |
1883 | |
|
1884 | 0 | auto result_column = return_data_type->create_column(); |
1885 | 0 | auto& result_nullable_col = assert_cast<ColumnNullable&>(*result_column); |
1886 | 0 | auto& null_map = result_nullable_col.get_null_map_data(); |
1887 | 0 | auto& res_string_column = |
1888 | 0 | assert_cast<ColumnString&>(result_nullable_col.get_nested_column()); |
1889 | 0 | auto& res_chars = res_string_column.get_chars(); |
1890 | 0 | auto& res_offsets = res_string_column.get_offsets(); |
1891 | |
|
1892 | 0 | null_map.resize_fill(input_rows_count, 0); |
1893 | 0 | res_offsets.resize(input_rows_count); |
1894 | 0 | auto&& [json_data_arg_column, json_data_const] = |
1895 | 0 | unpack_if_const(block.get_by_position(arguments[0]).column); |
1896 | |
|
1897 | 0 | if (json_data_const) { |
1898 | 0 | if (json_data_arg_column->is_null_at(0)) { |
1899 | 0 | return create_all_null_result(return_data_type, block, result, input_rows_count); |
1900 | 0 | } |
1901 | 0 | } |
1902 | | |
1903 | 0 | std::vector<const ColumnString*> json_path_columns(keys_count); |
1904 | 0 | std::vector<bool> json_path_constant(keys_count); |
1905 | 0 | std::vector<const NullMap*> json_path_null_maps(keys_count, nullptr); |
1906 | |
|
1907 | 0 | std::vector<const ColumnString*> json_value_columns(keys_count); |
1908 | 0 | std::vector<bool> json_value_constant(keys_count); |
1909 | 0 | std::vector<const NullMap*> json_value_null_maps(keys_count, nullptr); |
1910 | |
|
1911 | 0 | const NullMap* json_data_null_map = nullptr; |
1912 | 0 | const ColumnString* json_data_column; |
1913 | 0 | if (json_data_arg_column->is_nullable()) { |
1914 | 0 | const auto& nullable_column = assert_cast<const ColumnNullable&>(*json_data_arg_column); |
1915 | 0 | json_data_null_map = &nullable_column.get_null_map_data(); |
1916 | 0 | const auto& nested_column = nullable_column.get_nested_column(); |
1917 | 0 | json_data_column = assert_cast<const ColumnString*>(&nested_column); |
1918 | 0 | } else { |
1919 | 0 | json_data_column = assert_cast<const ColumnString*>(json_data_arg_column.get()); |
1920 | 0 | } |
1921 | |
|
1922 | 0 | for (size_t i = 1; i < arguments.size(); i += 2) { |
1923 | 0 | auto&& [path_column, path_const] = |
1924 | 0 | unpack_if_const(block.get_by_position(arguments[i]).column); |
1925 | 0 | auto&& [value_column, value_const] = |
1926 | 0 | unpack_if_const(block.get_by_position(arguments[i + 1]).column); |
1927 | |
|
1928 | 0 | if (path_const) { |
1929 | 0 | if (path_column->is_null_at(0)) { |
1930 | 0 | return create_all_null_result(return_data_type, block, result, |
1931 | 0 | input_rows_count); |
1932 | 0 | } |
1933 | 0 | } |
1934 | | |
1935 | 0 | json_path_constant[i / 2] = path_const; |
1936 | 0 | if (path_column->is_nullable()) { |
1937 | 0 | const auto& nullable_column = assert_cast<const ColumnNullable&>(*path_column); |
1938 | 0 | json_path_null_maps[i / 2] = &nullable_column.get_null_map_data(); |
1939 | 0 | const auto& nested_column = nullable_column.get_nested_column(); |
1940 | 0 | json_path_columns[i / 2] = assert_cast<const ColumnString*>(&nested_column); |
1941 | 0 | } else { |
1942 | 0 | json_path_columns[i / 2] = assert_cast<const ColumnString*>(path_column.get()); |
1943 | 0 | } |
1944 | |
|
1945 | 0 | json_value_constant[i / 2] = value_const; |
1946 | 0 | if (value_column->is_nullable()) { |
1947 | 0 | const auto& nullable_column = assert_cast<const ColumnNullable&>(*value_column); |
1948 | 0 | json_value_null_maps[i / 2] = &nullable_column.get_null_map_data(); |
1949 | 0 | const auto& nested_column = nullable_column.get_nested_column(); |
1950 | 0 | json_value_columns[i / 2] = assert_cast<const ColumnString*>(&nested_column); |
1951 | 0 | } else { |
1952 | 0 | json_value_columns[i / 2] = assert_cast<const ColumnString*>(value_column.get()); |
1953 | 0 | } |
1954 | 0 | } |
1955 | | |
1956 | 0 | DorisVector<const JsonbDocument*> json_documents(input_rows_count); |
1957 | 0 | if (json_data_const) { |
1958 | 0 | auto json_data_string = json_data_column->get_data_at(0); |
1959 | 0 | const JsonbDocument* doc = nullptr; |
1960 | 0 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(json_data_string.data, |
1961 | 0 | json_data_string.size, &doc)); |
1962 | 0 | if (!doc || !doc->getValue()) [[unlikely]] { |
1963 | 0 | return create_all_null_result(return_data_type, block, result, input_rows_count); |
1964 | 0 | } |
1965 | 0 | for (size_t i = 0; i != input_rows_count; ++i) { |
1966 | 0 | json_documents[i] = doc; |
1967 | 0 | } |
1968 | 0 | } else { |
1969 | 0 | for (size_t i = 0; i != input_rows_count; ++i) { |
1970 | 0 | if (json_data_null_map && (*json_data_null_map)[i]) { |
1971 | 0 | null_map[i] = 1; |
1972 | 0 | json_documents[i] = nullptr; |
1973 | 0 | continue; |
1974 | 0 | } |
1975 | | |
1976 | 0 | auto json_data_string = json_data_column->get_data_at(i); |
1977 | 0 | const JsonbDocument* doc = nullptr; |
1978 | 0 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(json_data_string.data, |
1979 | 0 | json_data_string.size, &doc)); |
1980 | 0 | if (!doc || !doc->getValue()) [[unlikely]] { |
1981 | 0 | null_map[i] = 1; |
1982 | 0 | continue; |
1983 | 0 | } |
1984 | 0 | json_documents[i] = doc; |
1985 | 0 | } |
1986 | 0 | } |
1987 | | |
1988 | 0 | DorisVector<DorisVector<JsonbPath>> json_paths(keys_count); |
1989 | 0 | DorisVector<DorisVector<const JsonbValue*>> json_values(keys_count); |
1990 | |
|
1991 | 0 | RETURN_IF_ERROR(parse_paths_and_values(json_paths, json_values, arguments, input_rows_count, |
1992 | 0 | json_path_columns, json_path_constant, |
1993 | 0 | json_path_null_maps, json_value_columns, |
1994 | 0 | json_value_constant, json_value_null_maps)); |
1995 | | |
1996 | 0 | JsonbWriter writer; |
1997 | 0 | struct DocumentBuffer { |
1998 | 0 | DorisUniqueBufferPtr<char> ptr; |
1999 | 0 | size_t size = 0; |
2000 | 0 | size_t capacity = 0; |
2001 | 0 | }; |
2002 | |
|
2003 | 0 | DocumentBuffer tmp_buffer; |
2004 | |
|
2005 | 0 | for (size_t row_idx = 0; row_idx != input_rows_count; ++row_idx) { |
2006 | 0 | for (size_t i = 1; i < arguments.size(); i += 2) { |
2007 | 0 | const size_t index = i / 2; |
2008 | 0 | auto& json_path = json_paths[index]; |
2009 | 0 | auto& json_value = json_values[index]; |
2010 | |
|
2011 | 0 | const auto path_index = index_check_const(row_idx, json_path_constant[index]); |
2012 | 0 | const auto value_index = index_check_const(row_idx, json_value_constant[index]); |
2013 | |
|
2014 | 0 | if (null_map[row_idx]) { |
2015 | 0 | continue; |
2016 | 0 | } |
2017 | | |
2018 | 0 | if (json_documents[row_idx] == nullptr) { |
2019 | 0 | null_map[row_idx] = 1; |
2020 | 0 | continue; |
2021 | 0 | } |
2022 | | |
2023 | 0 | if (json_path_null_maps[index] && (*json_path_null_maps[index])[path_index]) { |
2024 | 0 | null_map[row_idx] = 1; |
2025 | 0 | continue; |
2026 | 0 | } |
2027 | | |
2028 | 0 | auto find_result = |
2029 | 0 | json_documents[row_idx]->getValue()->findValue(json_path[path_index]); |
2030 | |
|
2031 | 0 | if (find_result.is_wildcard) { |
2032 | 0 | return Status::InvalidArgument( |
2033 | 0 | " In this situation, path expressions may not contain the * and ** " |
2034 | 0 | "tokens or an array range, argument index: {}, row index: {}", |
2035 | 0 | i, row_idx); |
2036 | 0 | } |
2037 | | |
2038 | 0 | if constexpr (modify_type == JsonbModifyType::Insert) { |
2039 | 0 | if (find_result.value) { |
2040 | 0 | continue; |
2041 | 0 | } |
2042 | 0 | } else if constexpr (modify_type == JsonbModifyType::Replace) { |
2043 | 0 | if (!find_result.value) { |
2044 | 0 | continue; |
2045 | 0 | } |
2046 | 0 | } |
2047 | | |
2048 | 0 | std::vector<const JsonbValue*> parents; |
2049 | |
|
2050 | 0 | bool replace = false; |
2051 | 0 | parents.emplace_back(json_documents[row_idx]->getValue()); |
2052 | 0 | if (find_result.value) { |
2053 | | // find target path, replace it with the new value. |
2054 | 0 | replace = true; |
2055 | 0 | if (!build_parents_by_path(json_documents[row_idx]->getValue(), |
2056 | 0 | json_path[path_index], parents)) { |
2057 | 0 | DCHECK(false); |
2058 | 0 | continue; |
2059 | 0 | } |
2060 | 0 | } else { |
2061 | | // does not find target path, insert the new value. |
2062 | 0 | JsonbPath new_path; |
2063 | 0 | for (size_t j = 0; j < json_path[path_index].get_leg_vector_size() - 1; ++j) { |
2064 | 0 | auto* current_leg = json_path[path_index].get_leg_from_leg_vector(j); |
2065 | 0 | std::unique_ptr<leg_info> leg = std::make_unique<leg_info>( |
2066 | 0 | current_leg->leg_ptr, current_leg->leg_len, |
2067 | 0 | current_leg->array_index, current_leg->type); |
2068 | 0 | new_path.add_leg_to_leg_vector(std::move(leg)); |
2069 | 0 | } |
2070 | |
|
2071 | 0 | if (!build_parents_by_path(json_documents[row_idx]->getValue(), new_path, |
2072 | 0 | parents)) { |
2073 | 0 | continue; |
2074 | 0 | } |
2075 | 0 | } |
2076 | | |
2077 | 0 | const auto legs_count = json_path[path_index].get_leg_vector_size(); |
2078 | 0 | leg_info* last_leg = |
2079 | 0 | legs_count > 0 |
2080 | 0 | ? json_path[path_index].get_leg_from_leg_vector(legs_count - 1) |
2081 | 0 | : nullptr; |
2082 | 0 | RETURN_IF_ERROR(write_json_value(json_documents[row_idx]->getValue(), parents, 0, |
2083 | 0 | json_value[value_index], replace, last_leg, |
2084 | 0 | writer)); |
2085 | | |
2086 | 0 | auto* writer_output = writer.getOutput(); |
2087 | 0 | if (writer_output->getSize() > tmp_buffer.capacity) { |
2088 | 0 | tmp_buffer.capacity = |
2089 | 0 | ((size_t(writer_output->getSize()) + 1024 - 1) / 1024) * 1024; |
2090 | 0 | tmp_buffer.ptr = make_unique_buffer<char>(tmp_buffer.capacity); |
2091 | 0 | DCHECK_LE(writer_output->getSize(), tmp_buffer.capacity); |
2092 | 0 | } |
2093 | |
|
2094 | 0 | memcpy(tmp_buffer.ptr.get(), writer_output->getBuffer(), writer_output->getSize()); |
2095 | 0 | tmp_buffer.size = writer_output->getSize(); |
2096 | |
|
2097 | 0 | writer.reset(); |
2098 | |
|
2099 | 0 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument( |
2100 | 0 | tmp_buffer.ptr.get(), tmp_buffer.size, &json_documents[row_idx])); |
2101 | 0 | } |
2102 | | |
2103 | 0 | if (!null_map[row_idx]) { |
2104 | 0 | const auto* jsonb_document = json_documents[row_idx]; |
2105 | 0 | const auto size = jsonb_document->numPackedBytes(); |
2106 | 0 | res_chars.insert(reinterpret_cast<const char*>(jsonb_document), |
2107 | 0 | reinterpret_cast<const char*>(jsonb_document) + size); |
2108 | 0 | } |
2109 | |
|
2110 | 0 | res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size()); |
2111 | |
|
2112 | 0 | if (!null_map[row_idx]) { |
2113 | 0 | auto* ptr = res_chars.data() + res_offsets[row_idx - 1]; |
2114 | 0 | auto size = res_offsets[row_idx] - res_offsets[row_idx - 1]; |
2115 | 0 | const JsonbDocument* doc = nullptr; |
2116 | 0 | THROW_IF_ERROR(JsonbDocument::checkAndCreateDocument( |
2117 | 0 | reinterpret_cast<const char*>(ptr), size, &doc)); |
2118 | 0 | } |
2119 | 0 | } |
2120 | | |
2121 | 0 | block.get_by_position(result).column = std::move(result_column); |
2122 | 0 | return Status::OK(); |
2123 | 0 | } Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm |
2124 | | |
2125 | | bool build_parents_by_path(const JsonbValue* root, const JsonbPath& path, |
2126 | 0 | std::vector<const JsonbValue*>& parents) const { |
2127 | 0 | const size_t index = parents.size() - 1; |
2128 | 0 | if (index == path.get_leg_vector_size()) { |
2129 | 0 | return true; |
2130 | 0 | } |
2131 | | |
2132 | 0 | JsonbPath current; |
2133 | 0 | auto* current_leg = path.get_leg_from_leg_vector(index); |
2134 | 0 | std::unique_ptr<leg_info> leg = |
2135 | 0 | std::make_unique<leg_info>(current_leg->leg_ptr, current_leg->leg_len, |
2136 | 0 | current_leg->array_index, current_leg->type); |
2137 | 0 | current.add_leg_to_leg_vector(std::move(leg)); |
2138 | |
|
2139 | 0 | auto find_result = root->findValue(current); |
2140 | 0 | if (!find_result.value) { |
2141 | 0 | std::string path_string; |
2142 | 0 | current.to_string(&path_string); |
2143 | 0 | return false; |
2144 | 0 | } else if (find_result.value == root) { |
2145 | 0 | return true; |
2146 | 0 | } else { |
2147 | 0 | parents.emplace_back(find_result.value); |
2148 | 0 | } |
2149 | | |
2150 | 0 | return build_parents_by_path(find_result.value, path, parents); |
2151 | 0 | } Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE21build_parents_by_pathEPKNS_10JsonbValueERKNS_9JsonbPathERSt6vectorIS5_SaIS5_EE Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE21build_parents_by_pathEPKNS_10JsonbValueERKNS_9JsonbPathERSt6vectorIS5_SaIS5_EE Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE21build_parents_by_pathEPKNS_10JsonbValueERKNS_9JsonbPathERSt6vectorIS5_SaIS5_EE |
2152 | | |
2153 | | Status write_json_value(const JsonbValue* root, const std::vector<const JsonbValue*>& parents, |
2154 | | const size_t parent_index, const JsonbValue* value, const bool replace, |
2155 | 0 | const leg_info* last_leg, JsonbWriter& writer) const { |
2156 | 0 | if (parent_index >= parents.size()) { |
2157 | 0 | return Status::InvalidArgument( |
2158 | 0 | "JsonbModify: parent_index {} is out of bounds for parents size {}", |
2159 | 0 | parent_index, parents.size()); |
2160 | 0 | } |
2161 | | |
2162 | 0 | if (parents[parent_index] != root) { |
2163 | 0 | return Status::InvalidArgument( |
2164 | 0 | "JsonbModify: parent value does not match root value, parent_index: {}, " |
2165 | 0 | "parents size: {}", |
2166 | 0 | parent_index, parents.size()); |
2167 | 0 | } |
2168 | | |
2169 | 0 | if (parent_index == parents.size() - 1 && replace) { |
2170 | | // We are at the last parent, write the value directly |
2171 | 0 | if (value == nullptr) { |
2172 | 0 | writer.writeNull(); |
2173 | 0 | } else { |
2174 | 0 | writer.writeValue(value); |
2175 | 0 | } |
2176 | 0 | return Status::OK(); |
2177 | 0 | } |
2178 | | |
2179 | 0 | bool value_written = false; |
2180 | 0 | bool is_last_parent = (parent_index == parents.size() - 1); |
2181 | 0 | const auto* next_parent = is_last_parent ? nullptr : parents[parent_index + 1]; |
2182 | 0 | if (root->isArray()) { |
2183 | 0 | writer.writeStartArray(); |
2184 | 0 | const auto* array_val = root->unpack<ArrayVal>(); |
2185 | 0 | for (int i = 0; i != array_val->numElem(); ++i) { |
2186 | 0 | auto* it = array_val->get(i); |
2187 | |
|
2188 | 0 | if (is_last_parent && last_leg->array_index == i) { |
2189 | 0 | value_written = true; |
2190 | 0 | writer.writeValue(value); |
2191 | 0 | } else if (it == next_parent) { |
2192 | 0 | value_written = true; |
2193 | 0 | RETURN_IF_ERROR(write_json_value(it, parents, parent_index + 1, value, replace, |
2194 | 0 | last_leg, writer)); |
2195 | 0 | } else { |
2196 | 0 | writer.writeValue(it); |
2197 | 0 | } |
2198 | 0 | } |
2199 | 0 | if (is_last_parent && !value_written) { |
2200 | 0 | value_written = true; |
2201 | 0 | writer.writeValue(value); |
2202 | 0 | } |
2203 | |
|
2204 | 0 | writer.writeEndArray(); |
2205 | |
|
2206 | 0 | } else { |
2207 | | /** |
2208 | | Because even for a non-array object, `$[0]` can still point to that object: |
2209 | | ``` |
2210 | | select json_extract('{"key": "value"}', '$[0]'); |
2211 | | +------------------------------------------+ |
2212 | | | json_extract('{"key": "value"}', '$[0]') | |
2213 | | +------------------------------------------+ |
2214 | | | {"key": "value"} | |
2215 | | +------------------------------------------+ |
2216 | | ``` |
2217 | | So when inserting an element into `$[1]`, even if '$' does not represent an array, |
2218 | | it should be converted to an array before insertion: |
2219 | | ``` |
2220 | | select json_insert('123','$[1]', null); |
2221 | | +---------------------------------+ |
2222 | | | json_insert('123','$[1]', null) | |
2223 | | +---------------------------------+ |
2224 | | | [123, null] | |
2225 | | +---------------------------------+ |
2226 | | ``` |
2227 | | */ |
2228 | 0 | if (is_last_parent && last_leg && last_leg->type == ARRAY_CODE) { |
2229 | 0 | writer.writeStartArray(); |
2230 | 0 | writer.writeValue(root); |
2231 | 0 | writer.writeValue(value); |
2232 | 0 | writer.writeEndArray(); |
2233 | 0 | return Status::OK(); |
2234 | 0 | } else if (root->isObject()) { |
2235 | 0 | writer.writeStartObject(); |
2236 | 0 | const auto* object_val = root->unpack<ObjectVal>(); |
2237 | 0 | for (const auto& it : *object_val) { |
2238 | 0 | writer.writeKey(it.getKeyStr(), it.klen()); |
2239 | 0 | if (it.value() == next_parent) { |
2240 | 0 | value_written = true; |
2241 | 0 | RETURN_IF_ERROR(write_json_value(it.value(), parents, parent_index + 1, |
2242 | 0 | value, replace, last_leg, writer)); |
2243 | 0 | } else { |
2244 | 0 | writer.writeValue(it.value()); |
2245 | 0 | } |
2246 | 0 | } |
2247 | | |
2248 | 0 | if (is_last_parent && !value_written) { |
2249 | 0 | value_written = true; |
2250 | 0 | writer.writeStartObject(); |
2251 | 0 | writer.writeKey(last_leg->leg_ptr, static_cast<uint8_t>(last_leg->leg_len)); |
2252 | 0 | writer.writeValue(value); |
2253 | 0 | writer.writeEndObject(); |
2254 | 0 | } |
2255 | 0 | writer.writeEndObject(); |
2256 | |
|
2257 | 0 | } else { |
2258 | 0 | return Status::InvalidArgument("Cannot insert value into this type"); |
2259 | 0 | } |
2260 | 0 | } |
2261 | | |
2262 | 0 | if (!value_written) { |
2263 | 0 | return Status::InvalidArgument( |
2264 | 0 | "JsonbModify: value not written, parent_index: {}, parents size: {}", |
2265 | 0 | parent_index, parents.size()); |
2266 | 0 | } |
2267 | | |
2268 | 0 | return Status::OK(); |
2269 | 0 | } Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE16write_json_valueEPKNS_10JsonbValueERKSt6vectorIS5_SaIS5_EEmS5_bPKNS_8leg_infoERNS_12JsonbWriterTINS_14JsonbOutStreamEEE Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE16write_json_valueEPKNS_10JsonbValueERKSt6vectorIS5_SaIS5_EEmS5_bPKNS_8leg_infoERNS_12JsonbWriterTINS_14JsonbOutStreamEEE Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE16write_json_valueEPKNS_10JsonbValueERKSt6vectorIS5_SaIS5_EEmS5_bPKNS_8leg_infoERNS_12JsonbWriterTINS_14JsonbOutStreamEEE |
2270 | | |
2271 | | Status parse_paths_and_values(DorisVector<DorisVector<JsonbPath>>& json_paths, |
2272 | | DorisVector<DorisVector<const JsonbValue*>>& json_values, |
2273 | | const ColumnNumbers& arguments, const size_t input_rows_count, |
2274 | | const std::vector<const ColumnString*>& json_path_columns, |
2275 | | const std::vector<bool>& json_path_constant, |
2276 | | const std::vector<const NullMap*>& json_path_null_maps, |
2277 | | const std::vector<const ColumnString*>& json_value_columns, |
2278 | | const std::vector<bool>& json_value_constant, |
2279 | 0 | const std::vector<const NullMap*>& json_value_null_maps) const { |
2280 | 0 | for (size_t i = 1; i < arguments.size(); i += 2) { |
2281 | 0 | const size_t index = i / 2; |
2282 | 0 | const auto* json_path_column = json_path_columns[index]; |
2283 | 0 | const auto* value_column = json_value_columns[index]; |
2284 | |
|
2285 | 0 | json_paths[index].resize(json_path_constant[index] ? 1 : input_rows_count); |
2286 | 0 | json_values[index].resize(json_value_constant[index] ? 1 : input_rows_count, nullptr); |
2287 | |
|
2288 | 0 | for (size_t row_idx = 0; row_idx != json_paths[index].size(); ++row_idx) { |
2289 | 0 | if (json_path_null_maps[index] && (*json_path_null_maps[index])[row_idx]) { |
2290 | 0 | continue; |
2291 | 0 | } |
2292 | | |
2293 | 0 | auto path_string = json_path_column->get_data_at(row_idx); |
2294 | 0 | if (!json_paths[index][row_idx].seek(path_string.data, path_string.size)) { |
2295 | 0 | return Status::InvalidArgument( |
2296 | 0 | "Json path error: Invalid Json Path for value: {}, " |
2297 | 0 | "argument " |
2298 | 0 | "index: {}, row index: {}", |
2299 | 0 | std::string_view(path_string.data, path_string.size), i, row_idx); |
2300 | 0 | } |
2301 | | |
2302 | 0 | if (json_paths[index][row_idx].is_wildcard()) { |
2303 | 0 | return Status::InvalidArgument( |
2304 | 0 | "In this situation, path expressions may not contain the * and ** " |
2305 | 0 | "tokens, argument index: {}, row index: {}", |
2306 | 0 | i, row_idx); |
2307 | 0 | } |
2308 | 0 | } |
2309 | | |
2310 | 0 | for (size_t row_idx = 0; row_idx != json_values[index].size(); ++row_idx) { |
2311 | 0 | if (json_value_null_maps[index] && (*json_value_null_maps[index])[row_idx]) { |
2312 | 0 | continue; |
2313 | 0 | } |
2314 | | |
2315 | 0 | auto value_string = value_column->get_data_at(row_idx); |
2316 | 0 | const JsonbDocument* doc = nullptr; |
2317 | 0 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(value_string.data, |
2318 | 0 | value_string.size, &doc)); |
2319 | 0 | if (doc) { |
2320 | 0 | json_values[index][row_idx] = doc->getValue(); |
2321 | 0 | } |
2322 | 0 | } |
2323 | 0 | } |
2324 | | |
2325 | 0 | return Status::OK(); |
2326 | 0 | } Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE22parse_paths_and_valuesERSt6vectorIS3_INS_9JsonbPathENS_18CustomStdAllocatorIS4_NS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEEEEENS5_ISA_S8_EEERS3_IS3_IPKNS_10JsonbValueENS5_ISG_S8_EEENS5_ISI_S8_EEERKS3_IjSaIjEEmRKS3_IPKNS_9ColumnStrIjEESaIST_EERKS3_IbSaIbEERKS3_IPKNS_8PODArrayIhLm4096ES8_Lm16ELm15EEESaIS15_EESX_S11_S19_ Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE22parse_paths_and_valuesERSt6vectorIS3_INS_9JsonbPathENS_18CustomStdAllocatorIS4_NS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEEEEENS5_ISA_S8_EEERS3_IS3_IPKNS_10JsonbValueENS5_ISG_S8_EEENS5_ISI_S8_EEERKS3_IjSaIjEEmRKS3_IPKNS_9ColumnStrIjEESaIST_EERKS3_IbSaIbEERKS3_IPKNS_8PODArrayIhLm4096ES8_Lm16ELm15EEESaIS15_EESX_S11_S19_ Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE22parse_paths_and_valuesERSt6vectorIS3_INS_9JsonbPathENS_18CustomStdAllocatorIS4_NS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEEEEENS5_ISA_S8_EEERS3_IS3_IPKNS_10JsonbValueENS5_ISG_S8_EEENS5_ISI_S8_EEERKS3_IjSaIjEEmRKS3_IPKNS_9ColumnStrIjEESaIST_EERKS3_IbSaIbEERKS3_IPKNS_8PODArrayIhLm4096ES8_Lm16ELm15EEESaIS15_EESX_S11_S19_ |
2327 | | }; |
2328 | | |
2329 | | struct JsonbContainsAndPathImpl { |
2330 | 3 | static DataTypes get_variadic_argument_types() { |
2331 | 3 | return {std::make_shared<DataTypeJsonb>(), std::make_shared<DataTypeJsonb>(), |
2332 | 3 | std::make_shared<DataTypeString>()}; |
2333 | 3 | } |
2334 | | |
2335 | | static Status execute_impl(FunctionContext* context, Block& block, |
2336 | | const ColumnNumbers& arguments, uint32_t result, |
2337 | 2 | size_t input_rows_count) { |
2338 | 2 | return JsonbContainsUtil::jsonb_contains_execute(context, block, arguments, result, |
2339 | 2 | input_rows_count); |
2340 | 2 | } |
2341 | | }; |
2342 | | |
2343 | | class FunctionJsonSearch : public IFunction { |
2344 | | private: |
2345 | | using OneFun = std::function<Status(size_t, bool*)>; |
2346 | 0 | static Status always_one(size_t i, bool* res) { |
2347 | 0 | *res = true; |
2348 | 0 | return Status::OK(); |
2349 | 0 | } |
2350 | 0 | static Status always_all(size_t i, bool* res) { |
2351 | 0 | *res = false; |
2352 | 0 | return Status::OK(); |
2353 | 0 | } |
2354 | | |
2355 | | using CheckNullFun = std::function<bool(size_t)>; |
2356 | 0 | static bool always_not_null(size_t) { return false; } |
2357 | | |
2358 | | using GetJsonStringRefFun = std::function<StringRef(size_t)>; |
2359 | | |
2360 | 0 | Status matched(const std::string_view& str, LikeState* state, unsigned char* res) const { |
2361 | 0 | StringRef pattern; // not used |
2362 | 0 | StringRef value_val(str.data(), str.size()); |
2363 | 0 | return (state->scalar_function)(&state->search_state, value_val, pattern, res); |
2364 | 0 | } |
2365 | | |
2366 | | /** |
2367 | | * Recursive search for matching string, if found, the result will be added to a vector |
2368 | | * @param element json element |
2369 | | * @param one_match |
2370 | | * @param search_str |
2371 | | * @param cur_path |
2372 | | * @param matches The path that has already been matched |
2373 | | * @return true if matched else false |
2374 | | */ |
2375 | | bool find_matches(const JsonbValue* element, const bool& one_match, LikeState* state, |
2376 | 0 | JsonbPath* cur_path, std::unordered_set<std::string>* matches) const { |
2377 | 0 | if (element->isString()) { |
2378 | 0 | const auto* json_string = element->unpack<JsonbStringVal>(); |
2379 | 0 | const std::string_view element_str(json_string->getBlob(), json_string->length()); |
2380 | 0 | unsigned char res; |
2381 | 0 | RETURN_IF_ERROR(matched(element_str, state, &res)); |
2382 | 0 | if (res) { |
2383 | 0 | std::string str; |
2384 | 0 | auto valid = cur_path->to_string(&str); |
2385 | 0 | if (!valid) { |
2386 | 0 | return false; |
2387 | 0 | } |
2388 | 0 | return matches->insert(str).second; |
2389 | 0 | } else { |
2390 | 0 | return false; |
2391 | 0 | } |
2392 | 0 | } else if (element->isObject()) { |
2393 | 0 | const auto* object = element->unpack<ObjectVal>(); |
2394 | 0 | bool find = false; |
2395 | 0 | for (const auto& item : *object) { |
2396 | 0 | Slice key(item.getKeyStr(), item.klen()); |
2397 | 0 | const auto* child_element = item.value(); |
2398 | | // construct an object member path leg. |
2399 | 0 | auto leg = std::make_unique<leg_info>(key.data, key.size, 0, MEMBER_CODE); |
2400 | 0 | cur_path->add_leg_to_leg_vector(std::move(leg)); |
2401 | 0 | find |= find_matches(child_element, one_match, state, cur_path, matches); |
2402 | 0 | cur_path->pop_leg_from_leg_vector(); |
2403 | 0 | if (one_match && find) { |
2404 | 0 | return true; |
2405 | 0 | } |
2406 | 0 | } |
2407 | 0 | return find; |
2408 | 0 | } else if (element->isArray()) { |
2409 | 0 | const auto* array = element->unpack<ArrayVal>(); |
2410 | 0 | bool find = false; |
2411 | 0 | for (int i = 0; i < array->numElem(); ++i) { |
2412 | 0 | auto leg = std::make_unique<leg_info>(nullptr, 0, i, ARRAY_CODE); |
2413 | 0 | cur_path->add_leg_to_leg_vector(std::move(leg)); |
2414 | 0 | const auto* child_element = array->get(i); |
2415 | | // construct an array cell path leg. |
2416 | 0 | find |= find_matches(child_element, one_match, state, cur_path, matches); |
2417 | 0 | cur_path->pop_leg_from_leg_vector(); |
2418 | 0 | if (one_match && find) { |
2419 | 0 | return true; |
2420 | 0 | } |
2421 | 0 | } |
2422 | 0 | return find; |
2423 | 0 | } else { |
2424 | 0 | return false; |
2425 | 0 | } |
2426 | 0 | } |
2427 | | |
2428 | | void make_result_str(JsonbWriter& writer, std::unordered_set<std::string>& matches, |
2429 | 0 | ColumnString* result_col) const { |
2430 | 0 | if (matches.size() == 1) { |
2431 | 0 | for (const auto& str_ref : matches) { |
2432 | 0 | writer.writeStartString(); |
2433 | 0 | writer.writeString(str_ref); |
2434 | 0 | writer.writeEndString(); |
2435 | 0 | } |
2436 | 0 | } else { |
2437 | 0 | writer.writeStartArray(); |
2438 | 0 | for (const auto& str_ref : matches) { |
2439 | 0 | writer.writeStartString(); |
2440 | 0 | writer.writeString(str_ref); |
2441 | 0 | writer.writeEndString(); |
2442 | 0 | } |
2443 | 0 | writer.writeEndArray(); |
2444 | 0 | } |
2445 | |
|
2446 | 0 | result_col->insert_data(writer.getOutput()->getBuffer(), |
2447 | 0 | (size_t)writer.getOutput()->getSize()); |
2448 | 0 | } |
2449 | | |
2450 | | template <bool search_is_const> |
2451 | | Status execute_vector(Block& block, size_t input_rows_count, CheckNullFun json_null_check, |
2452 | | GetJsonStringRefFun col_json_string, CheckNullFun one_null_check, |
2453 | | OneFun one_check, CheckNullFun search_null_check, |
2454 | | const ColumnString* col_search_string, FunctionContext* context, |
2455 | 0 | size_t result) const { |
2456 | 0 | auto result_col = ColumnString::create(); |
2457 | 0 | auto null_map = ColumnUInt8::create(input_rows_count, 0); |
2458 | |
|
2459 | 0 | std::shared_ptr<LikeState> state_ptr; |
2460 | 0 | LikeState* state = nullptr; |
2461 | 0 | if (search_is_const) { |
2462 | 0 | state = reinterpret_cast<LikeState*>( |
2463 | 0 | context->get_function_state(FunctionContext::THREAD_LOCAL)); |
2464 | 0 | } |
2465 | |
|
2466 | 0 | bool is_one = false; |
2467 | |
|
2468 | 0 | JsonbWriter writer; |
2469 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { |
2470 | | // an error occurs if the json_doc argument is not a valid json document. |
2471 | 0 | if (json_null_check(i)) { |
2472 | 0 | null_map->get_data()[i] = 1; |
2473 | 0 | result_col->insert_data("", 0); |
2474 | 0 | continue; |
2475 | 0 | } |
2476 | 0 | const auto& json_doc_str = col_json_string(i); |
2477 | 0 | const JsonbDocument* json_doc = nullptr; |
2478 | 0 | auto st = JsonbDocument::checkAndCreateDocument(json_doc_str.data, json_doc_str.size, |
2479 | 0 | &json_doc); |
2480 | 0 | if (!st.ok()) { |
2481 | 0 | return Status::InvalidArgument( |
2482 | 0 | "the json_doc argument at row {} is not a valid json document: {}", i, |
2483 | 0 | st.to_string()); |
2484 | 0 | } |
2485 | | |
2486 | 0 | if (!one_null_check(i)) { |
2487 | 0 | RETURN_IF_ERROR(one_check(i, &is_one)); |
2488 | 0 | } |
2489 | | |
2490 | 0 | if (one_null_check(i) || search_null_check(i)) { |
2491 | 0 | null_map->get_data()[i] = 1; |
2492 | 0 | result_col->insert_data("", 0); |
2493 | 0 | continue; |
2494 | 0 | } |
2495 | | |
2496 | | // an error occurs if any path argument is not a valid path expression. |
2497 | 0 | std::string root_path_str = "$"; |
2498 | 0 | JsonbPath root_path; |
2499 | 0 | root_path.seek(root_path_str.c_str(), root_path_str.size()); |
2500 | 0 | std::vector<JsonbPath*> paths; |
2501 | 0 | paths.push_back(&root_path); |
2502 | |
|
2503 | 0 | if (!search_is_const) { |
2504 | 0 | state_ptr = std::make_shared<LikeState>(); |
2505 | 0 | state_ptr->is_like_pattern = true; |
2506 | 0 | const auto& search_str = col_search_string->get_data_at(i); |
2507 | 0 | RETURN_IF_ERROR(FunctionLike::construct_like_const_state(context, search_str, |
2508 | 0 | state_ptr, false)); |
2509 | 0 | state = state_ptr.get(); |
2510 | 0 | } |
2511 | | |
2512 | | // maintain a hashset to deduplicate matches. |
2513 | 0 | std::unordered_set<std::string> matches; |
2514 | 0 | for (const auto& item : paths) { |
2515 | 0 | auto* cur_path = item; |
2516 | 0 | auto find = find_matches(json_doc->getValue(), is_one, state, cur_path, &matches); |
2517 | 0 | if (is_one && find) { |
2518 | 0 | break; |
2519 | 0 | } |
2520 | 0 | } |
2521 | 0 | if (matches.empty()) { |
2522 | | // returns NULL if the search_str is not found in the document. |
2523 | 0 | null_map->get_data()[i] = 1; |
2524 | 0 | result_col->insert_data("", 0); |
2525 | 0 | continue; |
2526 | 0 | } |
2527 | | |
2528 | 0 | writer.reset(); |
2529 | 0 | make_result_str(writer, matches, result_col.get()); |
2530 | 0 | } |
2531 | 0 | auto result_col_nullable = |
2532 | 0 | ColumnNullable::create(std::move(result_col), std::move(null_map)); |
2533 | 0 | block.replace_by_position(result, std::move(result_col_nullable)); |
2534 | 0 | return Status::OK(); |
2535 | 0 | } Unexecuted instantiation: _ZNK5doris18FunctionJsonSearch14execute_vectorILb1EEENS_6StatusERNS_5BlockEmSt8functionIFbmEES5_IFNS_9StringRefEmEES7_S5_IFS2_mPbEES7_PKNS_9ColumnStrIjEEPNS_15FunctionContextEm Unexecuted instantiation: _ZNK5doris18FunctionJsonSearch14execute_vectorILb0EEENS_6StatusERNS_5BlockEmSt8functionIFbmEES5_IFNS_9StringRefEmEES7_S5_IFS2_mPbEES7_PKNS_9ColumnStrIjEEPNS_15FunctionContextEm |
2536 | | |
2537 | | static constexpr auto one = "one"; |
2538 | | static constexpr auto all = "all"; |
2539 | | |
2540 | | public: |
2541 | | static constexpr auto name = "json_search"; |
2542 | 2 | static FunctionPtr create() { return std::make_shared<FunctionJsonSearch>(); } |
2543 | | |
2544 | 1 | String get_name() const override { return name; } |
2545 | 1 | bool is_variadic() const override { return false; } |
2546 | 0 | size_t get_number_of_arguments() const override { return 3; } |
2547 | | |
2548 | 0 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
2549 | 0 | return make_nullable(std::make_shared<DataTypeJsonb>()); |
2550 | 0 | } |
2551 | | |
2552 | 0 | bool use_default_implementation_for_nulls() const override { return false; } |
2553 | | |
2554 | 0 | Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { |
2555 | 0 | if (scope != FunctionContext::THREAD_LOCAL) { |
2556 | 0 | return Status::OK(); |
2557 | 0 | } |
2558 | 0 | if (context->is_col_constant(2)) { |
2559 | 0 | std::shared_ptr<LikeState> state = std::make_shared<LikeState>(); |
2560 | 0 | state->is_like_pattern = true; |
2561 | 0 | const auto pattern_col = context->get_constant_col(2)->column_ptr; |
2562 | 0 | const auto& pattern = pattern_col->get_data_at(0); |
2563 | 0 | RETURN_IF_ERROR( |
2564 | 0 | FunctionLike::construct_like_const_state(context, pattern, state, false)); |
2565 | 0 | context->set_function_state(scope, state); |
2566 | 0 | } |
2567 | 0 | return Status::OK(); |
2568 | 0 | } |
2569 | | |
2570 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
2571 | 0 | uint32_t result, size_t input_rows_count) const override { |
2572 | | // the json_doc, one_or_all, and search_str must be given. |
2573 | | // and we require the positions are static. |
2574 | 0 | if (arguments.size() < 3) { |
2575 | 0 | return Status::InvalidArgument("too few arguments for function {}", name); |
2576 | 0 | } |
2577 | 0 | if (arguments.size() > 3) { |
2578 | 0 | return Status::NotSupported("escape and path params are not support now"); |
2579 | 0 | } |
2580 | | |
2581 | 0 | CheckNullFun json_null_check = always_not_null; |
2582 | 0 | GetJsonStringRefFun get_json_fun; |
2583 | | // prepare jsonb data column |
2584 | 0 | auto&& [col_json, json_is_const] = |
2585 | 0 | unpack_if_const(block.get_by_position(arguments[0]).column); |
2586 | 0 | const auto* col_json_string = check_and_get_column<ColumnString>(col_json.get()); |
2587 | 0 | if (const auto* nullable = check_and_get_column<ColumnNullable>(col_json.get())) { |
2588 | 0 | col_json_string = |
2589 | 0 | check_and_get_column<ColumnString>(nullable->get_nested_column_ptr().get()); |
2590 | 0 | } |
2591 | |
|
2592 | 0 | if (!col_json_string) { |
2593 | 0 | return Status::RuntimeError("Illegal arg json {} should be ColumnString", |
2594 | 0 | col_json->get_name()); |
2595 | 0 | } |
2596 | | |
2597 | 0 | auto create_all_null_result = [&]() { |
2598 | 0 | auto res_str = ColumnString::create(); |
2599 | 0 | res_str->insert_default(); |
2600 | 0 | auto res = ColumnNullable::create(std::move(res_str), ColumnUInt8::create(1, 1)); |
2601 | 0 | if (input_rows_count > 1) { |
2602 | 0 | block.get_by_position(result).column = |
2603 | 0 | ColumnConst::create(std::move(res), input_rows_count); |
2604 | 0 | } else { |
2605 | 0 | block.get_by_position(result).column = std::move(res); |
2606 | 0 | } |
2607 | 0 | return Status::OK(); |
2608 | 0 | }; |
2609 | |
|
2610 | 0 | if (json_is_const) { |
2611 | 0 | if (col_json->is_null_at(0)) { |
2612 | 0 | return create_all_null_result(); |
2613 | 0 | } else { |
2614 | 0 | const auto& json_str = col_json_string->get_data_at(0); |
2615 | 0 | get_json_fun = [json_str](size_t i) { return json_str; }; |
2616 | 0 | } |
2617 | 0 | } else { |
2618 | 0 | json_null_check = [col_json](size_t i) { return col_json->is_null_at(i); }; |
2619 | 0 | get_json_fun = [col_json_string](size_t i) { return col_json_string->get_data_at(i); }; |
2620 | 0 | } |
2621 | | |
2622 | | // one_or_all |
2623 | 0 | CheckNullFun one_null_check = always_not_null; |
2624 | 0 | OneFun one_check = always_one; |
2625 | 0 | auto&& [col_one, one_is_const] = |
2626 | 0 | unpack_if_const(block.get_by_position(arguments[1]).column); |
2627 | 0 | one_is_const |= input_rows_count == 1; |
2628 | 0 | const auto* col_one_string = check_and_get_column<ColumnString>(col_one.get()); |
2629 | 0 | if (const auto* nullable = check_and_get_column<ColumnNullable>(col_one.get())) { |
2630 | 0 | col_one_string = check_and_get_column<ColumnString>(*nullable->get_nested_column_ptr()); |
2631 | 0 | } |
2632 | 0 | if (!col_one_string) { |
2633 | 0 | return Status::RuntimeError("Illegal arg one {} should be ColumnString", |
2634 | 0 | col_one->get_name()); |
2635 | 0 | } |
2636 | 0 | if (one_is_const) { |
2637 | 0 | if (col_one->is_null_at(0)) { |
2638 | 0 | return create_all_null_result(); |
2639 | 0 | } else { |
2640 | 0 | const auto& one_or_all = col_one_string->get_data_at(0); |
2641 | 0 | std::string one_or_all_str = one_or_all.to_string(); |
2642 | 0 | if (strcasecmp(one_or_all_str.c_str(), all) == 0) { |
2643 | 0 | one_check = always_all; |
2644 | 0 | } else if (strcasecmp(one_or_all_str.c_str(), one) == 0) { |
2645 | | // nothing |
2646 | 0 | } else { |
2647 | | // an error occurs if the one_or_all argument is not 'one' nor 'all'. |
2648 | 0 | return Status::InvalidArgument( |
2649 | 0 | "the one_or_all argument {} is not 'one' not 'all'", one_or_all_str); |
2650 | 0 | } |
2651 | 0 | } |
2652 | 0 | } else { |
2653 | 0 | one_null_check = [col_one](size_t i) { return col_one->is_null_at(i); }; |
2654 | 0 | one_check = [col_one_string](size_t i, bool* is_one) { |
2655 | 0 | const auto& one_or_all = col_one_string->get_data_at(i); |
2656 | 0 | std::string one_or_all_str = one_or_all.to_string(); |
2657 | 0 | if (strcasecmp(one_or_all_str.c_str(), all) == 0) { |
2658 | 0 | *is_one = false; |
2659 | 0 | } else if (strcasecmp(one_or_all_str.c_str(), one) == 0) { |
2660 | 0 | *is_one = true; |
2661 | 0 | } else { |
2662 | | // an error occurs if the one_or_all argument is not 'one' nor 'all'. |
2663 | 0 | return Status::InvalidArgument( |
2664 | 0 | "the one_or_all argument {} is not 'one' not 'all'", one_or_all_str); |
2665 | 0 | } |
2666 | 0 | return Status::OK(); |
2667 | 0 | }; |
2668 | 0 | } |
2669 | | |
2670 | | // search_str |
2671 | 0 | auto&& [col_search, search_is_const] = |
2672 | 0 | unpack_if_const(block.get_by_position(arguments[2]).column); |
2673 | |
|
2674 | 0 | const auto* col_search_string = check_and_get_column<ColumnString>(col_search.get()); |
2675 | 0 | if (const auto* nullable = check_and_get_column<ColumnNullable>(col_search.get())) { |
2676 | 0 | col_search_string = |
2677 | 0 | check_and_get_column<ColumnString>(*nullable->get_nested_column_ptr()); |
2678 | 0 | } |
2679 | 0 | if (!col_search_string) { |
2680 | 0 | return Status::RuntimeError("Illegal arg pattern {} should be ColumnString", |
2681 | 0 | col_search->get_name()); |
2682 | 0 | } |
2683 | 0 | if (search_is_const) { |
2684 | 0 | CheckNullFun search_null_check = always_not_null; |
2685 | 0 | if (col_search->is_null_at(0)) { |
2686 | 0 | return create_all_null_result(); |
2687 | 0 | } |
2688 | 0 | RETURN_IF_ERROR(execute_vector<true>( |
2689 | 0 | block, input_rows_count, json_null_check, get_json_fun, one_null_check, |
2690 | 0 | one_check, search_null_check, col_search_string, context, result)); |
2691 | 0 | } else { |
2692 | 0 | CheckNullFun search_null_check = [col_search](size_t i) { |
2693 | 0 | return col_search->is_null_at(i); |
2694 | 0 | }; |
2695 | 0 | RETURN_IF_ERROR(execute_vector<false>( |
2696 | 0 | block, input_rows_count, json_null_check, get_json_fun, one_null_check, |
2697 | 0 | one_check, search_null_check, col_search_string, context, result)); |
2698 | 0 | } |
2699 | 0 | return Status::OK(); |
2700 | 0 | } |
2701 | | }; |
2702 | | |
/// Heap-backed scratch buffer holding one serialized document.
/// `size` is the number of bytes currently in use, `capacity` the number of
/// bytes allocated behind `ptr`.
struct DocumentBuffer {
    std::unique_ptr<char[]> ptr;
    size_t size {0};
    size_t capacity {0};
};
2708 | | |
2709 | | class FunctionJsonbRemove : public IFunction { |
2710 | | public: |
2711 | | static constexpr auto name = "jsonb_remove"; |
2712 | | static constexpr auto alias = "json_remove"; |
2713 | | |
2714 | 2 | static FunctionPtr create() { return std::make_shared<FunctionJsonbRemove>(); } |
2715 | | |
2716 | 0 | String get_name() const override { return name; } |
2717 | | |
2718 | 0 | size_t get_number_of_arguments() const override { return 0; } |
2719 | 1 | bool is_variadic() const override { return true; } |
2720 | | |
2721 | 0 | bool use_default_implementation_for_nulls() const override { return false; } |
2722 | | |
2723 | 0 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
2724 | 0 | return make_nullable(std::make_shared<DataTypeJsonb>()); |
2725 | 0 | } |
2726 | | |
2727 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
2728 | 0 | uint32_t result, size_t input_rows_count) const override { |
2729 | 0 | DCHECK_GE(arguments.size(), 2); |
2730 | | |
2731 | | // Check if arguments count is valid (json_doc + at least one path) |
2732 | 0 | if (arguments.size() < 2) { |
2733 | 0 | return Status::InvalidArgument("json_remove requires at least 2 arguments"); |
2734 | 0 | } |
2735 | | |
2736 | 0 | auto return_data_type = make_nullable(std::make_shared<DataTypeJsonb>()); |
2737 | 0 | auto result_column = return_data_type->create_column(); |
2738 | 0 | auto& nullable_column = assert_cast<ColumnNullable&>(*result_column); |
2739 | 0 | auto& res_chars = |
2740 | 0 | assert_cast<ColumnString&>(nullable_column.get_nested_column()).get_chars(); |
2741 | 0 | auto& res_offsets = |
2742 | 0 | assert_cast<ColumnString&>(nullable_column.get_nested_column()).get_offsets(); |
2743 | 0 | auto& null_map = nullable_column.get_null_map_data(); |
2744 | |
|
2745 | 0 | res_chars.reserve(input_rows_count * 64); |
2746 | 0 | res_offsets.resize(input_rows_count); |
2747 | 0 | null_map.resize_fill(input_rows_count, 0); |
2748 | | |
2749 | | // Get JSON document column |
2750 | 0 | auto [json_column, json_const] = |
2751 | 0 | unpack_if_const(block.get_by_position(arguments[0]).column); |
2752 | 0 | const auto* json_nullable = check_and_get_column<ColumnNullable>(json_column.get()); |
2753 | 0 | const ColumnString* json_data_column = nullptr; |
2754 | 0 | const NullMap* json_null_map = nullptr; |
2755 | |
|
2756 | 0 | if (json_nullable) { |
2757 | 0 | json_null_map = &json_nullable->get_null_map_data(); |
2758 | 0 | json_data_column = |
2759 | 0 | check_and_get_column<ColumnString>(&json_nullable->get_nested_column()); |
2760 | 0 | } else { |
2761 | 0 | json_data_column = check_and_get_column<ColumnString>(json_column.get()); |
2762 | 0 | } |
2763 | |
|
2764 | 0 | if (!json_data_column) { |
2765 | 0 | return Status::InvalidArgument("First argument must be a JSON document"); |
2766 | 0 | } |
2767 | | |
2768 | | // Parse paths |
2769 | 0 | std::vector<const ColumnString*> path_columns; |
2770 | 0 | std::vector<const NullMap*> path_null_maps; |
2771 | 0 | std::vector<bool> path_constants; |
2772 | |
|
2773 | 0 | for (size_t i = 1; i < arguments.size(); ++i) { |
2774 | 0 | auto [path_column, path_const] = |
2775 | 0 | unpack_if_const(block.get_by_position(arguments[i]).column); |
2776 | 0 | const auto* path_nullable = check_and_get_column<ColumnNullable>(path_column.get()); |
2777 | |
|
2778 | 0 | if (path_nullable) { |
2779 | 0 | path_null_maps.push_back(&path_nullable->get_null_map_data()); |
2780 | 0 | path_columns.push_back( |
2781 | 0 | check_and_get_column<ColumnString>(&path_nullable->get_nested_column())); |
2782 | 0 | } else { |
2783 | 0 | path_null_maps.push_back(nullptr); |
2784 | 0 | path_columns.push_back(check_and_get_column<ColumnString>(path_column.get())); |
2785 | 0 | } |
2786 | |
|
2787 | 0 | if (!path_columns.back()) { |
2788 | 0 | return Status::InvalidArgument( |
2789 | 0 | fmt::format("Argument {} must be a string path", i + 1)); |
2790 | 0 | } |
2791 | | |
2792 | 0 | path_constants.push_back(path_const); |
2793 | 0 | } |
2794 | | |
2795 | | // Reusable JsonbWriter for performance |
2796 | 0 | JsonbWriter writer; |
2797 | |
|
2798 | 0 | for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) { |
2799 | 0 | size_t json_idx = index_check_const(row_idx, json_const); |
2800 | | |
2801 | | // Check if JSON document is null |
2802 | 0 | if (json_null_map && (*json_null_map)[json_idx]) { |
2803 | 0 | null_map[row_idx] = 1; |
2804 | 0 | res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size()); |
2805 | 0 | continue; |
2806 | 0 | } |
2807 | | |
2808 | | // Parse JSON document |
2809 | 0 | const auto& json_data = json_data_column->get_data_at(json_idx); |
2810 | 0 | const JsonbDocument* json_doc = nullptr; |
2811 | 0 | Status parse_status = JsonbDocument::checkAndCreateDocument(json_data.data, |
2812 | 0 | json_data.size, &json_doc); |
2813 | |
|
2814 | 0 | if (!parse_status.ok() || !json_doc) { |
2815 | 0 | null_map[row_idx] = 1; |
2816 | 0 | res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size()); |
2817 | 0 | continue; |
2818 | 0 | } |
2819 | | |
2820 | | // Check if any path is null |
2821 | 0 | bool has_null_path = false; |
2822 | 0 | for (size_t path_idx = 0; path_idx < path_columns.size(); ++path_idx) { |
2823 | 0 | size_t idx = index_check_const(row_idx, path_constants[path_idx]); |
2824 | 0 | if (path_null_maps[path_idx] && (*path_null_maps[path_idx])[idx]) { |
2825 | 0 | has_null_path = true; |
2826 | 0 | break; |
2827 | 0 | } |
2828 | 0 | } |
2829 | |
|
2830 | 0 | if (has_null_path) { |
2831 | 0 | null_map[row_idx] = 1; |
2832 | 0 | res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size()); |
2833 | 0 | continue; |
2834 | 0 | } |
2835 | | |
2836 | 0 | std::vector<JsonbPath> paths; |
2837 | 0 | std::vector<bool> path_constants_vec; |
2838 | |
|
2839 | 0 | for (size_t path_idx = 0; path_idx < path_columns.size(); ++path_idx) { |
2840 | 0 | size_t idx = index_check_const(row_idx, path_constants[path_idx]); |
2841 | 0 | const auto& path_data = path_columns[path_idx]->get_data_at(idx); |
2842 | |
|
2843 | 0 | JsonbPath path; |
2844 | 0 | if (!path.seek(path_data.data, path_data.size)) { |
2845 | 0 | return Status::InvalidArgument( |
2846 | 0 | "Json path error: Invalid Json Path for value: {} at row: {}", |
2847 | 0 | std::string_view(path_data.data, path_data.size), row_idx); |
2848 | 0 | } |
2849 | | |
2850 | 0 | if (path.is_wildcard() || path.is_supper_wildcard()) { |
2851 | 0 | return Status::InvalidArgument( |
2852 | 0 | "In this situation, path expressions may not contain the * and ** " |
2853 | 0 | "tokens or an array range, argument index: {}, row index: {}", |
2854 | 0 | path_idx + 1, row_idx); |
2855 | 0 | } |
2856 | | |
2857 | 0 | paths.push_back(std::move(path)); |
2858 | 0 | path_constants_vec.push_back(path_constants[path_idx]); |
2859 | 0 | } |
2860 | | |
2861 | 0 | const JsonbValue* current_value = json_doc->getValue(); |
2862 | |
|
2863 | 0 | DocumentBuffer tmp_buffer; |
2864 | |
|
2865 | 0 | for (size_t path_idx = 0; path_idx < paths.size(); ++path_idx) { |
2866 | 0 | writer.reset(); |
2867 | |
|
2868 | 0 | auto find_result = current_value->findValue(paths[path_idx]); |
2869 | |
|
2870 | 0 | if (find_result.is_wildcard) { |
2871 | 0 | continue; |
2872 | 0 | } |
2873 | | |
2874 | 0 | if (find_result.value) { |
2875 | 0 | RETURN_IF_ERROR(clone_without_path(current_value, paths[path_idx], writer)); |
2876 | | |
2877 | 0 | auto* writer_output = writer.getOutput(); |
2878 | 0 | if (writer_output->getSize() > tmp_buffer.capacity) { |
2879 | 0 | tmp_buffer.capacity = |
2880 | 0 | ((size_t(writer_output->getSize()) + 1024 - 1) / 1024) * 1024; |
2881 | 0 | tmp_buffer.ptr = std::make_unique<char[]>(tmp_buffer.capacity); |
2882 | 0 | DCHECK_LE(writer_output->getSize(), tmp_buffer.capacity); |
2883 | 0 | } |
2884 | |
|
2885 | 0 | memcpy(tmp_buffer.ptr.get(), writer_output->getBuffer(), |
2886 | 0 | writer_output->getSize()); |
2887 | 0 | tmp_buffer.size = writer_output->getSize(); |
2888 | |
|
2889 | 0 | const JsonbDocument* new_doc = nullptr; |
2890 | 0 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument( |
2891 | 0 | tmp_buffer.ptr.get(), tmp_buffer.size, &new_doc)); |
2892 | | |
2893 | 0 | current_value = new_doc->getValue(); |
2894 | 0 | } |
2895 | 0 | } |
2896 | | |
2897 | 0 | const JsonbDocument* modified_doc = nullptr; |
2898 | 0 | if (current_value != json_doc->getValue()) { |
2899 | 0 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument( |
2900 | 0 | tmp_buffer.ptr.get(), tmp_buffer.size, &modified_doc)); |
2901 | 0 | } else { |
2902 | 0 | modified_doc = json_doc; |
2903 | 0 | } |
2904 | | |
2905 | | // Write the final result |
2906 | 0 | const auto size = modified_doc->numPackedBytes(); |
2907 | 0 | res_chars.insert(reinterpret_cast<const char*>(modified_doc), |
2908 | 0 | reinterpret_cast<const char*>(modified_doc) + size); |
2909 | 0 | res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size()); |
2910 | 0 | } |
2911 | | |
2912 | 0 | block.get_by_position(result).column = std::move(result_column); |
2913 | 0 | return Status::OK(); |
2914 | 0 | } |
2915 | | |
2916 | | private: |
2917 | | Status clone_without_path(const JsonbValue* root, const JsonbPath& path, |
2918 | 0 | JsonbWriter& writer) const { |
2919 | | // Start writing at the root level |
2920 | 0 | if (root->isObject()) { |
2921 | 0 | writer.writeStartObject(); |
2922 | 0 | RETURN_IF_ERROR(clone_object_without_path(root, path, 0, writer)); |
2923 | 0 | writer.writeEndObject(); |
2924 | 0 | } else if (root->isArray()) { |
2925 | 0 | writer.writeStartArray(); |
2926 | 0 | RETURN_IF_ERROR(clone_array_without_path(root, path, 0, writer)); |
2927 | 0 | writer.writeEndArray(); |
2928 | 0 | } else { |
2929 | | // Primitive value - can't remove anything from it |
2930 | 0 | writer.writeValue(root); |
2931 | 0 | } |
2932 | 0 | return Status::OK(); |
2933 | 0 | } |
2934 | | |
2935 | | Status clone_object_without_path(const JsonbValue* obj_value, const JsonbPath& path, |
2936 | 0 | size_t depth, JsonbWriter& writer) const { |
2937 | 0 | const auto* obj = obj_value->unpack<ObjectVal>(); |
2938 | |
|
2939 | 0 | for (const auto& kv : *obj) { |
2940 | 0 | std::string key(kv.getKeyStr(), kv.klen()); |
2941 | |
|
2942 | 0 | if (depth < path.get_leg_vector_size()) { |
2943 | 0 | const auto* leg = path.get_leg_from_leg_vector(depth); |
2944 | 0 | if (leg->type == MEMBER_CODE) { |
2945 | 0 | std::string target_key(leg->leg_ptr, leg->leg_len); |
2946 | |
|
2947 | 0 | if (key == target_key) { |
2948 | 0 | if (depth == path.get_leg_vector_size() - 1) { |
2949 | 0 | continue; |
2950 | 0 | } else { |
2951 | 0 | writer.writeKey(kv.getKeyStr(), kv.klen()); |
2952 | 0 | if (kv.value()->isObject()) { |
2953 | 0 | writer.writeStartObject(); |
2954 | 0 | RETURN_IF_ERROR(clone_object_without_path(kv.value(), path, |
2955 | 0 | depth + 1, writer)); |
2956 | 0 | writer.writeEndObject(); |
2957 | 0 | } else if (kv.value()->isArray()) { |
2958 | 0 | writer.writeStartArray(); |
2959 | 0 | RETURN_IF_ERROR(clone_array_without_path(kv.value(), path, |
2960 | 0 | depth + 1, writer)); |
2961 | 0 | writer.writeEndArray(); |
2962 | 0 | } else { |
2963 | 0 | writer.writeValue(kv.value()); |
2964 | 0 | } |
2965 | 0 | } |
2966 | 0 | } else { |
2967 | 0 | writer.writeKey(kv.getKeyStr(), kv.klen()); |
2968 | 0 | writer.writeValue(kv.value()); |
2969 | 0 | } |
2970 | 0 | } else { |
2971 | 0 | writer.writeKey(kv.getKeyStr(), kv.klen()); |
2972 | 0 | writer.writeValue(kv.value()); |
2973 | 0 | } |
2974 | 0 | } else { |
2975 | 0 | writer.writeKey(kv.getKeyStr(), kv.klen()); |
2976 | 0 | writer.writeValue(kv.value()); |
2977 | 0 | } |
2978 | 0 | } |
2979 | | |
2980 | 0 | return Status::OK(); |
2981 | 0 | } |
2982 | | |
2983 | | Status clone_array_without_path(const JsonbValue* arr_value, const JsonbPath& path, |
2984 | 0 | size_t depth, JsonbWriter& writer) const { |
2985 | 0 | const auto* arr = arr_value->unpack<ArrayVal>(); |
2986 | |
|
2987 | 0 | int index = 0; |
2988 | 0 | for (const auto& element : *arr) { |
2989 | 0 | if (depth < path.get_leg_vector_size()) { |
2990 | 0 | const auto* leg = path.get_leg_from_leg_vector(depth); |
2991 | 0 | if (leg->type == ARRAY_CODE) { |
2992 | 0 | int target_index = leg->array_index; |
2993 | |
|
2994 | 0 | if (index == target_index) { |
2995 | 0 | if (depth == path.get_leg_vector_size() - 1) { |
2996 | | // This is the target element to remove - skip it |
2997 | 0 | } else { |
2998 | 0 | if (element.isObject()) { |
2999 | 0 | writer.writeStartObject(); |
3000 | 0 | RETURN_IF_ERROR(clone_object_without_path(&element, path, depth + 1, |
3001 | 0 | writer)); |
3002 | 0 | writer.writeEndObject(); |
3003 | 0 | } else if (element.isArray()) { |
3004 | 0 | writer.writeStartArray(); |
3005 | 0 | RETURN_IF_ERROR(clone_array_without_path(&element, path, depth + 1, |
3006 | 0 | writer)); |
3007 | 0 | writer.writeEndArray(); |
3008 | 0 | } else { |
3009 | 0 | writer.writeValue(&element); |
3010 | 0 | } |
3011 | 0 | } |
3012 | 0 | } else { |
3013 | 0 | writer.writeValue(&element); |
3014 | 0 | } |
3015 | 0 | } else { |
3016 | 0 | writer.writeValue(&element); |
3017 | 0 | } |
3018 | 0 | } else { |
3019 | 0 | writer.writeValue(&element); |
3020 | 0 | } |
3021 | 0 | index++; |
3022 | 0 | } |
3023 | | |
3024 | 0 | return Status::OK(); |
3025 | 0 | } |
3026 | | }; |
3027 | | |
3028 | | class FunctionStripNullValue : public IFunction { |
3029 | | public: |
3030 | | static constexpr auto name = "strip_null_value"; |
3031 | 2 | static FunctionPtr create() { return std::make_shared<FunctionStripNullValue>(); } |
3032 | | |
3033 | 1 | String get_name() const override { return name; } |
3034 | 1 | bool is_variadic() const override { return false; } |
3035 | 0 | size_t get_number_of_arguments() const override { return 1; } |
3036 | | |
3037 | 0 | bool use_default_implementation_for_nulls() const override { return false; } |
3038 | | |
3039 | 0 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
3040 | 0 | return make_nullable(std::make_shared<DataTypeJsonb>()); |
3041 | 0 | } |
3042 | | |
3043 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
3044 | 0 | uint32_t result, size_t input_rows_count) const override { |
3045 | 0 | const auto& arg_column = block.get_by_position(arguments[0]).column; |
3046 | 0 | const ColumnString* json_column = nullptr; |
3047 | 0 | const NullMap* json_null_map = nullptr; |
3048 | 0 | if (arg_column->is_nullable()) { |
3049 | 0 | const auto& nullable_col = assert_cast<const ColumnNullable&>(*arg_column); |
3050 | 0 | json_column = assert_cast<const ColumnString*>(&nullable_col.get_nested_column()); |
3051 | 0 | json_null_map = &nullable_col.get_null_map_data(); |
3052 | 0 | } else { |
3053 | 0 | json_column = assert_cast<const ColumnString*>(arg_column.get()); |
3054 | 0 | } |
3055 | |
|
3056 | 0 | auto return_data_type = make_nullable(std::make_shared<DataTypeJsonb>()); |
3057 | 0 | auto result_column = return_data_type->create_column(); |
3058 | |
|
3059 | 0 | auto& result_nullmap = assert_cast<ColumnNullable&>(*result_column).get_null_map_data(); |
3060 | 0 | auto& result_data_col = assert_cast<ColumnString&>( |
3061 | 0 | assert_cast<ColumnNullable&>(*result_column).get_nested_column()); |
3062 | |
|
3063 | 0 | result_nullmap.resize_fill(input_rows_count, 0); |
3064 | 0 | for (size_t i = 0; i != input_rows_count; ++i) { |
3065 | 0 | if (json_null_map && (*json_null_map)[i]) { |
3066 | 0 | result_nullmap[i] = 1; |
3067 | 0 | result_data_col.insert_default(); |
3068 | 0 | continue; |
3069 | 0 | } |
3070 | 0 | const JsonbDocument* json_doc = nullptr; |
3071 | 0 | const auto& json_str = json_column->get_data_at(i); |
3072 | 0 | RETURN_IF_ERROR( |
3073 | 0 | JsonbDocument::checkAndCreateDocument(json_str.data, json_str.size, &json_doc)); |
3074 | 0 | if (json_doc) [[likely]] { |
3075 | 0 | if (json_doc->getValue()->isNull()) { |
3076 | 0 | result_nullmap[i] = 1; |
3077 | 0 | result_data_col.insert_default(); |
3078 | 0 | } else { |
3079 | 0 | result_nullmap[i] = 0; |
3080 | 0 | result_data_col.insert_data(json_str.data, json_str.size); |
3081 | 0 | } |
3082 | 0 | } else { |
3083 | 0 | result_nullmap[i] = 1; |
3084 | 0 | result_data_col.insert_default(); |
3085 | 0 | } |
3086 | 0 | } |
3087 | | |
3088 | 0 | block.get_by_position(result).column = std::move(result_column); |
3089 | 0 | return Status::OK(); |
3090 | 0 | } |
3091 | | }; |
3092 | | |
3093 | 1 | void register_function_jsonb(SimpleFunctionFactory& factory) { |
3094 | 1 | factory.register_function<FunctionJsonbParse>(FunctionJsonbParse::name); |
3095 | 1 | factory.register_alias(FunctionJsonbParse::name, FunctionJsonbParse::alias); |
3096 | 1 | factory.register_function<FunctionJsonbParseErrorNull>("json_parse_error_to_null"); |
3097 | 1 | factory.register_alias("json_parse_error_to_null", "jsonb_parse_error_to_null"); |
3098 | 1 | factory.register_function<FunctionJsonbParseErrorValue>("json_parse_error_to_value"); |
3099 | 1 | factory.register_alias("json_parse_error_to_value", "jsonb_parse_error_to_value"); |
3100 | | |
3101 | 1 | factory.register_function<FunctionJsonbExists>(); |
3102 | 1 | factory.register_alias(FunctionJsonbExists::name, FunctionJsonbExists::alias); |
3103 | 1 | factory.register_function<FunctionJsonbType>(); |
3104 | 1 | factory.register_alias(FunctionJsonbType::name, FunctionJsonbType::alias); |
3105 | | |
3106 | 1 | factory.register_function<FunctionJsonbKeys>(); |
3107 | 1 | factory.register_alias(FunctionJsonbKeys::name, FunctionJsonbKeys::alias); |
3108 | | |
3109 | 1 | factory.register_function<FunctionJsonbExtractIsnull>(); |
3110 | 1 | factory.register_alias(FunctionJsonbExtractIsnull::name, FunctionJsonbExtractIsnull::alias); |
3111 | | |
3112 | 1 | factory.register_function<FunctionJsonbExtractJsonb>(); |
3113 | 1 | factory.register_alias(FunctionJsonbExtractJsonb::name, FunctionJsonbExtractJsonb::alias); |
3114 | 1 | factory.register_function<FunctionJsonbExtractJsonbNoQuotes>(); |
3115 | 1 | factory.register_alias(FunctionJsonbExtractJsonbNoQuotes::name, |
3116 | 1 | FunctionJsonbExtractJsonbNoQuotes::alias); |
3117 | | |
3118 | 1 | factory.register_function<FunctionJsonbLength<JsonbLengthAndPathImpl>>(); |
3119 | 1 | factory.register_function<FunctionJsonbContains<JsonbContainsAndPathImpl>>(); |
3120 | | |
3121 | 1 | factory.register_function<FunctionJsonSearch>(); |
3122 | | |
3123 | 1 | factory.register_function<FunctionJsonbArray<false>>(); |
3124 | 1 | factory.register_alias(FunctionJsonbArray<false>::name, FunctionJsonbArray<false>::alias); |
3125 | | |
3126 | 1 | factory.register_function<FunctionJsonbArray<true>>("json_array_ignore_null"); |
3127 | 1 | factory.register_alias("json_array_ignore_null", "jsonb_array_ignore_null"); |
3128 | | |
3129 | 1 | factory.register_function<FunctionJsonbObject>(); |
3130 | 1 | factory.register_alias(FunctionJsonbObject::name, FunctionJsonbObject::alias); |
3131 | | |
3132 | 1 | factory.register_function<FunctionJsonbModify<JsonbModifyType::Insert>>(); |
3133 | 1 | factory.register_alias(FunctionJsonbModify<JsonbModifyType::Insert>::name, |
3134 | 1 | FunctionJsonbModify<JsonbModifyType::Insert>::alias); |
3135 | 1 | factory.register_function<FunctionJsonbModify<JsonbModifyType::Set>>(); |
3136 | 1 | factory.register_alias(FunctionJsonbModify<JsonbModifyType::Set>::name, |
3137 | 1 | FunctionJsonbModify<JsonbModifyType::Set>::alias); |
3138 | 1 | factory.register_function<FunctionJsonbModify<JsonbModifyType::Replace>>(); |
3139 | 1 | factory.register_alias(FunctionJsonbModify<JsonbModifyType::Replace>::name, |
3140 | 1 | FunctionJsonbModify<JsonbModifyType::Replace>::alias); |
3141 | | |
3142 | 1 | factory.register_function<FunctionJsonbRemove>(); |
3143 | 1 | factory.register_alias(FunctionJsonbRemove::name, FunctionJsonbRemove::alias); |
3144 | | |
3145 | 1 | factory.register_function<FunctionStripNullValue>(); |
3146 | 1 | } |
3147 | | |
3148 | | } // namespace doris |