be/src/exprs/function/function_jsonb.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include <glog/logging.h> |
19 | | |
20 | | #include <algorithm> |
21 | | #include <cstdlib> |
22 | | #include <memory> |
23 | | #include <string> |
24 | | #include <string_view> |
25 | | #include <tuple> |
26 | | #include <type_traits> |
27 | | #include <utility> |
28 | | #include <variant> |
29 | | |
30 | | #include "common/compiler_util.h" // IWYU pragma: keep |
31 | | #include "common/status.h" |
32 | | #include "core/assert_cast.h" |
33 | | #include "core/block/block.h" |
34 | | #include "core/block/column_numbers.h" |
35 | | #include "core/block/column_with_type_and_name.h" |
36 | | #include "core/column/column.h" |
37 | | #include "core/column/column_array.h" |
38 | | #include "core/column/column_const.h" |
39 | | #include "core/column/column_nullable.h" |
40 | | #include "core/column/column_string.h" |
41 | | #include "core/column/column_vector.h" |
42 | | #include "core/custom_allocator.h" |
43 | | #include "core/data_type/data_type.h" |
44 | | #include "core/data_type/data_type_array.h" |
45 | | #include "core/data_type/data_type_jsonb.h" |
46 | | #include "core/data_type/data_type_nullable.h" |
47 | | #include "core/data_type/data_type_string.h" |
48 | | #include "core/data_type/define_primitive_type.h" |
49 | | #include "core/data_type/primitive_type.h" |
50 | | #include "core/string_ref.h" |
51 | | #include "core/types.h" |
52 | | #include "core/value/jsonb_value.h" |
53 | | #include "exec/common/stringop_substring.h" |
54 | | #include "exec/common/template_helpers.hpp" |
55 | | #include "exec/common/util.hpp" |
56 | | #include "exprs/aggregate/aggregate_function.h" |
57 | | #include "exprs/function/function.h" |
58 | | #include "exprs/function/like.h" |
59 | | #include "exprs/function/simple_function_factory.h" |
60 | | #include "exprs/function_context.h" |
61 | | #include "util/jsonb_document.h" |
62 | | #include "util/jsonb_stream.h" |
63 | | #include "util/jsonb_utils.h" |
64 | | #include "util/jsonb_writer.h" |
65 | | #include "util/simd/bits.h" |
66 | | |
67 | | namespace doris { |
68 | | |
// Selects how the nullability of the parse result is decided.
enum class NullalbeMode {
    // The result type is always Nullable(JSONB).
    NULLABLE = 0,
    // The result is nullable only when at least one argument type is nullable.
    FOLLOW_INPUT,
};
70 | | |
// Selects what happens to a row whose input string is not valid JSON.
enum class JsonbParseErrorMode {
    // The whole call fails with an InvalidArgument status.
    FAIL = 0,
    // The row becomes NULL.
    RETURN_NULL,
    // The row takes a caller-supplied default (or '{}' when none is given).
    RETURN_VALUE,
};
72 | | |
73 | | // func(string,string) -> json |
74 | | template <NullalbeMode nullable_mode, JsonbParseErrorMode parse_error_handle_mode> |
75 | | class FunctionJsonbParseBase : public IFunction { |
76 | | private: |
77 | | struct FunctionJsonbParseState { |
78 | | StringRef default_value; |
79 | | JsonBinaryValue default_value_parser; |
80 | | bool has_const_default_value = false; |
81 | | bool default_is_null = false; |
82 | | }; |
83 | | |
84 | | public: |
85 | | static constexpr auto name = "json_parse"; |
86 | | static constexpr auto alias = "jsonb_parse"; |
87 | 87 | static FunctionPtr create() { return std::make_shared<FunctionJsonbParseBase>(); }_ZN5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE6createEv Line | Count | Source | 87 | 27 | static FunctionPtr create() { return std::make_shared<FunctionJsonbParseBase>(); } |
_ZN5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE6createEv Line | Count | Source | 87 | 39 | static FunctionPtr create() { return std::make_shared<FunctionJsonbParseBase>(); } |
_ZN5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE6createEv Line | Count | Source | 87 | 21 | static FunctionPtr create() { return std::make_shared<FunctionJsonbParseBase>(); } |
|
88 | | |
89 | 4 | String get_name() const override { |
90 | 4 | String error_mode; |
91 | 4 | switch (parse_error_handle_mode) { |
92 | 1 | case JsonbParseErrorMode::FAIL: |
93 | 1 | break; |
94 | 1 | case JsonbParseErrorMode::RETURN_NULL: |
95 | 1 | error_mode = "_error_to_null"; |
96 | 1 | break; |
97 | 2 | case JsonbParseErrorMode::RETURN_VALUE: |
98 | 2 | error_mode = "_error_to_value"; |
99 | 2 | break; |
100 | 4 | } |
101 | | |
102 | 4 | return name + error_mode; |
103 | 4 | } _ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE8get_nameB5cxx11Ev Line | Count | Source | 89 | 1 | String get_name() const override { | 90 | 1 | String error_mode; | 91 | 1 | switch (parse_error_handle_mode) { | 92 | 1 | case JsonbParseErrorMode::FAIL: | 93 | 1 | break; | 94 | 0 | case JsonbParseErrorMode::RETURN_NULL: | 95 | 0 | error_mode = "_error_to_null"; | 96 | 0 | break; | 97 | 0 | case JsonbParseErrorMode::RETURN_VALUE: | 98 | 0 | error_mode = "_error_to_value"; | 99 | 0 | break; | 100 | 1 | } | 101 | | | 102 | 1 | return name + error_mode; | 103 | 1 | } |
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE8get_nameB5cxx11Ev Line | Count | Source | 89 | 1 | String get_name() const override { | 90 | 1 | String error_mode; | 91 | 1 | switch (parse_error_handle_mode) { | 92 | 0 | case JsonbParseErrorMode::FAIL: | 93 | 0 | break; | 94 | 1 | case JsonbParseErrorMode::RETURN_NULL: | 95 | 1 | error_mode = "_error_to_null"; | 96 | 1 | break; | 97 | 0 | case JsonbParseErrorMode::RETURN_VALUE: | 98 | 0 | error_mode = "_error_to_value"; | 99 | 0 | break; | 100 | 1 | } | 101 | | | 102 | 1 | return name + error_mode; | 103 | 1 | } |
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE8get_nameB5cxx11Ev Line | Count | Source | 89 | 2 | String get_name() const override { | 90 | 2 | String error_mode; | 91 | 2 | switch (parse_error_handle_mode) { | 92 | 0 | case JsonbParseErrorMode::FAIL: | 93 | 0 | break; | 94 | 0 | case JsonbParseErrorMode::RETURN_NULL: | 95 | 0 | error_mode = "_error_to_null"; | 96 | 0 | break; | 97 | 2 | case JsonbParseErrorMode::RETURN_VALUE: | 98 | 2 | error_mode = "_error_to_value"; | 99 | 2 | break; | 100 | 2 | } | 101 | | | 102 | 2 | return name + error_mode; | 103 | 2 | } |
|
104 | | |
105 | 64 | bool is_variadic() const override { |
106 | 64 | return parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE; |
107 | 64 | } _ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE11is_variadicEv Line | Count | Source | 105 | 19 | bool is_variadic() const override { | 106 | 19 | return parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE; | 107 | 19 | } |
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE11is_variadicEv Line | Count | Source | 105 | 31 | bool is_variadic() const override { | 106 | 31 | return parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE; | 107 | 31 | } |
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE11is_variadicEv Line | Count | Source | 105 | 14 | bool is_variadic() const override { | 106 | 14 | return parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE; | 107 | 14 | } |
|
108 | | |
109 | 49 | size_t get_number_of_arguments() const override { |
110 | 49 | switch (parse_error_handle_mode) { |
111 | 18 | case JsonbParseErrorMode::FAIL: |
112 | 18 | return 1; |
113 | 30 | case JsonbParseErrorMode::RETURN_NULL: |
114 | 30 | return 1; |
115 | 1 | case JsonbParseErrorMode::RETURN_VALUE: |
116 | 1 | return 0; |
117 | 49 | } |
118 | 49 | } _ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE23get_number_of_argumentsEv Line | Count | Source | 109 | 18 | size_t get_number_of_arguments() const override { | 110 | 18 | switch (parse_error_handle_mode) { | 111 | 18 | case JsonbParseErrorMode::FAIL: | 112 | 18 | return 1; | 113 | 0 | case JsonbParseErrorMode::RETURN_NULL: | 114 | 0 | return 1; | 115 | 0 | case JsonbParseErrorMode::RETURN_VALUE: | 116 | 0 | return 0; | 117 | 18 | } | 118 | 18 | } |
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE23get_number_of_argumentsEv Line | Count | Source | 109 | 30 | size_t get_number_of_arguments() const override { | 110 | 30 | switch (parse_error_handle_mode) { | 111 | 0 | case JsonbParseErrorMode::FAIL: | 112 | 0 | return 1; | 113 | 30 | case JsonbParseErrorMode::RETURN_NULL: | 114 | 30 | return 1; | 115 | 0 | case JsonbParseErrorMode::RETURN_VALUE: | 116 | 0 | return 0; | 117 | 30 | } | 118 | 30 | } |
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE23get_number_of_argumentsEv Line | Count | Source | 109 | 1 | size_t get_number_of_arguments() const override { | 110 | 1 | switch (parse_error_handle_mode) { | 111 | 0 | case JsonbParseErrorMode::FAIL: | 112 | 0 | return 1; | 113 | 0 | case JsonbParseErrorMode::RETURN_NULL: | 114 | 0 | return 1; | 115 | 1 | case JsonbParseErrorMode::RETURN_VALUE: | 116 | 1 | return 0; | 117 | 1 | } | 118 | 1 | } |
|
119 | | |
120 | 60 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
121 | 60 | bool is_nullable = false; |
122 | 60 | switch (nullable_mode) { |
123 | 30 | case NullalbeMode::NULLABLE: |
124 | 30 | is_nullable = true; |
125 | 30 | break; |
126 | 30 | case NullalbeMode::FOLLOW_INPUT: { |
127 | 41 | for (auto arg : arguments) { |
128 | 41 | is_nullable |= arg->is_nullable(); |
129 | 41 | } |
130 | 30 | break; |
131 | 0 | } |
132 | 60 | } |
133 | | |
134 | 60 | return is_nullable ? make_nullable(std::make_shared<DataTypeJsonb>()) |
135 | 60 | : std::make_shared<DataTypeJsonb>(); |
136 | 60 | } _ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE Line | Count | Source | 120 | 18 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 121 | 18 | bool is_nullable = false; | 122 | 18 | switch (nullable_mode) { | 123 | 0 | case NullalbeMode::NULLABLE: | 124 | 0 | is_nullable = true; | 125 | 0 | break; | 126 | 18 | case NullalbeMode::FOLLOW_INPUT: { | 127 | 18 | for (auto arg : arguments) { | 128 | 18 | is_nullable |= arg->is_nullable(); | 129 | 18 | } | 130 | 18 | break; | 131 | 0 | } | 132 | 18 | } | 133 | | | 134 | 18 | return is_nullable ? make_nullable(std::make_shared<DataTypeJsonb>()) | 135 | 18 | : std::make_shared<DataTypeJsonb>(); | 136 | 18 | } |
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE Line | Count | Source | 120 | 30 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 121 | 30 | bool is_nullable = false; | 122 | 30 | switch (nullable_mode) { | 123 | 30 | case NullalbeMode::NULLABLE: | 124 | 30 | is_nullable = true; | 125 | 30 | break; | 126 | 0 | case NullalbeMode::FOLLOW_INPUT: { | 127 | 0 | for (auto arg : arguments) { | 128 | 0 | is_nullable |= arg->is_nullable(); | 129 | 0 | } | 130 | 0 | break; | 131 | 0 | } | 132 | 30 | } | 133 | | | 134 | 30 | return is_nullable ? make_nullable(std::make_shared<DataTypeJsonb>()) | 135 | 30 | : std::make_shared<DataTypeJsonb>(); | 136 | 30 | } |
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE Line | Count | Source | 120 | 12 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 121 | 12 | bool is_nullable = false; | 122 | 12 | switch (nullable_mode) { | 123 | 0 | case NullalbeMode::NULLABLE: | 124 | 0 | is_nullable = true; | 125 | 0 | break; | 126 | 12 | case NullalbeMode::FOLLOW_INPUT: { | 127 | 23 | for (auto arg : arguments) { | 128 | 23 | is_nullable |= arg->is_nullable(); | 129 | 23 | } | 130 | 12 | break; | 131 | 0 | } | 132 | 12 | } | 133 | | | 134 | 12 | return is_nullable ? make_nullable(std::make_shared<DataTypeJsonb>()) | 135 | 12 | : std::make_shared<DataTypeJsonb>(); | 136 | 12 | } |
|
137 | | |
138 | 135 | bool use_default_implementation_for_nulls() const override { return false; }_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE36use_default_implementation_for_nullsEv Line | Count | Source | 138 | 44 | bool use_default_implementation_for_nulls() const override { return false; } |
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE36use_default_implementation_for_nullsEv Line | Count | Source | 138 | 64 | bool use_default_implementation_for_nulls() const override { return false; } |
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE36use_default_implementation_for_nullsEv Line | Count | Source | 138 | 27 | bool use_default_implementation_for_nulls() const override { return false; } |
|
139 | | |
140 | 316 | Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { |
141 | 316 | if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) { |
142 | 59 | std::shared_ptr<FunctionJsonbParseState> state = |
143 | 59 | std::make_shared<FunctionJsonbParseState>(); |
144 | 59 | context->set_function_state(FunctionContext::FRAGMENT_LOCAL, state); |
145 | 59 | } |
146 | 316 | if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) { |
147 | 126 | if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) { |
148 | 11 | auto* state = reinterpret_cast<FunctionJsonbParseState*>( |
149 | 11 | context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); |
150 | 11 | if (state) { |
151 | 11 | if (context->get_num_args() == 2) { |
152 | 8 | if (context->is_col_constant(1)) { |
153 | 2 | const auto default_value_col = context->get_constant_col(1)->column_ptr; |
154 | 2 | if (default_value_col->is_null_at(0)) { |
155 | 1 | state->default_is_null = true; |
156 | 1 | } else { |
157 | 1 | const auto& default_value = default_value_col->get_data_at(0); |
158 | | |
159 | 1 | state->default_value = default_value; |
160 | 1 | state->has_const_default_value = true; |
161 | 1 | } |
162 | 2 | } |
163 | 8 | } else if (context->get_num_args() == 1) { |
164 | 2 | RETURN_IF_ERROR( |
165 | 2 | state->default_value_parser.from_json_string(std::string("{}"))); |
166 | 2 | state->default_value = StringRef(state->default_value_parser.value(), |
167 | 2 | state->default_value_parser.size()); |
168 | 2 | state->has_const_default_value = true; |
169 | 2 | } |
170 | 11 | } |
171 | 11 | } |
172 | | |
173 | 126 | if (context->get_num_args() != 1 && context->get_num_args() != 2) { |
174 | 1 | return Status::InvalidArgument( |
175 | 1 | "{} function should have 1 or 2 arguments, " |
176 | 1 | "but got {}", |
177 | 1 | get_name(), context->get_num_args()); |
178 | 1 | } |
179 | 126 | } |
180 | 125 | return Status::OK(); |
181 | 316 | } _ZN5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE4openEPNS_15FunctionContextENS4_18FunctionStateScopeE Line | Count | Source | 140 | 76 | Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { | 141 | 76 | if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) { | 142 | 18 | std::shared_ptr<FunctionJsonbParseState> state = | 143 | 18 | std::make_shared<FunctionJsonbParseState>(); | 144 | 18 | context->set_function_state(FunctionContext::FRAGMENT_LOCAL, state); | 145 | 18 | } | 146 | | if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) { | 147 | | if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) { | 148 | | auto* state = reinterpret_cast<FunctionJsonbParseState*>( | 149 | | context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); | 150 | | if (state) { | 151 | | if (context->get_num_args() == 2) { | 152 | | if (context->is_col_constant(1)) { | 153 | | const auto default_value_col = context->get_constant_col(1)->column_ptr; | 154 | | if (default_value_col->is_null_at(0)) { | 155 | | state->default_is_null = true; | 156 | | } else { | 157 | | const auto& default_value = default_value_col->get_data_at(0); | 158 | | | 159 | | state->default_value = default_value; | 160 | | state->has_const_default_value = true; | 161 | | } | 162 | | } | 163 | | } else if (context->get_num_args() == 1) { | 164 | | RETURN_IF_ERROR( | 165 | | state->default_value_parser.from_json_string(std::string("{}"))); | 166 | | state->default_value = StringRef(state->default_value_parser.value(), | 167 | | state->default_value_parser.size()); | 168 | | state->has_const_default_value = true; | 169 | | } | 170 | | } | 171 | | } | 172 | | | 173 | | if (context->get_num_args() != 1 && context->get_num_args() != 2) { | 174 | | return Status::InvalidArgument( | 175 | | "{} function should have 1 or 2 arguments, " | 176 | | "but got {}", | 177 | | get_name(), 
context->get_num_args()); | 178 | | } | 179 | | } | 180 | 76 | return Status::OK(); | 181 | 76 | } |
_ZN5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE4openEPNS_15FunctionContextENS4_18FunctionStateScopeE Line | Count | Source | 140 | 114 | Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { | 141 | 114 | if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) { | 142 | 30 | std::shared_ptr<FunctionJsonbParseState> state = | 143 | 30 | std::make_shared<FunctionJsonbParseState>(); | 144 | 30 | context->set_function_state(FunctionContext::FRAGMENT_LOCAL, state); | 145 | 30 | } | 146 | | if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) { | 147 | | if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) { | 148 | | auto* state = reinterpret_cast<FunctionJsonbParseState*>( | 149 | | context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); | 150 | | if (state) { | 151 | | if (context->get_num_args() == 2) { | 152 | | if (context->is_col_constant(1)) { | 153 | | const auto default_value_col = context->get_constant_col(1)->column_ptr; | 154 | | if (default_value_col->is_null_at(0)) { | 155 | | state->default_is_null = true; | 156 | | } else { | 157 | | const auto& default_value = default_value_col->get_data_at(0); | 158 | | | 159 | | state->default_value = default_value; | 160 | | state->has_const_default_value = true; | 161 | | } | 162 | | } | 163 | | } else if (context->get_num_args() == 1) { | 164 | | RETURN_IF_ERROR( | 165 | | state->default_value_parser.from_json_string(std::string("{}"))); | 166 | | state->default_value = StringRef(state->default_value_parser.value(), | 167 | | state->default_value_parser.size()); | 168 | | state->has_const_default_value = true; | 169 | | } | 170 | | } | 171 | | } | 172 | | | 173 | | if (context->get_num_args() != 1 && context->get_num_args() != 2) { | 174 | | return Status::InvalidArgument( | 175 | | "{} function should have 1 or 2 arguments, " | 176 | | "but got {}", | 177 | | get_name(), 
context->get_num_args()); | 178 | | } | 179 | | } | 180 | 114 | return Status::OK(); | 181 | 114 | } |
_ZN5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE4openEPNS_15FunctionContextENS4_18FunctionStateScopeE Line | Count | Source | 140 | 126 | Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { | 141 | 126 | if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) { | 142 | 11 | std::shared_ptr<FunctionJsonbParseState> state = | 143 | 11 | std::make_shared<FunctionJsonbParseState>(); | 144 | 11 | context->set_function_state(FunctionContext::FRAGMENT_LOCAL, state); | 145 | 11 | } | 146 | 126 | if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) { | 147 | 126 | if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) { | 148 | 11 | auto* state = reinterpret_cast<FunctionJsonbParseState*>( | 149 | 11 | context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); | 150 | 11 | if (state) { | 151 | 11 | if (context->get_num_args() == 2) { | 152 | 8 | if (context->is_col_constant(1)) { | 153 | 2 | const auto default_value_col = context->get_constant_col(1)->column_ptr; | 154 | 2 | if (default_value_col->is_null_at(0)) { | 155 | 1 | state->default_is_null = true; | 156 | 1 | } else { | 157 | 1 | const auto& default_value = default_value_col->get_data_at(0); | 158 | | | 159 | 1 | state->default_value = default_value; | 160 | 1 | state->has_const_default_value = true; | 161 | 1 | } | 162 | 2 | } | 163 | 8 | } else if (context->get_num_args() == 1) { | 164 | 2 | RETURN_IF_ERROR( | 165 | 2 | state->default_value_parser.from_json_string(std::string("{}"))); | 166 | 2 | state->default_value = StringRef(state->default_value_parser.value(), | 167 | 2 | state->default_value_parser.size()); | 168 | 2 | state->has_const_default_value = true; | 169 | 2 | } | 170 | 11 | } | 171 | 11 | } | 172 | | | 173 | 126 | if (context->get_num_args() != 1 && context->get_num_args() != 2) { | 174 | 1 | return Status::InvalidArgument( | 175 | 1 | "{} function should have 1 or 2 
arguments, " | 176 | 1 | "but got {}", | 177 | 1 | get_name(), context->get_num_args()); | 178 | 1 | } | 179 | 126 | } | 180 | 125 | return Status::OK(); | 181 | 126 | } |
|
182 | | |
183 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
184 | 75 | uint32_t result, size_t input_rows_count) const override { |
185 | 75 | auto&& [col_from, col_from_is_const] = |
186 | 75 | unpack_if_const(block.get_by_position(arguments[0]).column); |
187 | | |
188 | 75 | if (col_from_is_const && col_from->is_null_at(0)) { |
189 | 1 | auto col_str = ColumnString::create(); |
190 | 1 | col_str->insert_default(); |
191 | 1 | auto null_map = ColumnUInt8::create(1, 1); |
192 | 1 | auto nullable_col = ColumnNullable::create(std::move(col_str), std::move(null_map)); |
193 | 1 | block.get_by_position(result).column = |
194 | 1 | ColumnConst::create(std::move(nullable_col), input_rows_count); |
195 | 1 | return Status::OK(); |
196 | 1 | } |
197 | | |
198 | 74 | auto null_map = ColumnUInt8::create(0, 0); |
199 | 74 | bool is_nullable = false; |
200 | | |
201 | 74 | switch (nullable_mode) { |
202 | 34 | case NullalbeMode::NULLABLE: { |
203 | 34 | is_nullable = true; |
204 | 34 | break; |
205 | 0 | } |
206 | 40 | case NullalbeMode::FOLLOW_INPUT: { |
207 | 52 | for (auto arg : arguments) { |
208 | 52 | is_nullable |= block.get_by_position(arg).type->is_nullable(); |
209 | 52 | } |
210 | 40 | break; |
211 | 0 | } |
212 | 74 | } |
213 | | |
214 | 74 | if (is_nullable) { |
215 | 64 | null_map = ColumnUInt8::create(input_rows_count, 0); |
216 | 64 | } |
217 | | |
218 | 60 | const ColumnString* col_from_string = nullptr; |
219 | 74 | if (col_from->is_nullable()) { |
220 | 39 | const auto& nullable_col = assert_cast<const ColumnNullable&>(*col_from); |
221 | | |
222 | 39 | VectorizedUtils::update_null_map(null_map->get_data(), |
223 | 39 | nullable_col.get_null_map_data()); |
224 | 39 | col_from_string = |
225 | 39 | assert_cast<const ColumnString*>(nullable_col.get_nested_column_ptr().get()); |
226 | 39 | } else { |
227 | 35 | col_from_string = assert_cast<const ColumnString*>(col_from.get()); |
228 | 35 | } |
229 | | |
230 | 60 | StringRef constant_default_value; |
231 | 60 | bool default_value_const = false; |
232 | 60 | bool default_value_null_const = false; |
233 | 60 | ColumnPtr default_value_col; |
234 | 60 | JsonBinaryValue default_jsonb_value_parser; |
235 | 60 | const ColumnString* default_value_str_col = nullptr; |
236 | 60 | const NullMap* default_value_nullmap = nullptr; |
237 | 60 | if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) { |
238 | 14 | auto* state = reinterpret_cast<FunctionJsonbParseState*>( |
239 | 14 | context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); |
240 | 14 | if (state && state->has_const_default_value) { |
241 | 7 | constant_default_value = state->default_value; |
242 | 7 | default_value_null_const = state->default_is_null; |
243 | 7 | default_value_const = true; |
244 | 7 | } else if (arguments.size() > 1) { |
245 | 7 | if (block.get_by_position(arguments[1]).type->get_primitive_type() != |
246 | 7 | PrimitiveType::TYPE_JSONB) { |
247 | 1 | return Status::InvalidArgument( |
248 | 1 | "{} second argument should be jsonb type, but got {}", get_name(), |
249 | 1 | block.get_by_position(arguments[1]).type->get_name()); |
250 | 1 | } |
251 | 6 | std::tie(default_value_col, default_value_const) = |
252 | 6 | unpack_if_const(block.get_by_position(arguments[1]).column); |
253 | 6 | if (default_value_const) { |
254 | 1 | const JsonbDocument* default_value_doc = nullptr; |
255 | 1 | if (default_value_col->is_null_at(0)) { |
256 | 1 | default_value_null_const = true; |
257 | 1 | } else { |
258 | 0 | auto data = default_value_col->get_data_at(0); |
259 | 0 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(data.data, data.size, |
260 | 0 | &default_value_doc)); |
261 | 0 | constant_default_value = data; |
262 | 0 | } |
263 | 5 | } else { |
264 | 5 | if (default_value_col->is_nullable()) { |
265 | 4 | const auto& nullable_col = |
266 | 4 | assert_cast<const ColumnNullable&>(*default_value_col); |
267 | 4 | default_value_str_col = assert_cast<const ColumnString*>( |
268 | 4 | nullable_col.get_nested_column_ptr().get()); |
269 | 4 | default_value_nullmap = &(nullable_col.get_null_map_data()); |
270 | 4 | } else { |
271 | 1 | default_value_str_col = |
272 | 1 | assert_cast<const ColumnString*>(default_value_col.get()); |
273 | 1 | } |
274 | 5 | } |
275 | 6 | } else if (arguments.size() == 1) { |
276 | | // parse default value '{}' should always success. |
277 | 0 | RETURN_IF_ERROR(default_jsonb_value_parser.from_json_string(std::string("{}"))); |
278 | 0 | default_value_const = true; |
279 | 0 | constant_default_value.data = default_jsonb_value_parser.value(); |
280 | 0 | constant_default_value.size = default_jsonb_value_parser.size(); |
281 | 0 | } |
282 | 14 | } |
283 | | |
284 | 13 | auto col_to = ColumnString::create(); |
285 | | |
286 | 60 | col_to->reserve(input_rows_count); |
287 | | |
288 | 60 | auto& null_map_data = null_map->get_data(); |
289 | | |
290 | | // parser can be reused for performance |
291 | 60 | JsonBinaryValue jsonb_value; |
292 | | |
293 | 282 | for (size_t i = 0; i < input_rows_count; ++i) { |
294 | 205 | if (is_nullable && null_map_data[i]) { |
295 | 13 | col_to->insert_default(); |
296 | 13 | continue; |
297 | 13 | } |
298 | | |
299 | 192 | auto index = index_check_const(i, col_from_is_const); |
300 | 192 | const auto& val = col_from_string->get_data_at(index); |
301 | 192 | auto st = jsonb_value.from_json_string(val.data, val.size); |
302 | 192 | if (st.ok()) { |
303 | | // insert jsonb format data |
304 | 138 | col_to->insert_data(jsonb_value.value(), jsonb_value.size()); |
305 | 138 | } else { |
306 | 54 | if constexpr (parse_error_handle_mode == JsonbParseErrorMode::FAIL) { |
307 | 6 | return Status::InvalidArgument( |
308 | 6 | "Parse json document failed at row {}, error: {}", i, st.to_string()); |
309 | 17 | } else if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_NULL) { |
310 | 17 | null_map_data[i] = 1; |
311 | 17 | col_to->insert_default(); |
312 | 31 | } else { |
313 | 31 | if (default_value_const) { |
314 | 9 | if (default_value_null_const) { |
315 | 3 | null_map_data[i] = 1; |
316 | 3 | col_to->insert_default(); |
317 | 6 | } else { |
318 | 6 | col_to->insert_data(constant_default_value.data, |
319 | 6 | constant_default_value.size); |
320 | 6 | } |
321 | 22 | } else { |
322 | 22 | if (default_value_nullmap && (*default_value_nullmap)[i]) { |
323 | 3 | null_map_data[i] = 1; |
324 | 3 | col_to->insert_default(); |
325 | 3 | continue; |
326 | 3 | } |
327 | 19 | auto value = default_value_str_col->get_data_at(i); |
328 | 19 | col_to->insert_data(value.data, value.size); |
329 | 19 | } |
330 | 31 | } |
331 | 54 | } |
332 | 192 | } |
333 | | |
334 | 77 | if (is_nullable) { |
335 | 58 | block.replace_by_position( |
336 | 58 | result, ColumnNullable::create(std::move(col_to), std::move(null_map))); |
337 | 58 | } else { |
338 | 19 | block.replace_by_position(result, std::move(col_to)); |
339 | 19 | } |
340 | | |
341 | 17 | return Status::OK(); |
342 | 14 | } _ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 184 | 26 | uint32_t result, size_t input_rows_count) const override { | 185 | 26 | auto&& [col_from, col_from_is_const] = | 186 | 26 | unpack_if_const(block.get_by_position(arguments[0]).column); | 187 | | | 188 | 26 | if (col_from_is_const && col_from->is_null_at(0)) { | 189 | 0 | auto col_str = ColumnString::create(); | 190 | 0 | col_str->insert_default(); | 191 | 0 | auto null_map = ColumnUInt8::create(1, 1); | 192 | 0 | auto nullable_col = ColumnNullable::create(std::move(col_str), std::move(null_map)); | 193 | 0 | block.get_by_position(result).column = | 194 | 0 | ColumnConst::create(std::move(nullable_col), input_rows_count); | 195 | 0 | return Status::OK(); | 196 | 0 | } | 197 | | | 198 | 26 | auto null_map = ColumnUInt8::create(0, 0); | 199 | 26 | bool is_nullable = false; | 200 | | | 201 | 26 | switch (nullable_mode) { | 202 | 0 | case NullalbeMode::NULLABLE: { | 203 | 0 | is_nullable = true; | 204 | 0 | break; | 205 | 0 | } | 206 | 26 | case NullalbeMode::FOLLOW_INPUT: { | 207 | 26 | for (auto arg : arguments) { | 208 | 26 | is_nullable |= block.get_by_position(arg).type->is_nullable(); | 209 | 26 | } | 210 | 26 | break; | 211 | 0 | } | 212 | 26 | } | 213 | | | 214 | 26 | if (is_nullable) { | 215 | 17 | null_map = ColumnUInt8::create(input_rows_count, 0); | 216 | 17 | } | 217 | | | 218 | 26 | const ColumnString* col_from_string = nullptr; | 219 | 26 | if (col_from->is_nullable()) { | 220 | 17 | const auto& nullable_col = assert_cast<const ColumnNullable&>(*col_from); | 221 | | | 222 | 17 | VectorizedUtils::update_null_map(null_map->get_data(), | 223 | 17 | nullable_col.get_null_map_data()); | 224 | 17 | col_from_string = | 225 | 17 | assert_cast<const ColumnString*>(nullable_col.get_nested_column_ptr().get()); | 226 | 17 | } else { | 227 | 9 | col_from_string = 
assert_cast<const ColumnString*>(col_from.get()); | 228 | 9 | } | 229 | | | 230 | 26 | StringRef constant_default_value; | 231 | 26 | bool default_value_const = false; | 232 | 26 | bool default_value_null_const = false; | 233 | 26 | ColumnPtr default_value_col; | 234 | 26 | JsonBinaryValue default_jsonb_value_parser; | 235 | 26 | const ColumnString* default_value_str_col = nullptr; | 236 | 26 | const NullMap* default_value_nullmap = nullptr; | 237 | | if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) { | 238 | | auto* state = reinterpret_cast<FunctionJsonbParseState*>( | 239 | | context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); | 240 | | if (state && state->has_const_default_value) { | 241 | | constant_default_value = state->default_value; | 242 | | default_value_null_const = state->default_is_null; | 243 | | default_value_const = true; | 244 | | } else if (arguments.size() > 1) { | 245 | | if (block.get_by_position(arguments[1]).type->get_primitive_type() != | 246 | | PrimitiveType::TYPE_JSONB) { | 247 | | return Status::InvalidArgument( | 248 | | "{} second argument should be jsonb type, but got {}", get_name(), | 249 | | block.get_by_position(arguments[1]).type->get_name()); | 250 | | } | 251 | | std::tie(default_value_col, default_value_const) = | 252 | | unpack_if_const(block.get_by_position(arguments[1]).column); | 253 | | if (default_value_const) { | 254 | | const JsonbDocument* default_value_doc = nullptr; | 255 | | if (default_value_col->is_null_at(0)) { | 256 | | default_value_null_const = true; | 257 | | } else { | 258 | | auto data = default_value_col->get_data_at(0); | 259 | | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(data.data, data.size, | 260 | | &default_value_doc)); | 261 | | constant_default_value = data; | 262 | | } | 263 | | } else { | 264 | | if (default_value_col->is_nullable()) { | 265 | | const auto& nullable_col = | 266 | | assert_cast<const ColumnNullable&>(*default_value_col); | 267 | | 
default_value_str_col = assert_cast<const ColumnString*>( | 268 | | nullable_col.get_nested_column_ptr().get()); | 269 | | default_value_nullmap = &(nullable_col.get_null_map_data()); | 270 | | } else { | 271 | | default_value_str_col = | 272 | | assert_cast<const ColumnString*>(default_value_col.get()); | 273 | | } | 274 | | } | 275 | | } else if (arguments.size() == 1) { | 276 | | // parse default value '{}' should always success. | 277 | | RETURN_IF_ERROR(default_jsonb_value_parser.from_json_string(std::string("{}"))); | 278 | | default_value_const = true; | 279 | | constant_default_value.data = default_jsonb_value_parser.value(); | 280 | | constant_default_value.size = default_jsonb_value_parser.size(); | 281 | | } | 282 | | } | 283 | | | 284 | 26 | auto col_to = ColumnString::create(); | 285 | | | 286 | 26 | col_to->reserve(input_rows_count); | 287 | | | 288 | 26 | auto& null_map_data = null_map->get_data(); | 289 | | | 290 | | // parser can be reused for performance | 291 | 26 | JsonBinaryValue jsonb_value; | 292 | | | 293 | 68 | for (size_t i = 0; i < input_rows_count; ++i) { | 294 | 42 | if (is_nullable && null_map_data[i]) { | 295 | 1 | col_to->insert_default(); | 296 | 1 | continue; | 297 | 1 | } | 298 | | | 299 | 41 | auto index = index_check_const(i, col_from_is_const); | 300 | 41 | const auto& val = col_from_string->get_data_at(index); | 301 | 41 | auto st = jsonb_value.from_json_string(val.data, val.size); | 302 | 41 | if (st.ok()) { | 303 | | // insert jsonb format data | 304 | 35 | col_to->insert_data(jsonb_value.value(), jsonb_value.size()); | 305 | 35 | } else { | 306 | 6 | if constexpr (parse_error_handle_mode == JsonbParseErrorMode::FAIL) { | 307 | 6 | return Status::InvalidArgument( | 308 | 6 | "Parse json document failed at row {}, error: {}", i, st.to_string()); | 309 | | } else if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_NULL) { | 310 | | null_map_data[i] = 1; | 311 | | col_to->insert_default(); | 312 | | } else { | 
313 | | if (default_value_const) { | 314 | | if (default_value_null_const) { | 315 | | null_map_data[i] = 1; | 316 | | col_to->insert_default(); | 317 | | } else { | 318 | | col_to->insert_data(constant_default_value.data, | 319 | | constant_default_value.size); | 320 | | } | 321 | | } else { | 322 | | if (default_value_nullmap && (*default_value_nullmap)[i]) { | 323 | | null_map_data[i] = 1; | 324 | | col_to->insert_default(); | 325 | | continue; | 326 | | } | 327 | | auto value = default_value_str_col->get_data_at(i); | 328 | | col_to->insert_data(value.data, value.size); | 329 | | } | 330 | | } | 331 | 6 | } | 332 | 41 | } | 333 | | | 334 | 26 | if (is_nullable) { | 335 | 11 | block.replace_by_position( | 336 | 11 | result, ColumnNullable::create(std::move(col_to), std::move(null_map))); | 337 | 15 | } else { | 338 | 15 | block.replace_by_position(result, std::move(col_to)); | 339 | 15 | } | 340 | | | 341 | 26 | return Status::OK(); | 342 | 26 | } |
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 184 | 34 | uint32_t result, size_t input_rows_count) const override { | 185 | 34 | auto&& [col_from, col_from_is_const] = | 186 | 34 | unpack_if_const(block.get_by_position(arguments[0]).column); | 187 | | | 188 | 34 | if (col_from_is_const && col_from->is_null_at(0)) { | 189 | 0 | auto col_str = ColumnString::create(); | 190 | 0 | col_str->insert_default(); | 191 | 0 | auto null_map = ColumnUInt8::create(1, 1); | 192 | 0 | auto nullable_col = ColumnNullable::create(std::move(col_str), std::move(null_map)); | 193 | 0 | block.get_by_position(result).column = | 194 | 0 | ColumnConst::create(std::move(nullable_col), input_rows_count); | 195 | 0 | return Status::OK(); | 196 | 0 | } | 197 | | | 198 | 34 | auto null_map = ColumnUInt8::create(0, 0); | 199 | 34 | bool is_nullable = false; | 200 | | | 201 | 34 | switch (nullable_mode) { | 202 | 34 | case NullalbeMode::NULLABLE: { | 203 | 34 | is_nullable = true; | 204 | 34 | break; | 205 | 0 | } | 206 | 0 | case NullalbeMode::FOLLOW_INPUT: { | 207 | 0 | for (auto arg : arguments) { | 208 | 0 | is_nullable |= block.get_by_position(arg).type->is_nullable(); | 209 | 0 | } | 210 | 0 | break; | 211 | 0 | } | 212 | 34 | } | 213 | | | 214 | 34 | if (is_nullable) { | 215 | 34 | null_map = ColumnUInt8::create(input_rows_count, 0); | 216 | 34 | } | 217 | | | 218 | 34 | const ColumnString* col_from_string = nullptr; | 219 | 34 | if (col_from->is_nullable()) { | 220 | 11 | const auto& nullable_col = assert_cast<const ColumnNullable&>(*col_from); | 221 | | | 222 | 11 | VectorizedUtils::update_null_map(null_map->get_data(), | 223 | 11 | nullable_col.get_null_map_data()); | 224 | 11 | col_from_string = | 225 | 11 | assert_cast<const ColumnString*>(nullable_col.get_nested_column_ptr().get()); | 226 | 23 | } else { | 227 | 23 | col_from_string = assert_cast<const 
ColumnString*>(col_from.get()); | 228 | 23 | } | 229 | | | 230 | 34 | StringRef constant_default_value; | 231 | 34 | bool default_value_const = false; | 232 | 34 | bool default_value_null_const = false; | 233 | 34 | ColumnPtr default_value_col; | 234 | 34 | JsonBinaryValue default_jsonb_value_parser; | 235 | 34 | const ColumnString* default_value_str_col = nullptr; | 236 | 34 | const NullMap* default_value_nullmap = nullptr; | 237 | | if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) { | 238 | | auto* state = reinterpret_cast<FunctionJsonbParseState*>( | 239 | | context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); | 240 | | if (state && state->has_const_default_value) { | 241 | | constant_default_value = state->default_value; | 242 | | default_value_null_const = state->default_is_null; | 243 | | default_value_const = true; | 244 | | } else if (arguments.size() > 1) { | 245 | | if (block.get_by_position(arguments[1]).type->get_primitive_type() != | 246 | | PrimitiveType::TYPE_JSONB) { | 247 | | return Status::InvalidArgument( | 248 | | "{} second argument should be jsonb type, but got {}", get_name(), | 249 | | block.get_by_position(arguments[1]).type->get_name()); | 250 | | } | 251 | | std::tie(default_value_col, default_value_const) = | 252 | | unpack_if_const(block.get_by_position(arguments[1]).column); | 253 | | if (default_value_const) { | 254 | | const JsonbDocument* default_value_doc = nullptr; | 255 | | if (default_value_col->is_null_at(0)) { | 256 | | default_value_null_const = true; | 257 | | } else { | 258 | | auto data = default_value_col->get_data_at(0); | 259 | | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(data.data, data.size, | 260 | | &default_value_doc)); | 261 | | constant_default_value = data; | 262 | | } | 263 | | } else { | 264 | | if (default_value_col->is_nullable()) { | 265 | | const auto& nullable_col = | 266 | | assert_cast<const ColumnNullable&>(*default_value_col); | 267 | | 
default_value_str_col = assert_cast<const ColumnString*>( | 268 | | nullable_col.get_nested_column_ptr().get()); | 269 | | default_value_nullmap = &(nullable_col.get_null_map_data()); | 270 | | } else { | 271 | | default_value_str_col = | 272 | | assert_cast<const ColumnString*>(default_value_col.get()); | 273 | | } | 274 | | } | 275 | | } else if (arguments.size() == 1) { | 276 | | // parse default value '{}' should always success. | 277 | | RETURN_IF_ERROR(default_jsonb_value_parser.from_json_string(std::string("{}"))); | 278 | | default_value_const = true; | 279 | | constant_default_value.data = default_jsonb_value_parser.value(); | 280 | | constant_default_value.size = default_jsonb_value_parser.size(); | 281 | | } | 282 | | } | 283 | | | 284 | 34 | auto col_to = ColumnString::create(); | 285 | | | 286 | 34 | col_to->reserve(input_rows_count); | 287 | | | 288 | 34 | auto& null_map_data = null_map->get_data(); | 289 | | | 290 | | // parser can be reused for performance | 291 | 34 | JsonBinaryValue jsonb_value; | 292 | | | 293 | 99 | for (size_t i = 0; i < input_rows_count; ++i) { | 294 | 65 | if (is_nullable && null_map_data[i]) { | 295 | 6 | col_to->insert_default(); | 296 | 6 | continue; | 297 | 6 | } | 298 | | | 299 | 59 | auto index = index_check_const(i, col_from_is_const); | 300 | 59 | const auto& val = col_from_string->get_data_at(index); | 301 | 59 | auto st = jsonb_value.from_json_string(val.data, val.size); | 302 | 59 | if (st.ok()) { | 303 | | // insert jsonb format data | 304 | 42 | col_to->insert_data(jsonb_value.value(), jsonb_value.size()); | 305 | 42 | } else { | 306 | | if constexpr (parse_error_handle_mode == JsonbParseErrorMode::FAIL) { | 307 | | return Status::InvalidArgument( | 308 | | "Parse json document failed at row {}, error: {}", i, st.to_string()); | 309 | 17 | } else if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_NULL) { | 310 | 17 | null_map_data[i] = 1; | 311 | 17 | col_to->insert_default(); | 312 | | } else { 
| 313 | | if (default_value_const) { | 314 | | if (default_value_null_const) { | 315 | | null_map_data[i] = 1; | 316 | | col_to->insert_default(); | 317 | | } else { | 318 | | col_to->insert_data(constant_default_value.data, | 319 | | constant_default_value.size); | 320 | | } | 321 | | } else { | 322 | | if (default_value_nullmap && (*default_value_nullmap)[i]) { | 323 | | null_map_data[i] = 1; | 324 | | col_to->insert_default(); | 325 | | continue; | 326 | | } | 327 | | auto value = default_value_str_col->get_data_at(i); | 328 | | col_to->insert_data(value.data, value.size); | 329 | | } | 330 | | } | 331 | 17 | } | 332 | 59 | } | 333 | | | 334 | 34 | if (is_nullable) { | 335 | 34 | block.replace_by_position( | 336 | 34 | result, ColumnNullable::create(std::move(col_to), std::move(null_map))); | 337 | 34 | } else { | 338 | 0 | block.replace_by_position(result, std::move(col_to)); | 339 | 0 | } | 340 | | | 341 | 34 | return Status::OK(); | 342 | 34 | } |
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 184 | 15 | uint32_t result, size_t input_rows_count) const override { | 185 | 15 | auto&& [col_from, col_from_is_const] = | 186 | 15 | unpack_if_const(block.get_by_position(arguments[0]).column); | 187 | | | 188 | 15 | if (col_from_is_const && col_from->is_null_at(0)) { | 189 | 1 | auto col_str = ColumnString::create(); | 190 | 1 | col_str->insert_default(); | 191 | 1 | auto null_map = ColumnUInt8::create(1, 1); | 192 | 1 | auto nullable_col = ColumnNullable::create(std::move(col_str), std::move(null_map)); | 193 | 1 | block.get_by_position(result).column = | 194 | 1 | ColumnConst::create(std::move(nullable_col), input_rows_count); | 195 | 1 | return Status::OK(); | 196 | 1 | } | 197 | | | 198 | 14 | auto null_map = ColumnUInt8::create(0, 0); | 199 | 14 | bool is_nullable = false; | 200 | | | 201 | 14 | switch (nullable_mode) { | 202 | 0 | case NullalbeMode::NULLABLE: { | 203 | 0 | is_nullable = true; | 204 | 0 | break; | 205 | 0 | } | 206 | 14 | case NullalbeMode::FOLLOW_INPUT: { | 207 | 26 | for (auto arg : arguments) { | 208 | 26 | is_nullable |= block.get_by_position(arg).type->is_nullable(); | 209 | 26 | } | 210 | 14 | break; | 211 | 0 | } | 212 | 14 | } | 213 | | | 214 | 14 | if (is_nullable) { | 215 | 13 | null_map = ColumnUInt8::create(input_rows_count, 0); | 216 | 13 | } | 217 | | | 218 | 14 | const ColumnString* col_from_string = nullptr; | 219 | 14 | if (col_from->is_nullable()) { | 220 | 11 | const auto& nullable_col = assert_cast<const ColumnNullable&>(*col_from); | 221 | | | 222 | 11 | VectorizedUtils::update_null_map(null_map->get_data(), | 223 | 11 | nullable_col.get_null_map_data()); | 224 | 11 | col_from_string = | 225 | 11 | assert_cast<const ColumnString*>(nullable_col.get_nested_column_ptr().get()); | 226 | 11 | } else { | 227 | 3 | col_from_string = assert_cast<const 
ColumnString*>(col_from.get()); | 228 | 3 | } | 229 | | | 230 | 14 | StringRef constant_default_value; | 231 | 14 | bool default_value_const = false; | 232 | 14 | bool default_value_null_const = false; | 233 | 14 | ColumnPtr default_value_col; | 234 | 14 | JsonBinaryValue default_jsonb_value_parser; | 235 | 14 | const ColumnString* default_value_str_col = nullptr; | 236 | 14 | const NullMap* default_value_nullmap = nullptr; | 237 | 14 | if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) { | 238 | 14 | auto* state = reinterpret_cast<FunctionJsonbParseState*>( | 239 | 14 | context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); | 240 | 14 | if (state && state->has_const_default_value) { | 241 | 7 | constant_default_value = state->default_value; | 242 | 7 | default_value_null_const = state->default_is_null; | 243 | 7 | default_value_const = true; | 244 | 7 | } else if (arguments.size() > 1) { | 245 | 7 | if (block.get_by_position(arguments[1]).type->get_primitive_type() != | 246 | 7 | PrimitiveType::TYPE_JSONB) { | 247 | 1 | return Status::InvalidArgument( | 248 | 1 | "{} second argument should be jsonb type, but got {}", get_name(), | 249 | 1 | block.get_by_position(arguments[1]).type->get_name()); | 250 | 1 | } | 251 | 6 | std::tie(default_value_col, default_value_const) = | 252 | 6 | unpack_if_const(block.get_by_position(arguments[1]).column); | 253 | 6 | if (default_value_const) { | 254 | 1 | const JsonbDocument* default_value_doc = nullptr; | 255 | 1 | if (default_value_col->is_null_at(0)) { | 256 | 1 | default_value_null_const = true; | 257 | 1 | } else { | 258 | 0 | auto data = default_value_col->get_data_at(0); | 259 | 0 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(data.data, data.size, | 260 | 0 | &default_value_doc)); | 261 | 0 | constant_default_value = data; | 262 | 0 | } | 263 | 5 | } else { | 264 | 5 | if (default_value_col->is_nullable()) { | 265 | 4 | const auto& nullable_col = | 266 | 4 | assert_cast<const 
ColumnNullable&>(*default_value_col); | 267 | 4 | default_value_str_col = assert_cast<const ColumnString*>( | 268 | 4 | nullable_col.get_nested_column_ptr().get()); | 269 | 4 | default_value_nullmap = &(nullable_col.get_null_map_data()); | 270 | 4 | } else { | 271 | 1 | default_value_str_col = | 272 | 1 | assert_cast<const ColumnString*>(default_value_col.get()); | 273 | 1 | } | 274 | 5 | } | 275 | 6 | } else if (arguments.size() == 1) { | 276 | | // parse default value '{}' should always success. | 277 | 0 | RETURN_IF_ERROR(default_jsonb_value_parser.from_json_string(std::string("{}"))); | 278 | 0 | default_value_const = true; | 279 | 0 | constant_default_value.data = default_jsonb_value_parser.value(); | 280 | 0 | constant_default_value.size = default_jsonb_value_parser.size(); | 281 | 0 | } | 282 | 14 | } | 283 | | | 284 | 13 | auto col_to = ColumnString::create(); | 285 | | | 286 | 14 | col_to->reserve(input_rows_count); | 287 | | | 288 | 14 | auto& null_map_data = null_map->get_data(); | 289 | | | 290 | | // parser can be reused for performance | 291 | 14 | JsonBinaryValue jsonb_value; | 292 | | | 293 | 115 | for (size_t i = 0; i < input_rows_count; ++i) { | 294 | 98 | if (is_nullable && null_map_data[i]) { | 295 | 6 | col_to->insert_default(); | 296 | 6 | continue; | 297 | 6 | } | 298 | | | 299 | 92 | auto index = index_check_const(i, col_from_is_const); | 300 | 92 | const auto& val = col_from_string->get_data_at(index); | 301 | 92 | auto st = jsonb_value.from_json_string(val.data, val.size); | 302 | 92 | if (st.ok()) { | 303 | | // insert jsonb format data | 304 | 61 | col_to->insert_data(jsonb_value.value(), jsonb_value.size()); | 305 | 61 | } else { | 306 | | if constexpr (parse_error_handle_mode == JsonbParseErrorMode::FAIL) { | 307 | | return Status::InvalidArgument( | 308 | | "Parse json document failed at row {}, error: {}", i, st.to_string()); | 309 | | } else if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_NULL) { | 310 | | 
null_map_data[i] = 1; | 311 | | col_to->insert_default(); | 312 | 31 | } else { | 313 | 31 | if (default_value_const) { | 314 | 9 | if (default_value_null_const) { | 315 | 3 | null_map_data[i] = 1; | 316 | 3 | col_to->insert_default(); | 317 | 6 | } else { | 318 | 6 | col_to->insert_data(constant_default_value.data, | 319 | 6 | constant_default_value.size); | 320 | 6 | } | 321 | 22 | } else { | 322 | 22 | if (default_value_nullmap && (*default_value_nullmap)[i]) { | 323 | 3 | null_map_data[i] = 1; | 324 | 3 | col_to->insert_default(); | 325 | 3 | continue; | 326 | 3 | } | 327 | 19 | auto value = default_value_str_col->get_data_at(i); | 328 | 19 | col_to->insert_data(value.data, value.size); | 329 | 19 | } | 330 | 31 | } | 331 | 31 | } | 332 | 92 | } | 333 | | | 334 | 17 | if (is_nullable) { | 335 | 13 | block.replace_by_position( | 336 | 13 | result, ColumnNullable::create(std::move(col_to), std::move(null_map))); | 337 | 13 | } else { | 338 | 4 | block.replace_by_position(result, std::move(col_to)); | 339 | 4 | } | 340 | | | 341 | 17 | return Status::OK(); | 342 | 14 | } |
|
343 | | }; |
344 | | |
345 | | // jsonb_parse return type nullable as input |
346 | | using FunctionJsonbParse = |
347 | | FunctionJsonbParseBase<NullalbeMode::FOLLOW_INPUT, JsonbParseErrorMode::FAIL>; |
348 | | using FunctionJsonbParseErrorNull = |
349 | | FunctionJsonbParseBase<NullalbeMode::NULLABLE, JsonbParseErrorMode::RETURN_NULL>; |
350 | | using FunctionJsonbParseErrorValue = |
351 | | FunctionJsonbParseBase<NullalbeMode::FOLLOW_INPUT, JsonbParseErrorMode::RETURN_VALUE>; |
352 | | |
353 | | // func(jsonb, [varchar, varchar, ...]) -> nullable(type) |
354 | | template <typename Impl> |
355 | | class FunctionJsonbExtract : public IFunction { |
356 | | public: |
357 | | static constexpr auto name = Impl::name; |
358 | | static constexpr auto alias = Impl::alias; |
359 | 1.64k | static FunctionPtr create() { return std::make_shared<FunctionJsonbExtract>(); }_ZN5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE6createEv Line | Count | Source | 359 | 149 | static FunctionPtr create() { return std::make_shared<FunctionJsonbExtract>(); } |
_ZN5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE6createEv Line | Count | Source | 359 | 145 | static FunctionPtr create() { return std::make_shared<FunctionJsonbExtract>(); } |
_ZN5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE6createEv Line | Count | Source | 359 | 1.33k | static FunctionPtr create() { return std::make_shared<FunctionJsonbExtract>(); } |
_ZN5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE6createEv Line | Count | Source | 359 | 15 | static FunctionPtr create() { return std::make_shared<FunctionJsonbExtract>(); } |
|
360 | 0 | String get_name() const override { return name; }Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE8get_nameB5cxx11Ev |
361 | 1.61k | bool is_variadic() const override { return true; }_ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE11is_variadicEv Line | Count | Source | 361 | 141 | bool is_variadic() const override { return true; } |
_ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE11is_variadicEv Line | Count | Source | 361 | 137 | bool is_variadic() const override { return true; } |
_ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE11is_variadicEv Line | Count | Source | 361 | 1.32k | bool is_variadic() const override { return true; } |
_ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE11is_variadicEv Line | Count | Source | 361 | 7 | bool is_variadic() const override { return true; } |
|
362 | 1 | size_t get_number_of_arguments() const override { return 0; }Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE23get_number_of_argumentsEv Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE23get_number_of_argumentsEv _ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE23get_number_of_argumentsEv Line | Count | Source | 362 | 1 | size_t get_number_of_arguments() const override { return 0; } |
Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE23get_number_of_argumentsEv |
363 | 14.2k | bool use_default_implementation_for_nulls() const override { return false; }_ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE36use_default_implementation_for_nullsEv Line | Count | Source | 363 | 1.46k | bool use_default_implementation_for_nulls() const override { return false; } |
_ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE36use_default_implementation_for_nullsEv Line | Count | Source | 363 | 1.46k | bool use_default_implementation_for_nulls() const override { return false; } |
_ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE36use_default_implementation_for_nullsEv Line | Count | Source | 363 | 11.2k | bool use_default_implementation_for_nulls() const override { return false; } |
_ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE36use_default_implementation_for_nullsEv Line | Count | Source | 363 | 12 | bool use_default_implementation_for_nulls() const override { return false; } |
|
364 | 1.60k | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
365 | 1.60k | return make_nullable(std::make_shared<typename Impl::ReturnType>()); |
366 | 1.60k | } _ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 364 | 140 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 365 | 140 | return make_nullable(std::make_shared<typename Impl::ReturnType>()); | 366 | 140 | } |
_ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 364 | 136 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 365 | 136 | return make_nullable(std::make_shared<typename Impl::ReturnType>()); | 366 | 136 | } |
_ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 364 | 1.32k | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 365 | 1.32k | return make_nullable(std::make_shared<typename Impl::ReturnType>()); | 366 | 1.32k | } |
_ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 364 | 6 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 365 | 6 | return make_nullable(std::make_shared<typename Impl::ReturnType>()); | 366 | 6 | } |
|
367 | 32 | DataTypes get_variadic_argument_types_impl() const override { |
368 | | if constexpr (HasGetVariadicArgumentTypesImpl<Impl>) { |
369 | | return Impl::get_variadic_argument_types_impl(); |
370 | 32 | } else { |
371 | 32 | return {}; |
372 | 32 | } |
373 | 32 | } _ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE32get_variadic_argument_types_implEv Line | Count | Source | 367 | 8 | DataTypes get_variadic_argument_types_impl() const override { | 368 | | if constexpr (HasGetVariadicArgumentTypesImpl<Impl>) { | 369 | | return Impl::get_variadic_argument_types_impl(); | 370 | 8 | } else { | 371 | 8 | return {}; | 372 | 8 | } | 373 | 8 | } |
_ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE32get_variadic_argument_types_implEv Line | Count | Source | 367 | 8 | DataTypes get_variadic_argument_types_impl() const override { | 368 | | if constexpr (HasGetVariadicArgumentTypesImpl<Impl>) { | 369 | | return Impl::get_variadic_argument_types_impl(); | 370 | 8 | } else { | 371 | 8 | return {}; | 372 | 8 | } | 373 | 8 | } |
_ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE32get_variadic_argument_types_implEv Line | Count | Source | 367 | 8 | DataTypes get_variadic_argument_types_impl() const override { | 368 | | if constexpr (HasGetVariadicArgumentTypesImpl<Impl>) { | 369 | | return Impl::get_variadic_argument_types_impl(); | 370 | 8 | } else { | 371 | 8 | return {}; | 372 | 8 | } | 373 | 8 | } |
_ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE32get_variadic_argument_types_implEv Line | Count | Source | 367 | 8 | DataTypes get_variadic_argument_types_impl() const override { | 368 | | if constexpr (HasGetVariadicArgumentTypesImpl<Impl>) { | 369 | | return Impl::get_variadic_argument_types_impl(); | 370 | 8 | } else { | 371 | 8 | return {}; | 372 | 8 | } | 373 | 8 | } |
|
374 | | |
375 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
376 | 12.6k | uint32_t result, size_t input_rows_count) const override { |
377 | 12.6k | DCHECK_GE(arguments.size(), 2); |
378 | | |
379 | 12.6k | ColumnPtr jsonb_data_column; |
380 | 12.6k | bool jsonb_data_const = false; |
381 | 12.6k | const NullMap* data_null_map = nullptr; |
382 | | |
383 | 12.6k | if (block.get_by_position(arguments[0]).type->get_primitive_type() != |
384 | 12.6k | PrimitiveType::TYPE_JSONB) { |
385 | 1 | return Status::InvalidArgument( |
386 | 1 | "jsonb_extract first argument should be json type, but got {}", |
387 | 1 | block.get_by_position(arguments[0]).type->get_name()); |
388 | 1 | } |
389 | | |
390 | | // prepare jsonb data column |
391 | 12.6k | std::tie(jsonb_data_column, jsonb_data_const) = |
392 | 12.6k | unpack_if_const(block.get_by_position(arguments[0]).column); |
393 | 12.6k | if (jsonb_data_column->is_nullable()) { |
394 | 10.7k | const auto& nullable_column = assert_cast<const ColumnNullable&>(*jsonb_data_column); |
395 | 10.7k | jsonb_data_column = nullable_column.get_nested_column_ptr(); |
396 | 10.7k | data_null_map = &nullable_column.get_null_map_data(); |
397 | 10.7k | } |
398 | 12.6k | const auto& ldata = assert_cast<const ColumnString*>(jsonb_data_column.get())->get_chars(); |
399 | 12.6k | const auto& loffsets = |
400 | 12.6k | assert_cast<const ColumnString*>(jsonb_data_column.get())->get_offsets(); |
401 | | |
402 | | // prepare parse path column prepare |
403 | 12.6k | std::vector<const ColumnString*> jsonb_path_columns; |
404 | 12.6k | std::vector<bool> path_const(arguments.size() - 1); |
405 | 12.6k | std::vector<const NullMap*> path_null_maps(arguments.size() - 1, nullptr); |
406 | 25.5k | for (int i = 0; i < arguments.size() - 1; ++i) { |
407 | 12.9k | ColumnPtr path_column; |
408 | 12.9k | bool is_const = false; |
409 | 12.9k | std::tie(path_column, is_const) = |
410 | 12.9k | unpack_if_const(block.get_by_position(arguments[i + 1]).column); |
411 | 12.9k | path_const[i] = is_const; |
412 | 12.9k | if (path_column->is_nullable()) { |
413 | 66 | const auto& nullable_column = assert_cast<const ColumnNullable&>(*path_column); |
414 | 66 | path_column = nullable_column.get_nested_column_ptr(); |
415 | 66 | path_null_maps[i] = &nullable_column.get_null_map_data(); |
416 | 66 | } |
417 | 12.9k | jsonb_path_columns.push_back(assert_cast<const ColumnString*>(path_column.get())); |
418 | 12.9k | } |
419 | | |
420 | 12.6k | auto null_map = ColumnUInt8::create(input_rows_count, 0); |
421 | 12.6k | auto res = Impl::ColumnType::create(); |
422 | | |
423 | | // execute Impl |
424 | | if constexpr (std::is_same_v<typename Impl::ReturnType, DataTypeString> || |
425 | 11.2k | std::is_same_v<typename Impl::ReturnType, DataTypeJsonb>) { |
426 | 11.2k | auto& res_data = res->get_chars(); |
427 | 11.2k | auto& res_offsets = res->get_offsets(); |
428 | 11.2k | RETURN_IF_ERROR(Impl::vector_vector_v2( |
429 | 11.2k | context, ldata, loffsets, data_null_map, jsonb_data_const, jsonb_path_columns, |
430 | 11.2k | path_null_maps, path_const, res_data, res_offsets, null_map->get_data())); |
431 | 11.2k | } else { |
432 | | // not support other extract type for now (e.g. int, double, ...) |
433 | 1.32k | DCHECK_EQ(jsonb_path_columns.size(), 1); |
434 | 1.32k | const auto& rdata = jsonb_path_columns[0]->get_chars(); |
435 | 1.32k | const auto& roffsets = jsonb_path_columns[0]->get_offsets(); |
436 | | |
437 | 1.32k | auto create_all_null_result = [&]() { |
438 | 2 | res = Impl::ColumnType::create(); |
439 | 2 | res->insert_default(); |
440 | 2 | auto nullable_column = |
441 | 2 | ColumnNullable::create(std::move(res), ColumnUInt8::create(1, 1)); |
442 | 2 | auto const_column = |
443 | 2 | ColumnConst::create(std::move(nullable_column), input_rows_count); |
444 | 2 | block.get_by_position(result).column = std::move(const_column); |
445 | 2 | return Status::OK(); |
446 | 2 | }; |
447 | | |
448 | 1.32k | if (jsonb_data_const) { |
449 | 2 | if (data_null_map && (*data_null_map)[0]) { |
450 | 1 | return create_all_null_result(); |
451 | 1 | } |
452 | | |
453 | 1 | RETURN_IF_ERROR(Impl::scalar_vector(context, jsonb_data_column->get_data_at(0), |
454 | 1 | rdata, roffsets, path_null_maps[0], |
455 | 1 | res->get_data(), null_map->get_data())); |
456 | 1.32k | } else if (path_const[0]) { |
457 | 1.32k | if (path_null_maps[0] && (*path_null_maps[0])[0]) { |
458 | 1 | return create_all_null_result(); |
459 | 1 | } |
460 | 1.32k | RETURN_IF_ERROR(Impl::vector_scalar(context, ldata, loffsets, data_null_map, |
461 | 1.32k | jsonb_path_columns[0]->get_data_at(0), |
462 | 1.32k | res->get_data(), null_map->get_data())); |
463 | 1.32k | } else { |
464 | 1 | RETURN_IF_ERROR(Impl::vector_vector(context, ldata, loffsets, data_null_map, rdata, |
465 | 1 | roffsets, path_null_maps[0], res->get_data(), |
466 | 1 | null_map->get_data())); |
467 | 1 | } |
468 | 1.32k | } |
469 | | |
470 | 12.5k | block.get_by_position(result).column = |
471 | 12.6k | ColumnNullable::create(std::move(res), std::move(null_map)); |
472 | 12.6k | return Status::OK(); |
473 | 12.6k | } _ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 376 | 1.32k | uint32_t result, size_t input_rows_count) const override { | 377 | 1.32k | DCHECK_GE(arguments.size(), 2); | 378 | | | 379 | 1.32k | ColumnPtr jsonb_data_column; | 380 | 1.32k | bool jsonb_data_const = false; | 381 | 1.32k | const NullMap* data_null_map = nullptr; | 382 | | | 383 | 1.32k | if (block.get_by_position(arguments[0]).type->get_primitive_type() != | 384 | 1.32k | PrimitiveType::TYPE_JSONB) { | 385 | 0 | return Status::InvalidArgument( | 386 | 0 | "jsonb_extract first argument should be json type, but got {}", | 387 | 0 | block.get_by_position(arguments[0]).type->get_name()); | 388 | 0 | } | 389 | | | 390 | | // prepare jsonb data column | 391 | 1.32k | std::tie(jsonb_data_column, jsonb_data_const) = | 392 | 1.32k | unpack_if_const(block.get_by_position(arguments[0]).column); | 393 | 1.32k | if (jsonb_data_column->is_nullable()) { | 394 | 1.14k | const auto& nullable_column = assert_cast<const ColumnNullable&>(*jsonb_data_column); | 395 | 1.14k | jsonb_data_column = nullable_column.get_nested_column_ptr(); | 396 | 1.14k | data_null_map = &nullable_column.get_null_map_data(); | 397 | 1.14k | } | 398 | 1.32k | const auto& ldata = assert_cast<const ColumnString*>(jsonb_data_column.get())->get_chars(); | 399 | 1.32k | const auto& loffsets = | 400 | 1.32k | assert_cast<const ColumnString*>(jsonb_data_column.get())->get_offsets(); | 401 | | | 402 | | // prepare parse path column prepare | 403 | 1.32k | std::vector<const ColumnString*> jsonb_path_columns; | 404 | 1.32k | std::vector<bool> path_const(arguments.size() - 1); | 405 | 1.32k | std::vector<const NullMap*> path_null_maps(arguments.size() - 1, nullptr); | 406 | 2.65k | for (int i = 0; i < arguments.size() - 1; ++i) { | 407 | 1.32k | ColumnPtr path_column; | 408 | 1.32k | bool is_const = false; | 409 | 1.32k | std::tie(path_column, 
is_const) = | 410 | 1.32k | unpack_if_const(block.get_by_position(arguments[i + 1]).column); | 411 | 1.32k | path_const[i] = is_const; | 412 | 1.32k | if (path_column->is_nullable()) { | 413 | 5 | const auto& nullable_column = assert_cast<const ColumnNullable&>(*path_column); | 414 | 5 | path_column = nullable_column.get_nested_column_ptr(); | 415 | 5 | path_null_maps[i] = &nullable_column.get_null_map_data(); | 416 | 5 | } | 417 | 1.32k | jsonb_path_columns.push_back(assert_cast<const ColumnString*>(path_column.get())); | 418 | 1.32k | } | 419 | | | 420 | 1.32k | auto null_map = ColumnUInt8::create(input_rows_count, 0); | 421 | 1.32k | auto res = Impl::ColumnType::create(); | 422 | | | 423 | | // execute Impl | 424 | | if constexpr (std::is_same_v<typename Impl::ReturnType, DataTypeString> || | 425 | 1.32k | std::is_same_v<typename Impl::ReturnType, DataTypeJsonb>) { | 426 | 1.32k | auto& res_data = res->get_chars(); | 427 | 1.32k | auto& res_offsets = res->get_offsets(); | 428 | 1.32k | RETURN_IF_ERROR(Impl::vector_vector_v2( | 429 | 1.32k | context, ldata, loffsets, data_null_map, jsonb_data_const, jsonb_path_columns, | 430 | 1.32k | path_null_maps, path_const, res_data, res_offsets, null_map->get_data())); | 431 | | } else { | 432 | | // not support other extract type for now (e.g. int, double, ...) 
| 433 | | DCHECK_EQ(jsonb_path_columns.size(), 1); | 434 | | const auto& rdata = jsonb_path_columns[0]->get_chars(); | 435 | | const auto& roffsets = jsonb_path_columns[0]->get_offsets(); | 436 | | | 437 | | auto create_all_null_result = [&]() { | 438 | | res = Impl::ColumnType::create(); | 439 | | res->insert_default(); | 440 | | auto nullable_column = | 441 | | ColumnNullable::create(std::move(res), ColumnUInt8::create(1, 1)); | 442 | | auto const_column = | 443 | | ColumnConst::create(std::move(nullable_column), input_rows_count); | 444 | | block.get_by_position(result).column = std::move(const_column); | 445 | | return Status::OK(); | 446 | | }; | 447 | | | 448 | | if (jsonb_data_const) { | 449 | | if (data_null_map && (*data_null_map)[0]) { | 450 | | return create_all_null_result(); | 451 | | } | 452 | | | 453 | | RETURN_IF_ERROR(Impl::scalar_vector(context, jsonb_data_column->get_data_at(0), | 454 | | rdata, roffsets, path_null_maps[0], | 455 | | res->get_data(), null_map->get_data())); | 456 | | } else if (path_const[0]) { | 457 | | if (path_null_maps[0] && (*path_null_maps[0])[0]) { | 458 | | return create_all_null_result(); | 459 | | } | 460 | | RETURN_IF_ERROR(Impl::vector_scalar(context, ldata, loffsets, data_null_map, | 461 | | jsonb_path_columns[0]->get_data_at(0), | 462 | | res->get_data(), null_map->get_data())); | 463 | | } else { | 464 | | RETURN_IF_ERROR(Impl::vector_vector(context, ldata, loffsets, data_null_map, rdata, | 465 | | roffsets, path_null_maps[0], res->get_data(), | 466 | | null_map->get_data())); | 467 | | } | 468 | | } | 469 | | | 470 | 1.32k | block.get_by_position(result).column = | 471 | 1.32k | ColumnNullable::create(std::move(res), std::move(null_map)); | 472 | 1.32k | return Status::OK(); | 473 | 1.32k | } |
_ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 376 | 1.32k | uint32_t result, size_t input_rows_count) const override { | 377 | 1.32k | DCHECK_GE(arguments.size(), 2); | 378 | | | 379 | 1.32k | ColumnPtr jsonb_data_column; | 380 | 1.32k | bool jsonb_data_const = false; | 381 | 1.32k | const NullMap* data_null_map = nullptr; | 382 | | | 383 | 1.32k | if (block.get_by_position(arguments[0]).type->get_primitive_type() != | 384 | 1.32k | PrimitiveType::TYPE_JSONB) { | 385 | 0 | return Status::InvalidArgument( | 386 | 0 | "jsonb_extract first argument should be json type, but got {}", | 387 | 0 | block.get_by_position(arguments[0]).type->get_name()); | 388 | 0 | } | 389 | | | 390 | | // prepare jsonb data column | 391 | 1.32k | std::tie(jsonb_data_column, jsonb_data_const) = | 392 | 1.32k | unpack_if_const(block.get_by_position(arguments[0]).column); | 393 | 1.32k | if (jsonb_data_column->is_nullable()) { | 394 | 1.14k | const auto& nullable_column = assert_cast<const ColumnNullable&>(*jsonb_data_column); | 395 | 1.14k | jsonb_data_column = nullable_column.get_nested_column_ptr(); | 396 | 1.14k | data_null_map = &nullable_column.get_null_map_data(); | 397 | 1.14k | } | 398 | 1.32k | const auto& ldata = assert_cast<const ColumnString*>(jsonb_data_column.get())->get_chars(); | 399 | 1.32k | const auto& loffsets = | 400 | 1.32k | assert_cast<const ColumnString*>(jsonb_data_column.get())->get_offsets(); | 401 | | | 402 | | // prepare parse path column prepare | 403 | 1.32k | std::vector<const ColumnString*> jsonb_path_columns; | 404 | 1.32k | std::vector<bool> path_const(arguments.size() - 1); | 405 | 1.32k | std::vector<const NullMap*> path_null_maps(arguments.size() - 1, nullptr); | 406 | 2.64k | for (int i = 0; i < arguments.size() - 1; ++i) { | 407 | 1.32k | ColumnPtr path_column; | 408 | 1.32k | bool is_const = false; | 409 | 1.32k | std::tie(path_column, is_const) 
= | 410 | 1.32k | unpack_if_const(block.get_by_position(arguments[i + 1]).column); | 411 | 1.32k | path_const[i] = is_const; | 412 | 1.32k | if (path_column->is_nullable()) { | 413 | 4 | const auto& nullable_column = assert_cast<const ColumnNullable&>(*path_column); | 414 | 4 | path_column = nullable_column.get_nested_column_ptr(); | 415 | 4 | path_null_maps[i] = &nullable_column.get_null_map_data(); | 416 | 4 | } | 417 | 1.32k | jsonb_path_columns.push_back(assert_cast<const ColumnString*>(path_column.get())); | 418 | 1.32k | } | 419 | | | 420 | 1.32k | auto null_map = ColumnUInt8::create(input_rows_count, 0); | 421 | 1.32k | auto res = Impl::ColumnType::create(); | 422 | | | 423 | | // execute Impl | 424 | | if constexpr (std::is_same_v<typename Impl::ReturnType, DataTypeString> || | 425 | | std::is_same_v<typename Impl::ReturnType, DataTypeJsonb>) { | 426 | | auto& res_data = res->get_chars(); | 427 | | auto& res_offsets = res->get_offsets(); | 428 | | RETURN_IF_ERROR(Impl::vector_vector_v2( | 429 | | context, ldata, loffsets, data_null_map, jsonb_data_const, jsonb_path_columns, | 430 | | path_null_maps, path_const, res_data, res_offsets, null_map->get_data())); | 431 | 1.32k | } else { | 432 | | // not support other extract type for now (e.g. int, double, ...) 
| 433 | 1.32k | DCHECK_EQ(jsonb_path_columns.size(), 1); | 434 | 1.32k | const auto& rdata = jsonb_path_columns[0]->get_chars(); | 435 | 1.32k | const auto& roffsets = jsonb_path_columns[0]->get_offsets(); | 436 | | | 437 | 1.32k | auto create_all_null_result = [&]() { | 438 | 1.32k | res = Impl::ColumnType::create(); | 439 | 1.32k | res->insert_default(); | 440 | 1.32k | auto nullable_column = | 441 | 1.32k | ColumnNullable::create(std::move(res), ColumnUInt8::create(1, 1)); | 442 | 1.32k | auto const_column = | 443 | 1.32k | ColumnConst::create(std::move(nullable_column), input_rows_count); | 444 | 1.32k | block.get_by_position(result).column = std::move(const_column); | 445 | 1.32k | return Status::OK(); | 446 | 1.32k | }; | 447 | | | 448 | 1.32k | if (jsonb_data_const) { | 449 | 2 | if (data_null_map && (*data_null_map)[0]) { | 450 | 1 | return create_all_null_result(); | 451 | 1 | } | 452 | | | 453 | 1 | RETURN_IF_ERROR(Impl::scalar_vector(context, jsonb_data_column->get_data_at(0), | 454 | 1 | rdata, roffsets, path_null_maps[0], | 455 | 1 | res->get_data(), null_map->get_data())); | 456 | 1.32k | } else if (path_const[0]) { | 457 | 1.32k | if (path_null_maps[0] && (*path_null_maps[0])[0]) { | 458 | 1 | return create_all_null_result(); | 459 | 1 | } | 460 | 1.32k | RETURN_IF_ERROR(Impl::vector_scalar(context, ldata, loffsets, data_null_map, | 461 | 1.32k | jsonb_path_columns[0]->get_data_at(0), | 462 | 1.32k | res->get_data(), null_map->get_data())); | 463 | 1.32k | } else { | 464 | 1 | RETURN_IF_ERROR(Impl::vector_vector(context, ldata, loffsets, data_null_map, rdata, | 465 | 1 | roffsets, path_null_maps[0], res->get_data(), | 466 | 1 | null_map->get_data())); | 467 | 1 | } | 468 | 1.32k | } | 469 | | | 470 | 1.32k | block.get_by_position(result).column = | 471 | 1.32k | ColumnNullable::create(std::move(res), std::move(null_map)); | 472 | 1.32k | return Status::OK(); | 473 | 1.32k | } |
_ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 376 | 9.95k | uint32_t result, size_t input_rows_count) const override { | 377 | 9.95k | DCHECK_GE(arguments.size(), 2); | 378 | | | 379 | 9.95k | ColumnPtr jsonb_data_column; | 380 | 9.95k | bool jsonb_data_const = false; | 381 | 9.95k | const NullMap* data_null_map = nullptr; | 382 | | | 383 | 9.95k | if (block.get_by_position(arguments[0]).type->get_primitive_type() != | 384 | 9.95k | PrimitiveType::TYPE_JSONB) { | 385 | 1 | return Status::InvalidArgument( | 386 | 1 | "jsonb_extract first argument should be json type, but got {}", | 387 | 1 | block.get_by_position(arguments[0]).type->get_name()); | 388 | 1 | } | 389 | | | 390 | | // prepare jsonb data column | 391 | 9.95k | std::tie(jsonb_data_column, jsonb_data_const) = | 392 | 9.95k | unpack_if_const(block.get_by_position(arguments[0]).column); | 393 | 9.95k | if (jsonb_data_column->is_nullable()) { | 394 | 8.49k | const auto& nullable_column = assert_cast<const ColumnNullable&>(*jsonb_data_column); | 395 | 8.49k | jsonb_data_column = nullable_column.get_nested_column_ptr(); | 396 | 8.49k | data_null_map = &nullable_column.get_null_map_data(); | 397 | 8.49k | } | 398 | 9.95k | const auto& ldata = assert_cast<const ColumnString*>(jsonb_data_column.get())->get_chars(); | 399 | 9.95k | const auto& loffsets = | 400 | 9.95k | assert_cast<const ColumnString*>(jsonb_data_column.get())->get_offsets(); | 401 | | | 402 | | // prepare parse path column prepare | 403 | 9.95k | std::vector<const ColumnString*> jsonb_path_columns; | 404 | 9.95k | std::vector<bool> path_const(arguments.size() - 1); | 405 | 9.95k | std::vector<const NullMap*> path_null_maps(arguments.size() - 1, nullptr); | 406 | 20.2k | for (int i = 0; i < arguments.size() - 1; ++i) { | 407 | 10.2k | ColumnPtr path_column; | 408 | 10.2k | bool is_const = false; | 409 | 10.2k | std::tie(path_column, is_const) = 
| 410 | 10.2k | unpack_if_const(block.get_by_position(arguments[i + 1]).column); | 411 | 10.2k | path_const[i] = is_const; | 412 | 10.2k | if (path_column->is_nullable()) { | 413 | 56 | const auto& nullable_column = assert_cast<const ColumnNullable&>(*path_column); | 414 | 56 | path_column = nullable_column.get_nested_column_ptr(); | 415 | 56 | path_null_maps[i] = &nullable_column.get_null_map_data(); | 416 | 56 | } | 417 | 10.2k | jsonb_path_columns.push_back(assert_cast<const ColumnString*>(path_column.get())); | 418 | 10.2k | } | 419 | | | 420 | 9.95k | auto null_map = ColumnUInt8::create(input_rows_count, 0); | 421 | 9.95k | auto res = Impl::ColumnType::create(); | 422 | | | 423 | | // execute Impl | 424 | | if constexpr (std::is_same_v<typename Impl::ReturnType, DataTypeString> || | 425 | 9.95k | std::is_same_v<typename Impl::ReturnType, DataTypeJsonb>) { | 426 | 9.95k | auto& res_data = res->get_chars(); | 427 | 9.95k | auto& res_offsets = res->get_offsets(); | 428 | 9.95k | RETURN_IF_ERROR(Impl::vector_vector_v2( | 429 | 9.95k | context, ldata, loffsets, data_null_map, jsonb_data_const, jsonb_path_columns, | 430 | 9.95k | path_null_maps, path_const, res_data, res_offsets, null_map->get_data())); | 431 | | } else { | 432 | | // not support other extract type for now (e.g. int, double, ...) 
| 433 | | DCHECK_EQ(jsonb_path_columns.size(), 1); | 434 | | const auto& rdata = jsonb_path_columns[0]->get_chars(); | 435 | | const auto& roffsets = jsonb_path_columns[0]->get_offsets(); | 436 | | | 437 | | auto create_all_null_result = [&]() { | 438 | | res = Impl::ColumnType::create(); | 439 | | res->insert_default(); | 440 | | auto nullable_column = | 441 | | ColumnNullable::create(std::move(res), ColumnUInt8::create(1, 1)); | 442 | | auto const_column = | 443 | | ColumnConst::create(std::move(nullable_column), input_rows_count); | 444 | | block.get_by_position(result).column = std::move(const_column); | 445 | | return Status::OK(); | 446 | | }; | 447 | | | 448 | | if (jsonb_data_const) { | 449 | | if (data_null_map && (*data_null_map)[0]) { | 450 | | return create_all_null_result(); | 451 | | } | 452 | | | 453 | | RETURN_IF_ERROR(Impl::scalar_vector(context, jsonb_data_column->get_data_at(0), | 454 | | rdata, roffsets, path_null_maps[0], | 455 | | res->get_data(), null_map->get_data())); | 456 | | } else if (path_const[0]) { | 457 | | if (path_null_maps[0] && (*path_null_maps[0])[0]) { | 458 | | return create_all_null_result(); | 459 | | } | 460 | | RETURN_IF_ERROR(Impl::vector_scalar(context, ldata, loffsets, data_null_map, | 461 | | jsonb_path_columns[0]->get_data_at(0), | 462 | | res->get_data(), null_map->get_data())); | 463 | | } else { | 464 | | RETURN_IF_ERROR(Impl::vector_vector(context, ldata, loffsets, data_null_map, rdata, | 465 | | roffsets, path_null_maps[0], res->get_data(), | 466 | | null_map->get_data())); | 467 | | } | 468 | | } | 469 | | | 470 | 9.94k | block.get_by_position(result).column = | 471 | 9.95k | ColumnNullable::create(std::move(res), std::move(null_map)); | 472 | 9.95k | return Status::OK(); | 473 | 9.95k | } |
_ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 376 | 6 | uint32_t result, size_t input_rows_count) const override { | 377 | 6 | DCHECK_GE(arguments.size(), 2); | 378 | | | 379 | 6 | ColumnPtr jsonb_data_column; | 380 | 6 | bool jsonb_data_const = false; | 381 | 6 | const NullMap* data_null_map = nullptr; | 382 | | | 383 | 6 | if (block.get_by_position(arguments[0]).type->get_primitive_type() != | 384 | 6 | PrimitiveType::TYPE_JSONB) { | 385 | 0 | return Status::InvalidArgument( | 386 | 0 | "jsonb_extract first argument should be json type, but got {}", | 387 | 0 | block.get_by_position(arguments[0]).type->get_name()); | 388 | 0 | } | 389 | | | 390 | | // prepare jsonb data column | 391 | 6 | std::tie(jsonb_data_column, jsonb_data_const) = | 392 | 6 | unpack_if_const(block.get_by_position(arguments[0]).column); | 393 | 6 | if (jsonb_data_column->is_nullable()) { | 394 | 6 | const auto& nullable_column = assert_cast<const ColumnNullable&>(*jsonb_data_column); | 395 | 6 | jsonb_data_column = nullable_column.get_nested_column_ptr(); | 396 | 6 | data_null_map = &nullable_column.get_null_map_data(); | 397 | 6 | } | 398 | 6 | const auto& ldata = assert_cast<const ColumnString*>(jsonb_data_column.get())->get_chars(); | 399 | 6 | const auto& loffsets = | 400 | 6 | assert_cast<const ColumnString*>(jsonb_data_column.get())->get_offsets(); | 401 | | | 402 | | // prepare parse path column prepare | 403 | 6 | std::vector<const ColumnString*> jsonb_path_columns; | 404 | 6 | std::vector<bool> path_const(arguments.size() - 1); | 405 | 6 | std::vector<const NullMap*> path_null_maps(arguments.size() - 1, nullptr); | 406 | 16 | for (int i = 0; i < arguments.size() - 1; ++i) { | 407 | 10 | ColumnPtr path_column; | 408 | 10 | bool is_const = false; | 409 | 10 | std::tie(path_column, is_const) = | 410 | 10 | unpack_if_const(block.get_by_position(arguments[i + 1]).column); | 411 
| 10 | path_const[i] = is_const; | 412 | 10 | if (path_column->is_nullable()) { | 413 | 1 | const auto& nullable_column = assert_cast<const ColumnNullable&>(*path_column); | 414 | 1 | path_column = nullable_column.get_nested_column_ptr(); | 415 | 1 | path_null_maps[i] = &nullable_column.get_null_map_data(); | 416 | 1 | } | 417 | 10 | jsonb_path_columns.push_back(assert_cast<const ColumnString*>(path_column.get())); | 418 | 10 | } | 419 | | | 420 | 6 | auto null_map = ColumnUInt8::create(input_rows_count, 0); | 421 | 6 | auto res = Impl::ColumnType::create(); | 422 | | | 423 | | // execute Impl | 424 | | if constexpr (std::is_same_v<typename Impl::ReturnType, DataTypeString> || | 425 | 6 | std::is_same_v<typename Impl::ReturnType, DataTypeJsonb>) { | 426 | 6 | auto& res_data = res->get_chars(); | 427 | 6 | auto& res_offsets = res->get_offsets(); | 428 | 6 | RETURN_IF_ERROR(Impl::vector_vector_v2( | 429 | 6 | context, ldata, loffsets, data_null_map, jsonb_data_const, jsonb_path_columns, | 430 | 6 | path_null_maps, path_const, res_data, res_offsets, null_map->get_data())); | 431 | | } else { | 432 | | // not support other extract type for now (e.g. int, double, ...) 
| 433 | | DCHECK_EQ(jsonb_path_columns.size(), 1); | 434 | | const auto& rdata = jsonb_path_columns[0]->get_chars(); | 435 | | const auto& roffsets = jsonb_path_columns[0]->get_offsets(); | 436 | | | 437 | | auto create_all_null_result = [&]() { | 438 | | res = Impl::ColumnType::create(); | 439 | | res->insert_default(); | 440 | | auto nullable_column = | 441 | | ColumnNullable::create(std::move(res), ColumnUInt8::create(1, 1)); | 442 | | auto const_column = | 443 | | ColumnConst::create(std::move(nullable_column), input_rows_count); | 444 | | block.get_by_position(result).column = std::move(const_column); | 445 | | return Status::OK(); | 446 | | }; | 447 | | | 448 | | if (jsonb_data_const) { | 449 | | if (data_null_map && (*data_null_map)[0]) { | 450 | | return create_all_null_result(); | 451 | | } | 452 | | | 453 | | RETURN_IF_ERROR(Impl::scalar_vector(context, jsonb_data_column->get_data_at(0), | 454 | | rdata, roffsets, path_null_maps[0], | 455 | | res->get_data(), null_map->get_data())); | 456 | | } else if (path_const[0]) { | 457 | | if (path_null_maps[0] && (*path_null_maps[0])[0]) { | 458 | | return create_all_null_result(); | 459 | | } | 460 | | RETURN_IF_ERROR(Impl::vector_scalar(context, ldata, loffsets, data_null_map, | 461 | | jsonb_path_columns[0]->get_data_at(0), | 462 | | res->get_data(), null_map->get_data())); | 463 | | } else { | 464 | | RETURN_IF_ERROR(Impl::vector_vector(context, ldata, loffsets, data_null_map, rdata, | 465 | | roffsets, path_null_maps[0], res->get_data(), | 466 | | null_map->get_data())); | 467 | | } | 468 | | } | 469 | | | 470 | 6 | block.get_by_position(result).column = | 471 | 6 | ColumnNullable::create(std::move(res), std::move(null_map)); | 472 | 6 | return Status::OK(); | 473 | 6 | } |
|
474 | | }; |
475 | | |
476 | | class FunctionJsonbKeys : public IFunction { |
477 | | public: |
478 | | static constexpr auto name = "json_keys"; |
479 | | static constexpr auto alias = "jsonb_keys"; |
480 | 48 | static FunctionPtr create() { return std::make_shared<FunctionJsonbKeys>(); } |
481 | 0 | String get_name() const override { return name; } |
482 | 40 | bool is_variadic() const override { return true; } |
483 | 0 | size_t get_number_of_arguments() const override { return 0; } |
484 | | |
485 | 141 | bool use_default_implementation_for_nulls() const override { return false; } |
486 | | |
487 | 39 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
488 | 39 | return make_nullable( |
489 | 39 | std::make_shared<DataTypeArray>(make_nullable(std::make_shared<DataTypeString>()))); |
490 | 39 | } |
491 | | |
492 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
493 | 102 | uint32_t result, size_t input_rows_count) const override { |
494 | 102 | DCHECK_GE(arguments.size(), 1); |
495 | 102 | DCHECK(arguments.size() == 1 || arguments.size() == 2) |
496 | 0 | << "json_keys should have 1 or 2 arguments, but got " << arguments.size(); |
497 | | |
498 | 102 | const NullMap* data_null_map = nullptr; |
499 | 102 | const ColumnString* col_from_string = nullptr; |
500 | | // prepare jsonb data column |
501 | 102 | auto&& [jsonb_data_column, json_data_const] = |
502 | 102 | unpack_if_const(block.get_by_position(arguments[0]).column); |
503 | 102 | if (jsonb_data_column->is_nullable()) { |
504 | 98 | const auto* nullable = check_and_get_column<ColumnNullable>(jsonb_data_column.get()); |
505 | 98 | col_from_string = |
506 | 98 | assert_cast<const ColumnString*>(nullable->get_nested_column_ptr().get()); |
507 | 98 | data_null_map = &nullable->get_null_map_data(); |
508 | 98 | } else { |
509 | 4 | col_from_string = assert_cast<const ColumnString*>(jsonb_data_column.get()); |
510 | 4 | } |
511 | | |
512 | | // prepare parse path column prepare, maybe we do not have path column |
513 | 102 | ColumnPtr jsonb_path_column = nullptr; |
514 | 102 | const ColumnString* jsonb_path_col = nullptr; |
515 | 102 | bool path_const = false; |
516 | 102 | const NullMap* path_null_map = nullptr; |
517 | 102 | if (arguments.size() == 2) { |
518 | | // we have should have a ColumnString for path |
519 | 75 | std::tie(jsonb_path_column, path_const) = |
520 | 75 | unpack_if_const(block.get_by_position(arguments[1]).column); |
521 | 75 | if (jsonb_path_column->is_nullable()) { |
522 | 10 | const auto* nullable = |
523 | 10 | check_and_get_column<ColumnNullable>(jsonb_path_column.get()); |
524 | 10 | jsonb_path_column = nullable->get_nested_column_ptr(); |
525 | 10 | path_null_map = &nullable->get_null_map_data(); |
526 | 10 | } |
527 | 75 | jsonb_path_col = check_and_get_column<ColumnString>(jsonb_path_column.get()); |
528 | 75 | } |
529 | | |
530 | 102 | auto null_map = ColumnUInt8::create(input_rows_count, 0); |
531 | 102 | NullMap& res_null_map = null_map->get_data(); |
532 | | |
533 | 102 | auto dst_arr = ColumnArray::create( |
534 | 102 | ColumnNullable::create(ColumnString::create(), ColumnUInt8::create()), |
535 | 102 | ColumnArray::ColumnOffsets::create()); |
536 | 102 | auto& dst_nested_column = assert_cast<ColumnNullable&>(dst_arr->get_data()); |
537 | | |
538 | 102 | Status st = std::visit( |
539 | 102 | [&](auto data_const, auto has_path, auto path_const) { |
540 | 102 | return inner_loop_impl<data_const, has_path, path_const>( |
541 | 102 | input_rows_count, *dst_arr, dst_nested_column, res_null_map, |
542 | 102 | *col_from_string, data_null_map, jsonb_path_col, path_null_map); |
543 | 102 | }, _ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESG_SG_EEDaSA_SB_SC_ Line | Count | Source | 539 | 27 | [&](auto data_const, auto has_path, auto path_const) { | 540 | 27 | return inner_loop_impl<data_const, has_path, path_const>( | 541 | 27 | input_rows_count, *dst_arr, dst_nested_column, res_null_map, | 542 | 27 | *col_from_string, data_null_map, jsonb_path_col, path_null_map); | 543 | 27 | }, |
Unexecuted instantiation: _ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESG_SF_IbLb1EEEEDaSA_SB_SC_ _ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESF_IbLb1EESG_EEDaSA_SB_SC_ Line | Count | Source | 539 | 25 | [&](auto data_const, auto has_path, auto path_const) { | 540 | 25 | return inner_loop_impl<data_const, has_path, path_const>( | 541 | 25 | input_rows_count, *dst_arr, dst_nested_column, res_null_map, | 542 | 25 | *col_from_string, data_null_map, jsonb_path_col, path_null_map); | 543 | 25 | }, |
_ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESF_IbLb1EESH_EEDaSA_SB_SC_ Line | Count | Source | 539 | 48 | [&](auto data_const, auto has_path, auto path_const) { | 540 | 48 | return inner_loop_impl<data_const, has_path, path_const>( | 541 | 48 | input_rows_count, *dst_arr, dst_nested_column, res_null_map, | 542 | 48 | *col_from_string, data_null_map, jsonb_path_col, path_null_map); | 543 | 48 | }, |
Unexecuted instantiation: _ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESF_IbLb0EESH_EEDaSA_SB_SC_ Unexecuted instantiation: _ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESF_IbLb0EESG_EEDaSA_SB_SC_ _ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESG_SF_IbLb0EEEEDaSA_SB_SC_ Line | Count | Source | 539 | 2 | [&](auto data_const, auto has_path, auto path_const) { | 540 | 2 | return inner_loop_impl<data_const, has_path, path_const>( | 541 | 2 | input_rows_count, *dst_arr, dst_nested_column, res_null_map, | 542 | 2 | *col_from_string, data_null_map, jsonb_path_col, path_null_map); | 543 | 2 | }, |
Unexecuted instantiation: _ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESG_SG_EEDaSA_SB_SC_ |
544 | 102 | make_bool_variant(json_data_const), make_bool_variant(jsonb_path_column), |
545 | 102 | make_bool_variant(path_const)); |
546 | 102 | if (!st.ok()) { |
547 | 9 | return st; |
548 | 9 | } |
549 | 93 | block.get_by_position(result).column = |
550 | 93 | ColumnNullable::create(std::move(dst_arr), std::move(null_map)); |
551 | 93 | return st; |
552 | 102 | } |
553 | | |
554 | | private: |
555 | | template <bool JSONB_DATA_CONST, bool JSONB_PATH_PARAM, bool JSON_PATH_CONST> |
556 | | static ALWAYS_INLINE Status inner_loop_impl(size_t input_rows_count, ColumnArray& dst_arr, |
557 | | ColumnNullable& dst_nested_column, |
558 | | NullMap& res_null_map, |
559 | | const ColumnString& col_from_string, |
560 | | const NullMap* jsonb_data_nullmap, |
561 | | const ColumnString* jsonb_path_column, |
562 | 102 | const NullMap* path_null_map) { |
563 | | // if path is const, we just need to parse it once |
564 | 102 | JsonbPath const_path; |
565 | 102 | if constexpr (JSONB_PATH_PARAM && JSON_PATH_CONST) { |
566 | 48 | StringRef r_raw_ref = jsonb_path_column->get_data_at(0); |
567 | 48 | if (!const_path.seek(r_raw_ref.data, r_raw_ref.size)) { |
568 | 1 | return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}", |
569 | 1 | r_raw_ref.to_string()); |
570 | 1 | } |
571 | | |
572 | 47 | if (const_path.is_wildcard()) { |
573 | 2 | return Status::InvalidJsonPath( |
574 | 2 | "In this situation, path expressions may not contain the * and ** tokens " |
575 | 2 | "or an array range."); |
576 | 2 | } |
577 | 47 | } |
578 | | |
579 | 378 | for (size_t i = 0; i < input_rows_count; ++i) { |
580 | 268 | auto index = index_check_const(i, JSONB_DATA_CONST); |
581 | | // if jsonb data is null or path column is null , we should return null |
582 | 268 | if (jsonb_data_nullmap && (*jsonb_data_nullmap)[index]) { |
583 | 23 | res_null_map[i] = 1; |
584 | 23 | dst_arr.insert_default(); |
585 | 23 | continue; |
586 | 23 | } |
587 | 245 | if constexpr (JSONB_PATH_PARAM && !JSON_PATH_CONST) { |
588 | 69 | if (path_null_map && (*path_null_map)[i]) { |
589 | 8 | res_null_map[i] = 1; |
590 | 8 | dst_arr.insert_default(); |
591 | 8 | continue; |
592 | 8 | } |
593 | 69 | } |
594 | | |
595 | 61 | auto json_data = col_from_string.get_data_at(index); |
596 | 245 | const JsonbDocument* doc = nullptr; |
597 | 245 | auto st = JsonbDocument::checkAndCreateDocument(json_data.data, json_data.size, &doc); |
598 | 245 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { |
599 | 0 | dst_arr.clear(); |
600 | 0 | return Status::InvalidArgument("jsonb data is invalid"); |
601 | 0 | } |
602 | 245 | const JsonbValue* obj_val; |
603 | 245 | JsonbFindResult find_result; |
604 | 245 | if constexpr (JSONB_PATH_PARAM) { |
605 | 191 | if constexpr (!JSON_PATH_CONST) { |
606 | 69 | auto data = jsonb_path_column->get_data_at(i); |
607 | 69 | JsonbPath path; |
608 | 69 | if (!path.seek(data.data, data.size)) { |
609 | 5 | return Status::InvalidArgument( |
610 | 5 | "Json path error: Invalid Json Path for value: {} at row: {}", |
611 | 5 | std::string_view(data.data, data.size), i); |
612 | 5 | } |
613 | | |
614 | 64 | if (path.is_wildcard()) { |
615 | 1 | return Status::InvalidJsonPath( |
616 | 1 | "In this situation, path expressions may not contain the * and ** " |
617 | 1 | "tokens " |
618 | 1 | "or an array range. at row: {}", |
619 | 1 | i); |
620 | 1 | } |
621 | 63 | find_result = doc->getValue()->findValue(path); |
622 | 122 | } else { |
623 | 122 | find_result = doc->getValue()->findValue(const_path); |
624 | 122 | } |
625 | 0 | obj_val = find_result.value; |
626 | 191 | } else { |
627 | 54 | obj_val = doc->getValue(); |
628 | 54 | } |
629 | | |
630 | 245 | if (!obj_val || !obj_val->isObject()) { |
631 | | // if jsonb data is not object we should return null |
632 | 182 | res_null_map[i] = 1; |
633 | 182 | dst_arr.insert_default(); |
634 | 182 | continue; |
635 | 182 | } |
636 | 63 | const auto* obj = obj_val->unpack<ObjectVal>(); |
637 | 75 | for (const auto& it : *obj) { |
638 | 75 | dst_nested_column.insert_data(it.getKeyStr(), it.klen()); |
639 | 75 | } |
640 | 63 | dst_arr.get_offsets().push_back(dst_nested_column.size()); |
641 | 63 | } //for |
642 | 110 | return Status::OK(); |
643 | 102 | } _ZN5doris17FunctionJsonbKeys15inner_loop_implILb0ELb0ELb0EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_ Line | Count | Source | 562 | 27 | const NullMap* path_null_map) { | 563 | | // if path is const, we just need to parse it once | 564 | 27 | JsonbPath const_path; | 565 | | if constexpr (JSONB_PATH_PARAM && JSON_PATH_CONST) { | 566 | | StringRef r_raw_ref = jsonb_path_column->get_data_at(0); | 567 | | if (!const_path.seek(r_raw_ref.data, r_raw_ref.size)) { | 568 | | return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}", | 569 | | r_raw_ref.to_string()); | 570 | | } | 571 | | | 572 | | if (const_path.is_wildcard()) { | 573 | | return Status::InvalidJsonPath( | 574 | | "In this situation, path expressions may not contain the * and ** tokens " | 575 | | "or an array range."); | 576 | | } | 577 | | } | 578 | | | 579 | 85 | for (size_t i = 0; i < input_rows_count; ++i) { | 580 | 58 | auto index = index_check_const(i, JSONB_DATA_CONST); | 581 | | // if jsonb data is null or path column is null , we should return null | 582 | 58 | if (jsonb_data_nullmap && (*jsonb_data_nullmap)[index]) { | 583 | 4 | res_null_map[i] = 1; | 584 | 4 | dst_arr.insert_default(); | 585 | 4 | continue; | 586 | 4 | } | 587 | | if constexpr (JSONB_PATH_PARAM && !JSON_PATH_CONST) { | 588 | | if (path_null_map && (*path_null_map)[i]) { | 589 | | res_null_map[i] = 1; | 590 | | dst_arr.insert_default(); | 591 | | continue; | 592 | | } | 593 | | } | 594 | | | 595 | 54 | auto json_data = col_from_string.get_data_at(index); | 596 | 54 | const JsonbDocument* doc = nullptr; | 597 | 54 | auto st = JsonbDocument::checkAndCreateDocument(json_data.data, json_data.size, &doc); | 598 | 54 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { | 599 | 0 | dst_arr.clear(); | 600 | 0 | return Status::InvalidArgument("jsonb data is 
invalid"); | 601 | 0 | } | 602 | 54 | const JsonbValue* obj_val; | 603 | 54 | JsonbFindResult find_result; | 604 | | if constexpr (JSONB_PATH_PARAM) { | 605 | | if constexpr (!JSON_PATH_CONST) { | 606 | | auto data = jsonb_path_column->get_data_at(i); | 607 | | JsonbPath path; | 608 | | if (!path.seek(data.data, data.size)) { | 609 | | return Status::InvalidArgument( | 610 | | "Json path error: Invalid Json Path for value: {} at row: {}", | 611 | | std::string_view(data.data, data.size), i); | 612 | | } | 613 | | | 614 | | if (path.is_wildcard()) { | 615 | | return Status::InvalidJsonPath( | 616 | | "In this situation, path expressions may not contain the * and ** " | 617 | | "tokens " | 618 | | "or an array range. at row: {}", | 619 | | i); | 620 | | } | 621 | | find_result = doc->getValue()->findValue(path); | 622 | | } else { | 623 | | find_result = doc->getValue()->findValue(const_path); | 624 | | } | 625 | | obj_val = find_result.value; | 626 | 54 | } else { | 627 | 54 | obj_val = doc->getValue(); | 628 | 54 | } | 629 | | | 630 | 54 | if (!obj_val || !obj_val->isObject()) { | 631 | | // if jsonb data is not object we should return null | 632 | 36 | res_null_map[i] = 1; | 633 | 36 | dst_arr.insert_default(); | 634 | 36 | continue; | 635 | 36 | } | 636 | 18 | const auto* obj = obj_val->unpack<ObjectVal>(); | 637 | 36 | for (const auto& it : *obj) { | 638 | 36 | dst_nested_column.insert_data(it.getKeyStr(), it.klen()); | 639 | 36 | } | 640 | 18 | dst_arr.get_offsets().push_back(dst_nested_column.size()); | 641 | 18 | } //for | 642 | 27 | return Status::OK(); | 643 | 27 | } |
Unexecuted instantiation: _ZN5doris17FunctionJsonbKeys15inner_loop_implILb0ELb0ELb1EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_ _ZN5doris17FunctionJsonbKeys15inner_loop_implILb0ELb1ELb0EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_ Line | Count | Source | 562 | 25 | const NullMap* path_null_map) { | 563 | | // if path is const, we just need to parse it once | 564 | 25 | JsonbPath const_path; | 565 | | if constexpr (JSONB_PATH_PARAM && JSON_PATH_CONST) { | 566 | | StringRef r_raw_ref = jsonb_path_column->get_data_at(0); | 567 | | if (!const_path.seek(r_raw_ref.data, r_raw_ref.size)) { | 568 | | return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}", | 569 | | r_raw_ref.to_string()); | 570 | | } | 571 | | | 572 | | if (const_path.is_wildcard()) { | 573 | | return Status::InvalidJsonPath( | 574 | | "In this situation, path expressions may not contain the * and ** tokens " | 575 | | "or an array range."); | 576 | | } | 577 | | } | 578 | | | 579 | 80 | for (size_t i = 0; i < input_rows_count; ++i) { | 580 | 51 | auto index = index_check_const(i, JSONB_DATA_CONST); | 581 | | // if jsonb data is null or path column is null , we should return null | 582 | 51 | if (jsonb_data_nullmap && (*jsonb_data_nullmap)[index]) { | 583 | 6 | res_null_map[i] = 1; | 584 | 6 | dst_arr.insert_default(); | 585 | 6 | continue; | 586 | 6 | } | 587 | 45 | if constexpr (JSONB_PATH_PARAM && !JSON_PATH_CONST) { | 588 | 45 | if (path_null_map && (*path_null_map)[i]) { | 589 | 4 | res_null_map[i] = 1; | 590 | 4 | dst_arr.insert_default(); | 591 | 4 | continue; | 592 | 4 | } | 593 | 45 | } | 594 | | | 595 | 41 | auto json_data = col_from_string.get_data_at(index); | 596 | 45 | const JsonbDocument* doc = 
nullptr; | 597 | 45 | auto st = JsonbDocument::checkAndCreateDocument(json_data.data, json_data.size, &doc); | 598 | 45 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { | 599 | 0 | dst_arr.clear(); | 600 | 0 | return Status::InvalidArgument("jsonb data is invalid"); | 601 | 0 | } | 602 | 45 | const JsonbValue* obj_val; | 603 | 45 | JsonbFindResult find_result; | 604 | 45 | if constexpr (JSONB_PATH_PARAM) { | 605 | 45 | if constexpr (!JSON_PATH_CONST) { | 606 | 45 | auto data = jsonb_path_column->get_data_at(i); | 607 | 45 | JsonbPath path; | 608 | 45 | if (!path.seek(data.data, data.size)) { | 609 | 5 | return Status::InvalidArgument( | 610 | 5 | "Json path error: Invalid Json Path for value: {} at row: {}", | 611 | 5 | std::string_view(data.data, data.size), i); | 612 | 5 | } | 613 | | | 614 | 40 | if (path.is_wildcard()) { | 615 | 1 | return Status::InvalidJsonPath( | 616 | 1 | "In this situation, path expressions may not contain the * and ** " | 617 | 1 | "tokens " | 618 | 1 | "or an array range. at row: {}", | 619 | 1 | i); | 620 | 1 | } | 621 | 39 | find_result = doc->getValue()->findValue(path); | 622 | | } else { | 623 | | find_result = doc->getValue()->findValue(const_path); | 624 | | } | 625 | 0 | obj_val = find_result.value; | 626 | | } else { | 627 | | obj_val = doc->getValue(); | 628 | | } | 629 | | | 630 | 45 | if (!obj_val || !obj_val->isObject()) { | 631 | | // if jsonb data is not object we should return null | 632 | 25 | res_null_map[i] = 1; | 633 | 25 | dst_arr.insert_default(); | 634 | 25 | continue; | 635 | 25 | } | 636 | 20 | const auto* obj = obj_val->unpack<ObjectVal>(); | 637 | 20 | for (const auto& it : *obj) { | 638 | 14 | dst_nested_column.insert_data(it.getKeyStr(), it.klen()); | 639 | 14 | } | 640 | 20 | dst_arr.get_offsets().push_back(dst_nested_column.size()); | 641 | 20 | } //for | 642 | 29 | return Status::OK(); | 643 | 25 | } |
_ZN5doris17FunctionJsonbKeys15inner_loop_implILb0ELb1ELb1EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_ Line | Count | Source | 562 | 48 | const NullMap* path_null_map) { | 563 | | // if path is const, we just need to parse it once | 564 | 48 | JsonbPath const_path; | 565 | 48 | if constexpr (JSONB_PATH_PARAM && JSON_PATH_CONST) { | 566 | 48 | StringRef r_raw_ref = jsonb_path_column->get_data_at(0); | 567 | 48 | if (!const_path.seek(r_raw_ref.data, r_raw_ref.size)) { | 568 | 1 | return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}", | 569 | 1 | r_raw_ref.to_string()); | 570 | 1 | } | 571 | | | 572 | 47 | if (const_path.is_wildcard()) { | 573 | 2 | return Status::InvalidJsonPath( | 574 | 2 | "In this situation, path expressions may not contain the * and ** tokens " | 575 | 2 | "or an array range."); | 576 | 2 | } | 577 | 47 | } | 578 | | | 579 | 183 | for (size_t i = 0; i < input_rows_count; ++i) { | 580 | 135 | auto index = index_check_const(i, JSONB_DATA_CONST); | 581 | | // if jsonb data is null or path column is null , we should return null | 582 | 135 | if (jsonb_data_nullmap && (*jsonb_data_nullmap)[index]) { | 583 | 13 | res_null_map[i] = 1; | 584 | 13 | dst_arr.insert_default(); | 585 | 13 | continue; | 586 | 13 | } | 587 | | if constexpr (JSONB_PATH_PARAM && !JSON_PATH_CONST) { | 588 | | if (path_null_map && (*path_null_map)[i]) { | 589 | | res_null_map[i] = 1; | 590 | | dst_arr.insert_default(); | 591 | | continue; | 592 | | } | 593 | | } | 594 | | | 595 | 122 | auto json_data = col_from_string.get_data_at(index); | 596 | 122 | const JsonbDocument* doc = nullptr; | 597 | 122 | auto st = JsonbDocument::checkAndCreateDocument(json_data.data, json_data.size, &doc); | 598 | 122 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { | 599 | 0 | dst_arr.clear(); | 600 | 0 | return 
Status::InvalidArgument("jsonb data is invalid"); | 601 | 0 | } | 602 | 122 | const JsonbValue* obj_val; | 603 | 122 | JsonbFindResult find_result; | 604 | 122 | if constexpr (JSONB_PATH_PARAM) { | 605 | | if constexpr (!JSON_PATH_CONST) { | 606 | | auto data = jsonb_path_column->get_data_at(i); | 607 | | JsonbPath path; | 608 | | if (!path.seek(data.data, data.size)) { | 609 | | return Status::InvalidArgument( | 610 | | "Json path error: Invalid Json Path for value: {} at row: {}", | 611 | | std::string_view(data.data, data.size), i); | 612 | | } | 613 | | | 614 | | if (path.is_wildcard()) { | 615 | | return Status::InvalidJsonPath( | 616 | | "In this situation, path expressions may not contain the * and ** " | 617 | | "tokens " | 618 | | "or an array range. at row: {}", | 619 | | i); | 620 | | } | 621 | | find_result = doc->getValue()->findValue(path); | 622 | 122 | } else { | 623 | 122 | find_result = doc->getValue()->findValue(const_path); | 624 | 122 | } | 625 | 122 | obj_val = find_result.value; | 626 | | } else { | 627 | | obj_val = doc->getValue(); | 628 | | } | 629 | | | 630 | 122 | if (!obj_val || !obj_val->isObject()) { | 631 | | // if jsonb data is not object we should return null | 632 | 113 | res_null_map[i] = 1; | 633 | 113 | dst_arr.insert_default(); | 634 | 113 | continue; | 635 | 113 | } | 636 | 9 | const auto* obj = obj_val->unpack<ObjectVal>(); | 637 | 9 | for (const auto& it : *obj) { | 638 | 9 | dst_nested_column.insert_data(it.getKeyStr(), it.klen()); | 639 | 9 | } | 640 | 9 | dst_arr.get_offsets().push_back(dst_nested_column.size()); | 641 | 9 | } //for | 642 | 48 | return Status::OK(); | 643 | 48 | } |
Unexecuted instantiation: _ZN5doris17FunctionJsonbKeys15inner_loop_implILb1ELb0ELb0EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_ Unexecuted instantiation: _ZN5doris17FunctionJsonbKeys15inner_loop_implILb1ELb0ELb1EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_ _ZN5doris17FunctionJsonbKeys15inner_loop_implILb1ELb1ELb0EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_ Line | Count | Source | 562 | 2 | const NullMap* path_null_map) { | 563 | | // if path is const, we just need to parse it once | 564 | 2 | JsonbPath const_path; | 565 | | if constexpr (JSONB_PATH_PARAM && JSON_PATH_CONST) { | 566 | | StringRef r_raw_ref = jsonb_path_column->get_data_at(0); | 567 | | if (!const_path.seek(r_raw_ref.data, r_raw_ref.size)) { | 568 | | return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}", | 569 | | r_raw_ref.to_string()); | 570 | | } | 571 | | | 572 | | if (const_path.is_wildcard()) { | 573 | | return Status::InvalidJsonPath( | 574 | | "In this situation, path expressions may not contain the * and ** tokens " | 575 | | "or an array range."); | 576 | | } | 577 | | } | 578 | | | 579 | 30 | for (size_t i = 0; i < input_rows_count; ++i) { | 580 | 24 | auto index = index_check_const(i, JSONB_DATA_CONST); | 581 | | // if jsonb data is null or path column is null , we should return null | 582 | 24 | if (jsonb_data_nullmap && (*jsonb_data_nullmap)[index]) { | 583 | 0 | res_null_map[i] = 1; | 584 | 0 | dst_arr.insert_default(); | 585 | 0 | continue; | 586 | 0 | } | 587 | 24 | if constexpr (JSONB_PATH_PARAM && !JSON_PATH_CONST) { | 588 | 24 | if (path_null_map && 
(*path_null_map)[i]) { | 589 | 4 | res_null_map[i] = 1; | 590 | 4 | dst_arr.insert_default(); | 591 | 4 | continue; | 592 | 4 | } | 593 | 24 | } | 594 | | | 595 | 20 | auto json_data = col_from_string.get_data_at(index); | 596 | 24 | const JsonbDocument* doc = nullptr; | 597 | 24 | auto st = JsonbDocument::checkAndCreateDocument(json_data.data, json_data.size, &doc); | 598 | 24 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { | 599 | 0 | dst_arr.clear(); | 600 | 0 | return Status::InvalidArgument("jsonb data is invalid"); | 601 | 0 | } | 602 | 24 | const JsonbValue* obj_val; | 603 | 24 | JsonbFindResult find_result; | 604 | 24 | if constexpr (JSONB_PATH_PARAM) { | 605 | 24 | if constexpr (!JSON_PATH_CONST) { | 606 | 24 | auto data = jsonb_path_column->get_data_at(i); | 607 | 24 | JsonbPath path; | 608 | 24 | if (!path.seek(data.data, data.size)) { | 609 | 0 | return Status::InvalidArgument( | 610 | 0 | "Json path error: Invalid Json Path for value: {} at row: {}", | 611 | 0 | std::string_view(data.data, data.size), i); | 612 | 0 | } | 613 | | | 614 | 24 | if (path.is_wildcard()) { | 615 | 0 | return Status::InvalidJsonPath( | 616 | 0 | "In this situation, path expressions may not contain the * and ** " | 617 | 0 | "tokens " | 618 | 0 | "or an array range. 
at row: {}", | 619 | 0 | i); | 620 | 0 | } | 621 | 24 | find_result = doc->getValue()->findValue(path); | 622 | | } else { | 623 | | find_result = doc->getValue()->findValue(const_path); | 624 | | } | 625 | 0 | obj_val = find_result.value; | 626 | | } else { | 627 | | obj_val = doc->getValue(); | 628 | | } | 629 | | | 630 | 24 | if (!obj_val || !obj_val->isObject()) { | 631 | | // if jsonb data is not object we should return null | 632 | 8 | res_null_map[i] = 1; | 633 | 8 | dst_arr.insert_default(); | 634 | 8 | continue; | 635 | 8 | } | 636 | 16 | const auto* obj = obj_val->unpack<ObjectVal>(); | 637 | 16 | for (const auto& it : *obj) { | 638 | 16 | dst_nested_column.insert_data(it.getKeyStr(), it.klen()); | 639 | 16 | } | 640 | 16 | dst_arr.get_offsets().push_back(dst_nested_column.size()); | 641 | 16 | } //for | 642 | 6 | return Status::OK(); | 643 | 2 | } |
Unexecuted instantiation: _ZN5doris17FunctionJsonbKeys15inner_loop_implILb1ELb1ELb1EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_ |
644 | | }; |
645 | | |
646 | | class FunctionJsonbExtractPath : public IFunction { |
647 | | public: |
648 | | static constexpr auto name = "json_exists_path"; |
649 | | static constexpr auto alias = "jsonb_exists_path"; |
650 | | using ColumnType = ColumnUInt8; |
651 | | using Container = typename ColumnType::Container; |
652 | 183 | static FunctionPtr create() { return std::make_shared<FunctionJsonbExtractPath>(); } |
653 | 1 | String get_name() const override { return name; } |
654 | 174 | size_t get_number_of_arguments() const override { return 2; } |
655 | 174 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
656 | | // it only needs to indicate existence and does not need to return nullable values. |
657 | 174 | const auto nullable = std::ranges::any_of( |
658 | 196 | arguments, [](const DataTypePtr& type) { return type->is_nullable(); }); |
659 | 174 | if (nullable) { |
660 | 153 | return make_nullable(std::make_shared<DataTypeUInt8>()); |
661 | 153 | } else { |
662 | 21 | return std::make_shared<DataTypeUInt8>(); |
663 | 21 | } |
664 | 174 | } |
665 | | |
666 | 1.53k | bool use_default_implementation_for_nulls() const override { return false; } |
667 | | |
668 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
669 | 1.35k | uint32_t result, size_t input_rows_count) const override { |
670 | | // prepare jsonb data column |
671 | 1.35k | auto&& [jsonb_data_column, jsonb_data_const] = |
672 | 1.35k | unpack_if_const(block.get_by_position(arguments[0]).column); |
673 | | |
674 | 1.35k | const NullMap* data_null_map = nullptr; |
675 | 1.35k | const ColumnString* data_col = nullptr; |
676 | 1.35k | if (jsonb_data_column->is_nullable()) { |
677 | 1.17k | const auto* nullable = assert_cast<const ColumnNullable*>(jsonb_data_column.get()); |
678 | 1.17k | data_col = assert_cast<const ColumnString*>(nullable->get_nested_column_ptr().get()); |
679 | 1.17k | data_null_map = &nullable->get_null_map_data(); |
680 | 1.17k | } else { |
681 | 182 | data_col = assert_cast<const ColumnString*>(jsonb_data_column.get()); |
682 | 182 | } |
683 | | |
684 | 1.35k | const auto& ldata = data_col->get_chars(); |
685 | 1.35k | const auto& loffsets = data_col->get_offsets(); |
686 | | |
687 | | // prepare parse path column prepare |
688 | 1.35k | auto&& [path_column, path_const] = |
689 | 1.35k | unpack_if_const(block.get_by_position(arguments[1]).column); |
690 | 1.35k | const ColumnString* path_col = nullptr; |
691 | 1.35k | const NullMap* path_null_map = nullptr; |
692 | 1.35k | if (path_column->is_nullable()) { |
693 | 7 | const auto* nullable = assert_cast<const ColumnNullable*>(path_column.get()); |
694 | 7 | path_col = assert_cast<const ColumnString*>(nullable->get_nested_column_ptr().get()); |
695 | 7 | path_null_map = &nullable->get_null_map_data(); |
696 | 1.35k | } else { |
697 | 1.35k | path_col = assert_cast<const ColumnString*>(path_column.get()); |
698 | 1.35k | } |
699 | | |
700 | 18.4E | DCHECK(!(jsonb_data_const && path_const)) |
701 | 18.4E | << "jsonb_data_const and path_const should not be both const"; |
702 | | |
703 | 1.35k | auto create_all_null_result = [&]() { |
704 | 3 | auto res = ColumnType::create(); |
705 | 3 | res->insert_default(); |
706 | 3 | auto nullable_column = |
707 | 3 | ColumnNullable::create(std::move(res), ColumnUInt8::create(1, 1)); |
708 | 3 | auto const_column = ColumnConst::create(std::move(nullable_column), input_rows_count); |
709 | 3 | block.get_by_position(result).column = std::move(const_column); |
710 | 3 | return Status::OK(); |
711 | 3 | }; |
712 | | |
713 | 1.35k | MutableColumnPtr result_null_map_column; |
714 | 1.35k | NullMap* result_null_map = nullptr; |
715 | 1.35k | if (data_null_map || path_null_map) { |
716 | 1.17k | result_null_map_column = ColumnUInt8::create(input_rows_count, 0); |
717 | 1.17k | result_null_map = &assert_cast<ColumnUInt8&>(*result_null_map_column).get_data(); |
718 | | |
719 | 1.17k | if (data_null_map) { |
720 | 1.17k | VectorizedUtils::update_null_map(*result_null_map, *data_null_map, |
721 | 1.17k | jsonb_data_const); |
722 | 1.17k | } |
723 | | |
724 | 1.17k | if (path_null_map) { |
725 | 7 | VectorizedUtils::update_null_map(*result_null_map, *path_null_map, path_const); |
726 | 7 | } |
727 | | |
728 | 1.17k | if (!simd::contain_zero(result_null_map->data(), input_rows_count)) { |
729 | 3 | return create_all_null_result(); |
730 | 3 | } |
731 | 1.17k | } |
732 | | |
733 | 1.35k | auto res = ColumnType::create(); |
734 | | |
735 | 1.35k | bool is_invalid_json_path = false; |
736 | | |
737 | 1.35k | const auto& rdata = path_col->get_chars(); |
738 | 1.35k | const auto& roffsets = path_col->get_offsets(); |
739 | 1.35k | if (jsonb_data_const) { |
740 | 2 | if (data_null_map && (*data_null_map)[0]) { |
741 | 0 | return create_all_null_result(); |
742 | 0 | } |
743 | 2 | scalar_vector(context, data_col->get_data_at(0), rdata, roffsets, res->get_data(), |
744 | 2 | result_null_map, is_invalid_json_path); |
745 | 1.35k | } else if (path_const) { |
746 | 1.32k | if (path_null_map && (*path_null_map)[0]) { |
747 | 0 | return create_all_null_result(); |
748 | 0 | } |
749 | 1.32k | vector_scalar(context, ldata, loffsets, path_col->get_data_at(0), res->get_data(), |
750 | 1.32k | result_null_map, is_invalid_json_path); |
751 | 1.32k | } else { |
752 | 32 | vector_vector(context, ldata, loffsets, rdata, roffsets, res->get_data(), |
753 | 32 | result_null_map, is_invalid_json_path); |
754 | 32 | } |
755 | 1.35k | if (is_invalid_json_path) { |
756 | 7 | return Status::InvalidArgument( |
757 | 7 | "Json path error: Invalid Json Path for value: {}", |
758 | 7 | std::string_view(reinterpret_cast<const char*>(rdata.data()), rdata.size())); |
759 | 7 | } |
760 | | |
761 | 1.34k | if (result_null_map) { |
762 | 1.17k | auto nullabel_col = |
763 | 1.17k | ColumnNullable::create(std::move(res), std::move(result_null_map_column)); |
764 | 1.17k | block.get_by_position(result).column = std::move(nullabel_col); |
765 | 1.17k | } else { |
766 | 178 | block.get_by_position(result).column = std::move(res); |
767 | 178 | } |
768 | 1.34k | return Status::OK(); |
769 | 1.35k | } |
770 | | |
771 | | private: |
772 | | static ALWAYS_INLINE void inner_loop_impl(size_t i, Container& res, const char* l_raw_str, |
773 | 3.01k | size_t l_str_size, JsonbPath& path) { |
774 | | // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory |
775 | 3.01k | const JsonbDocument* doc = nullptr; |
776 | 3.01k | auto st = JsonbDocument::checkAndCreateDocument(l_raw_str, l_str_size, &doc); |
777 | 3.01k | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { |
778 | 0 | return; |
779 | 0 | } |
780 | | |
781 | | // value is NOT necessary to be deleted since JsonbValue will not allocate memory |
782 | 3.01k | auto result = doc->getValue()->findValue(path); |
783 | | |
784 | 3.01k | if (result.value) { |
785 | 445 | res[i] = 1; |
786 | 445 | } |
787 | 3.01k | } |
788 | | static void vector_vector(FunctionContext* context, const ColumnString::Chars& ldata, |
789 | | const ColumnString::Offsets& loffsets, |
790 | | const ColumnString::Chars& rdata, |
791 | | const ColumnString::Offsets& roffsets, Container& res, |
792 | 35 | const NullMap* result_null_map, bool& is_invalid_json_path) { |
793 | 35 | const size_t size = loffsets.size(); |
794 | 35 | res.resize_fill(size, 0); |
795 | | |
796 | 80 | for (size_t i = 0; i < size; i++) { |
797 | 50 | if (result_null_map && (*result_null_map)[i]) { |
798 | 8 | continue; |
799 | 8 | } |
800 | | |
801 | 42 | const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]); |
802 | 42 | int l_str_size = loffsets[i] - loffsets[i - 1]; |
803 | | |
804 | 42 | const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]); |
805 | 42 | int r_str_size = roffsets[i] - roffsets[i - 1]; |
806 | | |
807 | 42 | JsonbPath path; |
808 | 42 | if (!path.seek(r_raw_str, r_str_size)) { |
809 | 5 | is_invalid_json_path = true; |
810 | 5 | return; |
811 | 5 | } |
812 | | |
813 | 37 | inner_loop_impl(i, res, l_raw_str, l_str_size, path); |
814 | 37 | } |
815 | 35 | } |
816 | | static void scalar_vector(FunctionContext* context, const StringRef& ldata, |
817 | | const ColumnString::Chars& rdata, |
818 | | const ColumnString::Offsets& roffsets, Container& res, |
819 | 2 | const NullMap* result_null_map, bool& is_invalid_json_path) { |
820 | 2 | const size_t size = roffsets.size(); |
821 | 2 | res.resize_fill(size, 0); |
822 | | |
823 | 14 | for (size_t i = 0; i < size; i++) { |
824 | 13 | if (result_null_map && (*result_null_map)[i]) { |
825 | 4 | continue; |
826 | 4 | } |
827 | 9 | const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]); |
828 | 9 | int r_str_size = roffsets[i] - roffsets[i - 1]; |
829 | | |
830 | 9 | JsonbPath path; |
831 | 9 | if (!path.seek(r_raw_str, r_str_size)) { |
832 | 1 | is_invalid_json_path = true; |
833 | 1 | return; |
834 | 1 | } |
835 | | |
836 | 8 | inner_loop_impl(i, res, ldata.data, ldata.size, path); |
837 | 8 | } |
838 | 2 | } |
839 | | static void vector_scalar(FunctionContext* context, const ColumnString::Chars& ldata, |
840 | | const ColumnString::Offsets& loffsets, const StringRef& rdata, |
841 | | Container& res, const NullMap* result_null_map, |
842 | 1.32k | bool& is_invalid_json_path) { |
843 | 1.32k | const size_t size = loffsets.size(); |
844 | 1.32k | res.resize_fill(size, 0); |
845 | | |
846 | 1.32k | JsonbPath path; |
847 | 1.32k | if (!path.seek(rdata.data, rdata.size)) { |
848 | 1 | is_invalid_json_path = true; |
849 | 1 | return; |
850 | 1 | } |
851 | | |
852 | 4.51k | for (size_t i = 0; i < size; i++) { |
853 | 3.19k | if (result_null_map && (*result_null_map)[i]) { |
854 | 232 | continue; |
855 | 232 | } |
856 | 2.96k | const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]); |
857 | 2.96k | int l_str_size = loffsets[i] - loffsets[i - 1]; |
858 | | |
859 | 2.96k | inner_loop_impl(i, res, l_raw_str, l_str_size, path); |
860 | 2.96k | } |
861 | 1.32k | } |
862 | | }; |
863 | | |
864 | | template <typename ValueType> |
865 | | struct JsonbExtractStringImpl { |
866 | | using ReturnType = typename ValueType::ReturnType; |
867 | | using ColumnType = typename ValueType::ColumnType; |
868 | | |
869 | | private: |
870 | | static ALWAYS_INLINE void inner_loop_impl(JsonbWriter* writer, size_t i, |
871 | | ColumnString::Chars& res_data, |
872 | | ColumnString::Offsets& res_offsets, NullMap& null_map, |
873 | | std::unique_ptr<JsonbToJson>& formater, |
874 | 140k | const char* l_raw, size_t l_size, JsonbPath& path) { |
875 | | // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory |
876 | 140k | const JsonbDocument* doc = nullptr; |
877 | 140k | auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc); |
878 | 140k | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { |
879 | 0 | StringOP::push_null_string(i, res_data, res_offsets, null_map); |
880 | 0 | return; |
881 | 0 | } |
882 | | |
883 | | // value is NOT necessary to be deleted since JsonbValue will not allocate memory |
884 | 140k | auto find_result = doc->getValue()->findValue(path); |
885 | | |
886 | 140k | if (UNLIKELY(!find_result.value)) { |
887 | 20.9k | StringOP::push_null_string(i, res_data, res_offsets, null_map); |
888 | 20.9k | return; |
889 | 20.9k | } |
890 | | |
891 | 120k | if constexpr (ValueType::only_get_type) { |
892 | 429 | StringOP::push_value_string(std::string_view(find_result.value->typeName()), i, |
893 | 429 | res_data, res_offsets); |
894 | 429 | return; |
895 | 119k | } else { |
896 | 119k | static_assert(std::is_same_v<DataTypeJsonb, ReturnType>); |
897 | 119k | if constexpr (ValueType::no_quotes) { |
898 | 2 | if (find_result.value->isString()) { |
899 | 1 | const auto* str_value = find_result.value->unpack<JsonbStringVal>(); |
900 | 1 | const auto* blob = str_value->getBlob(); |
901 | 1 | if (str_value->length() > 1 && blob[0] == '"' && |
902 | 1 | blob[str_value->length() - 1] == '"') { |
903 | 0 | writer->writeStartString(); |
904 | 0 | writer->writeString(blob + 1, str_value->length() - 2); |
905 | 0 | writer->writeEndString(); |
906 | 0 | StringOP::push_value_string( |
907 | 0 | std::string_view(writer->getOutput()->getBuffer(), |
908 | 0 | writer->getOutput()->getSize()), |
909 | 0 | i, res_data, res_offsets); |
910 | 0 | return; |
911 | 0 | } |
912 | 1 | } |
913 | 2 | } |
914 | 2 | writer->writeValueSimple(find_result.value); |
915 | 119k | StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(), |
916 | 119k | writer->getOutput()->getSize()), |
917 | 119k | i, res_data, res_offsets); |
918 | 119k | } |
919 | 120k | } _ZN5doris22JsonbExtractStringImplINS_13JsonbTypeTypeEE15inner_loop_implEPNS_12JsonbWriterTINS_14JsonbOutStreamEEEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEESC_RSt10unique_ptrINS_11JsonbToJsonESt14default_deleteISG_EEPKcmRNS_9JsonbPathE Line | Count | Source | 874 | 2.98k | const char* l_raw, size_t l_size, JsonbPath& path) { | 875 | | // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory | 876 | 2.98k | const JsonbDocument* doc = nullptr; | 877 | 2.98k | auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc); | 878 | 2.98k | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { | 879 | 0 | StringOP::push_null_string(i, res_data, res_offsets, null_map); | 880 | 0 | return; | 881 | 0 | } | 882 | | | 883 | | // value is NOT necessary to be deleted since JsonbValue will not allocate memory | 884 | 2.98k | auto find_result = doc->getValue()->findValue(path); | 885 | | | 886 | 2.98k | if (UNLIKELY(!find_result.value)) { | 887 | 2.55k | StringOP::push_null_string(i, res_data, res_offsets, null_map); | 888 | 2.55k | return; | 889 | 2.55k | } | 890 | | | 891 | 429 | if constexpr (ValueType::only_get_type) { | 892 | 429 | StringOP::push_value_string(std::string_view(find_result.value->typeName()), i, | 893 | 429 | res_data, res_offsets); | 894 | 429 | return; | 895 | | } else { | 896 | | static_assert(std::is_same_v<DataTypeJsonb, ReturnType>); | 897 | | if constexpr (ValueType::no_quotes) { | 898 | | if (find_result.value->isString()) { | 899 | | const auto* str_value = find_result.value->unpack<JsonbStringVal>(); | 900 | | const auto* blob = str_value->getBlob(); | 901 | | if (str_value->length() > 1 && blob[0] == '"' && | 902 | | blob[str_value->length() - 1] == '"') { | 903 | | writer->writeStartString(); | 904 | | writer->writeString(blob + 1, str_value->length() - 2); | 905 | | writer->writeEndString(); | 906 | | 
StringOP::push_value_string( | 907 | | std::string_view(writer->getOutput()->getBuffer(), | 908 | | writer->getOutput()->getSize()), | 909 | | i, res_data, res_offsets); | 910 | | return; | 911 | | } | 912 | | } | 913 | | } | 914 | | writer->writeValueSimple(find_result.value); | 915 | | StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(), | 916 | | writer->getOutput()->getSize()), | 917 | | i, res_data, res_offsets); | 918 | | } | 919 | 429 | } |
_ZN5doris22JsonbExtractStringImplINS_13JsonbTypeJsonEE15inner_loop_implEPNS_12JsonbWriterTINS_14JsonbOutStreamEEEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEESC_RSt10unique_ptrINS_11JsonbToJsonESt14default_deleteISG_EEPKcmRNS_9JsonbPathE Line | Count | Source | 874 | 137k | const char* l_raw, size_t l_size, JsonbPath& path) { | 875 | | // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory | 876 | 137k | const JsonbDocument* doc = nullptr; | 877 | 137k | auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc); | 878 | 137k | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { | 879 | 0 | StringOP::push_null_string(i, res_data, res_offsets, null_map); | 880 | 0 | return; | 881 | 0 | } | 882 | | | 883 | | // value is NOT necessary to be deleted since JsonbValue will not allocate memory | 884 | 137k | auto find_result = doc->getValue()->findValue(path); | 885 | | | 886 | 137k | if (UNLIKELY(!find_result.value)) { | 887 | 18.3k | StringOP::push_null_string(i, res_data, res_offsets, null_map); | 888 | 18.3k | return; | 889 | 18.3k | } | 890 | | | 891 | | if constexpr (ValueType::only_get_type) { | 892 | | StringOP::push_value_string(std::string_view(find_result.value->typeName()), i, | 893 | | res_data, res_offsets); | 894 | | return; | 895 | 119k | } else { | 896 | 119k | static_assert(std::is_same_v<DataTypeJsonb, ReturnType>); | 897 | | if constexpr (ValueType::no_quotes) { | 898 | | if (find_result.value->isString()) { | 899 | | const auto* str_value = find_result.value->unpack<JsonbStringVal>(); | 900 | | const auto* blob = str_value->getBlob(); | 901 | | if (str_value->length() > 1 && blob[0] == '"' && | 902 | | blob[str_value->length() - 1] == '"') { | 903 | | writer->writeStartString(); | 904 | | writer->writeString(blob + 1, str_value->length() - 2); | 905 | | writer->writeEndString(); | 906 | | StringOP::push_value_string( | 907 | | 
std::string_view(writer->getOutput()->getBuffer(), | 908 | | writer->getOutput()->getSize()), | 909 | | i, res_data, res_offsets); | 910 | | return; | 911 | | } | 912 | | } | 913 | | } | 914 | 119k | writer->writeValueSimple(find_result.value); | 915 | 119k | StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(), | 916 | 119k | writer->getOutput()->getSize()), | 917 | 119k | i, res_data, res_offsets); | 918 | 119k | } | 919 | 119k | } |
_ZN5doris22JsonbExtractStringImplINS_21JsonbTypeJsonNoQuotesEE15inner_loop_implEPNS_12JsonbWriterTINS_14JsonbOutStreamEEEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEESC_RSt10unique_ptrINS_11JsonbToJsonESt14default_deleteISG_EEPKcmRNS_9JsonbPathE Line | Count | Source | 874 | 2 | const char* l_raw, size_t l_size, JsonbPath& path) { | 875 | | // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory | 876 | 2 | const JsonbDocument* doc = nullptr; | 877 | 2 | auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc); | 878 | 2 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { | 879 | 0 | StringOP::push_null_string(i, res_data, res_offsets, null_map); | 880 | 0 | return; | 881 | 0 | } | 882 | | | 883 | | // value is NOT necessary to be deleted since JsonbValue will not allocate memory | 884 | 2 | auto find_result = doc->getValue()->findValue(path); | 885 | | | 886 | 2 | if (UNLIKELY(!find_result.value)) { | 887 | 0 | StringOP::push_null_string(i, res_data, res_offsets, null_map); | 888 | 0 | return; | 889 | 0 | } | 890 | | | 891 | | if constexpr (ValueType::only_get_type) { | 892 | | StringOP::push_value_string(std::string_view(find_result.value->typeName()), i, | 893 | | res_data, res_offsets); | 894 | | return; | 895 | 2 | } else { | 896 | 2 | static_assert(std::is_same_v<DataTypeJsonb, ReturnType>); | 897 | 2 | if constexpr (ValueType::no_quotes) { | 898 | 2 | if (find_result.value->isString()) { | 899 | 1 | const auto* str_value = find_result.value->unpack<JsonbStringVal>(); | 900 | 1 | const auto* blob = str_value->getBlob(); | 901 | 1 | if (str_value->length() > 1 && blob[0] == '"' && | 902 | 1 | blob[str_value->length() - 1] == '"') { | 903 | 0 | writer->writeStartString(); | 904 | 0 | writer->writeString(blob + 1, str_value->length() - 2); | 905 | 0 | writer->writeEndString(); | 906 | 0 | StringOP::push_value_string( | 907 | 0 | 
std::string_view(writer->getOutput()->getBuffer(), | 908 | 0 | writer->getOutput()->getSize()), | 909 | 0 | i, res_data, res_offsets); | 910 | 0 | return; | 911 | 0 | } | 912 | 1 | } | 913 | 2 | } | 914 | 2 | writer->writeValueSimple(find_result.value); | 915 | 2 | StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(), | 916 | 2 | writer->getOutput()->getSize()), | 917 | 2 | i, res_data, res_offsets); | 918 | 2 | } | 919 | 2 | } |
|
920 | | |
921 | | public: |
922 | | // for jsonb_extract_string |
923 | | static Status vector_vector_v2( |
924 | | FunctionContext* context, const ColumnString::Chars& ldata, |
925 | | const ColumnString::Offsets& loffsets, const NullMap* l_null_map, |
926 | | const bool& json_data_const, |
927 | | const std::vector<const ColumnString*>& rdata_columns, // here we can support more paths |
928 | | const std::vector<const NullMap*>& r_null_maps, const std::vector<bool>& path_const, |
929 | 11.3k | ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets, NullMap& null_map) { |
930 | 11.3k | const size_t input_rows_count = null_map.size(); |
931 | 11.3k | res_offsets.resize(input_rows_count); |
932 | | |
933 | 11.3k | auto writer = std::make_unique<JsonbWriter>(); |
934 | 11.3k | std::unique_ptr<JsonbToJson> formater; |
935 | | |
936 | | // reuseable json path list, espacially for const path |
937 | 11.3k | std::vector<JsonbPath> json_path_list; |
938 | 11.3k | json_path_list.resize(rdata_columns.size()); |
939 | | |
940 | | // lambda function to parse json path for row i and path pi |
941 | 11.6k | auto parse_json_path = [&](size_t i, size_t pi) -> Status { |
942 | 11.6k | const auto index = index_check_const(i, path_const[pi]); |
943 | | |
944 | 11.6k | const ColumnString* path_col = rdata_columns[pi]; |
945 | 11.6k | const ColumnString::Chars& rdata = path_col->get_chars(); |
946 | 11.6k | const ColumnString::Offsets& roffsets = path_col->get_offsets(); |
947 | 11.6k | size_t r_off = roffsets[index - 1]; |
948 | 11.6k | size_t r_size = roffsets[index] - r_off; |
949 | 11.6k | const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]); |
950 | | |
951 | 11.6k | JsonbPath path; |
952 | 11.6k | if (!path.seek(r_raw, r_size)) { |
953 | 7 | return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}", |
954 | 7 | std::string_view(r_raw, r_size)); |
955 | 7 | } |
956 | | |
957 | 11.6k | json_path_list[pi] = std::move(path); |
958 | | |
959 | 11.6k | return Status::OK(); |
960 | 11.6k | }; _ZZN5doris22JsonbExtractStringImplINS_13JsonbTypeTypeEE16vector_vector_v2EPNS_15FunctionContextERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS5_IjLm4096ES8_Lm16ELm15EEEPSA_RKbRKSt6vectorIPKNS_9ColumnStrIjEESaISM_EERKSI_ISF_SaISF_EERKSI_IbSaIbEERS9_RSC_SZ_ENKUlmmE_clEmm Line | Count | Source | 941 | 1.34k | auto parse_json_path = [&](size_t i, size_t pi) -> Status { | 942 | 1.34k | const auto index = index_check_const(i, path_const[pi]); | 943 | | | 944 | 1.34k | const ColumnString* path_col = rdata_columns[pi]; | 945 | 1.34k | const ColumnString::Chars& rdata = path_col->get_chars(); | 946 | 1.34k | const ColumnString::Offsets& roffsets = path_col->get_offsets(); | 947 | 1.34k | size_t r_off = roffsets[index - 1]; | 948 | 1.34k | size_t r_size = roffsets[index] - r_off; | 949 | 1.34k | const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]); | 950 | | | 951 | 1.34k | JsonbPath path; | 952 | 1.34k | if (!path.seek(r_raw, r_size)) { | 953 | 1 | return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}", | 954 | 1 | std::string_view(r_raw, r_size)); | 955 | 1 | } | 956 | | | 957 | 1.33k | json_path_list[pi] = std::move(path); | 958 | | | 959 | 1.33k | return Status::OK(); | 960 | 1.34k | }; |
_ZZN5doris22JsonbExtractStringImplINS_13JsonbTypeJsonEE16vector_vector_v2EPNS_15FunctionContextERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS5_IjLm4096ES8_Lm16ELm15EEEPSA_RKbRKSt6vectorIPKNS_9ColumnStrIjEESaISM_EERKSI_ISF_SaISF_EERKSI_IbSaIbEERS9_RSC_SZ_ENKUlmmE_clEmm Line | Count | Source | 941 | 10.3k | auto parse_json_path = [&](size_t i, size_t pi) -> Status { | 942 | 10.3k | const auto index = index_check_const(i, path_const[pi]); | 943 | | | 944 | 10.3k | const ColumnString* path_col = rdata_columns[pi]; | 945 | 10.3k | const ColumnString::Chars& rdata = path_col->get_chars(); | 946 | 10.3k | const ColumnString::Offsets& roffsets = path_col->get_offsets(); | 947 | 10.3k | size_t r_off = roffsets[index - 1]; | 948 | 10.3k | size_t r_size = roffsets[index] - r_off; | 949 | 10.3k | const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]); | 950 | | | 951 | 10.3k | JsonbPath path; | 952 | 10.3k | if (!path.seek(r_raw, r_size)) { | 953 | 6 | return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}", | 954 | 6 | std::string_view(r_raw, r_size)); | 955 | 6 | } | 956 | | | 957 | 10.3k | json_path_list[pi] = std::move(path); | 958 | | | 959 | 10.3k | return Status::OK(); | 960 | 10.3k | }; |
_ZZN5doris22JsonbExtractStringImplINS_21JsonbTypeJsonNoQuotesEE16vector_vector_v2EPNS_15FunctionContextERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS5_IjLm4096ES8_Lm16ELm15EEEPSA_RKbRKSt6vectorIPKNS_9ColumnStrIjEESaISM_EERKSI_ISF_SaISF_EERKSI_IbSaIbEERS9_RSC_SZ_ENKUlmmE_clEmm Line | Count | Source | 941 | 8 | auto parse_json_path = [&](size_t i, size_t pi) -> Status { | 942 | 8 | const auto index = index_check_const(i, path_const[pi]); | 943 | | | 944 | 8 | const ColumnString* path_col = rdata_columns[pi]; | 945 | 8 | const ColumnString::Chars& rdata = path_col->get_chars(); | 946 | 8 | const ColumnString::Offsets& roffsets = path_col->get_offsets(); | 947 | 8 | size_t r_off = roffsets[index - 1]; | 948 | 8 | size_t r_size = roffsets[index] - r_off; | 949 | 8 | const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]); | 950 | | | 951 | 8 | JsonbPath path; | 952 | 8 | if (!path.seek(r_raw, r_size)) { | 953 | 0 | return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}", | 954 | 0 | std::string_view(r_raw, r_size)); | 955 | 0 | } | 956 | | | 957 | 8 | json_path_list[pi] = std::move(path); | 958 | | | 959 | 8 | return Status::OK(); | 960 | 8 | }; |
|
961 | | |
962 | 22.9k | for (size_t pi = 0; pi < rdata_columns.size(); pi++) { |
963 | 11.6k | if (path_const[pi]) { |
964 | 11.4k | if (r_null_maps[pi] && (*r_null_maps[pi])[0]) { |
965 | 41 | continue; |
966 | 41 | } |
967 | 11.3k | RETURN_IF_ERROR(parse_json_path(0, pi)); |
968 | 11.3k | } |
969 | 11.6k | } |
970 | | |
971 | 11.3k | res_data.reserve(ldata.size()); |
972 | 154k | for (size_t i = 0; i < input_rows_count; ++i) { |
973 | 143k | if (null_map[i]) { |
974 | 0 | continue; |
975 | 0 | } |
976 | | |
977 | 143k | const auto data_index = index_check_const(i, json_data_const); |
978 | 143k | if (l_null_map && (*l_null_map)[data_index]) { |
979 | 1.90k | StringOP::push_null_string(i, res_data, res_offsets, null_map); |
980 | 1.90k | continue; |
981 | 1.90k | } |
982 | | |
983 | 141k | size_t l_off = loffsets[data_index - 1]; |
984 | 141k | size_t l_size = loffsets[data_index] - l_off; |
985 | 141k | const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]); |
986 | 141k | if (rdata_columns.size() == 1) { // just return origin value |
987 | 140k | const auto path_index = index_check_const(i, path_const[0]); |
988 | 140k | if (r_null_maps[0] && (*r_null_maps[0])[path_index]) { |
989 | 30 | StringOP::push_null_string(i, res_data, res_offsets, null_map); |
990 | 30 | continue; |
991 | 30 | } |
992 | | |
993 | 140k | if (!path_const[0]) { |
994 | 288 | RETURN_IF_ERROR(parse_json_path(i, 0)); |
995 | 288 | } |
996 | | |
997 | 140k | writer->reset(); |
998 | 140k | inner_loop_impl(writer.get(), i, res_data, res_offsets, null_map, formater, l_raw, |
999 | 140k | l_size, json_path_list[0]); |
1000 | 140k | } else { // will make array string to user |
1001 | 756 | writer->reset(); |
1002 | 756 | bool has_value = false; |
1003 | | |
1004 | | // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory |
1005 | 756 | const JsonbDocument* doc = nullptr; |
1006 | 756 | auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc); |
1007 | | |
1008 | 1.84k | for (size_t pi = 0; pi < rdata_columns.size(); ++pi) { |
1009 | 1.19k | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { |
1010 | 0 | continue; |
1011 | 0 | } |
1012 | | |
1013 | 1.19k | const auto path_index = index_check_const(i, path_const[pi]); |
1014 | 1.19k | if (r_null_maps[pi] && (*r_null_maps[pi])[path_index]) { |
1015 | 99 | StringOP::push_null_string(i, res_data, res_offsets, null_map); |
1016 | 99 | break; |
1017 | 99 | } |
1018 | | |
1019 | 1.09k | if (!path_const[pi]) { |
1020 | 28 | RETURN_IF_ERROR(parse_json_path(i, pi)); |
1021 | 28 | } |
1022 | | |
1023 | 1.09k | auto find_result = doc->getValue()->findValue(json_path_list[pi]); |
1024 | | |
1025 | 1.09k | if (find_result.value) { |
1026 | 255 | if (!has_value) { |
1027 | 141 | has_value = true; |
1028 | 141 | writer->writeStartArray(); |
1029 | 141 | } |
1030 | 255 | if (find_result.value->isArray() && find_result.is_wildcard) { |
1031 | | // To avoid getting results of nested array like [[1, 2, 3], [4, 5, 6]], |
1032 | | // if value is array, we should write all items in array, instead of write the array itself. |
1033 | | // finaly we will get results like [1, 2, 3, 4, 5, 6] |
1034 | 45 | for (const auto& item : *find_result.value->unpack<ArrayVal>()) { |
1035 | 45 | writer->writeValue(&item); |
1036 | 45 | } |
1037 | 235 | } else { |
1038 | 235 | writer->writeValue(find_result.value); |
1039 | 235 | } |
1040 | 255 | } |
1041 | 1.09k | } |
1042 | 756 | if (has_value) { |
1043 | 141 | writer->writeEndArray(); |
1044 | 141 | StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(), |
1045 | 141 | writer->getOutput()->getSize()), |
1046 | 141 | i, res_data, res_offsets); |
1047 | 615 | } else { |
1048 | 615 | StringOP::push_null_string(i, res_data, res_offsets, null_map); |
1049 | 615 | } |
1050 | 756 | } |
1051 | 141k | } //for |
1052 | 11.2k | return Status::OK(); |
1053 | 11.3k | } _ZN5doris22JsonbExtractStringImplINS_13JsonbTypeTypeEE16vector_vector_v2EPNS_15FunctionContextERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS5_IjLm4096ES8_Lm16ELm15EEEPSA_RKbRKSt6vectorIPKNS_9ColumnStrIjEESaISM_EERKSI_ISF_SaISF_EERKSI_IbSaIbEERS9_RSC_SZ_ Line | Count | Source | 929 | 1.32k | ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets, NullMap& null_map) { | 930 | 1.32k | const size_t input_rows_count = null_map.size(); | 931 | 1.32k | res_offsets.resize(input_rows_count); | 932 | | | 933 | 1.32k | auto writer = std::make_unique<JsonbWriter>(); | 934 | 1.32k | std::unique_ptr<JsonbToJson> formater; | 935 | | | 936 | | // reuseable json path list, espacially for const path | 937 | 1.32k | std::vector<JsonbPath> json_path_list; | 938 | 1.32k | json_path_list.resize(rdata_columns.size()); | 939 | | | 940 | | // lambda function to parse json path for row i and path pi | 941 | 1.32k | auto parse_json_path = [&](size_t i, size_t pi) -> Status { | 942 | 1.32k | const auto index = index_check_const(i, path_const[pi]); | 943 | | | 944 | 1.32k | const ColumnString* path_col = rdata_columns[pi]; | 945 | 1.32k | const ColumnString::Chars& rdata = path_col->get_chars(); | 946 | 1.32k | const ColumnString::Offsets& roffsets = path_col->get_offsets(); | 947 | 1.32k | size_t r_off = roffsets[index - 1]; | 948 | 1.32k | size_t r_size = roffsets[index] - r_off; | 949 | 1.32k | const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]); | 950 | | | 951 | 1.32k | JsonbPath path; | 952 | 1.32k | if (!path.seek(r_raw, r_size)) { | 953 | 1.32k | return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}", | 954 | 1.32k | std::string_view(r_raw, r_size)); | 955 | 1.32k | } | 956 | | | 957 | 1.32k | json_path_list[pi] = std::move(path); | 958 | | | 959 | 1.32k | return Status::OK(); | 960 | 1.32k | }; | 961 | | | 962 | 2.65k | for (size_t pi = 0; pi < 
rdata_columns.size(); pi++) { | 963 | 1.32k | if (path_const[pi]) { | 964 | 1.32k | if (r_null_maps[pi] && (*r_null_maps[pi])[0]) { | 965 | 1 | continue; | 966 | 1 | } | 967 | 1.32k | RETURN_IF_ERROR(parse_json_path(0, pi)); | 968 | 1.32k | } | 969 | 1.32k | } | 970 | | | 971 | 1.32k | res_data.reserve(ldata.size()); | 972 | 4.57k | for (size_t i = 0; i < input_rows_count; ++i) { | 973 | 3.24k | if (null_map[i]) { | 974 | 0 | continue; | 975 | 0 | } | 976 | | | 977 | 3.24k | const auto data_index = index_check_const(i, json_data_const); | 978 | 3.24k | if (l_null_map && (*l_null_map)[data_index]) { | 979 | 248 | StringOP::push_null_string(i, res_data, res_offsets, null_map); | 980 | 248 | continue; | 981 | 248 | } | 982 | | | 983 | 3.00k | size_t l_off = loffsets[data_index - 1]; | 984 | 3.00k | size_t l_size = loffsets[data_index] - l_off; | 985 | 3.00k | const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]); | 986 | 3.00k | if (rdata_columns.size() == 1) { // just return origin value | 987 | 3.00k | const auto path_index = index_check_const(i, path_const[0]); | 988 | 3.00k | if (r_null_maps[0] && (*r_null_maps[0])[path_index]) { | 989 | 16 | StringOP::push_null_string(i, res_data, res_offsets, null_map); | 990 | 16 | continue; | 991 | 16 | } | 992 | | | 993 | 2.98k | if (!path_const[0]) { | 994 | 18 | RETURN_IF_ERROR(parse_json_path(i, 0)); | 995 | 18 | } | 996 | | | 997 | 2.98k | writer->reset(); | 998 | 2.98k | inner_loop_impl(writer.get(), i, res_data, res_offsets, null_map, formater, l_raw, | 999 | 2.98k | l_size, json_path_list[0]); | 1000 | 2.98k | } else { // will make array string to user | 1001 | 0 | writer->reset(); | 1002 | 0 | bool has_value = false; | 1003 | | | 1004 | | // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory | 1005 | 0 | const JsonbDocument* doc = nullptr; | 1006 | 0 | auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc); | 1007 | |
| 1008 | 0 | for (size_t pi = 0; pi < rdata_columns.size(); ++pi) { | 1009 | 0 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { | 1010 | 0 | continue; | 1011 | 0 | } | 1012 | | | 1013 | 0 | const auto path_index = index_check_const(i, path_const[pi]); | 1014 | 0 | if (r_null_maps[pi] && (*r_null_maps[pi])[path_index]) { | 1015 | 0 | StringOP::push_null_string(i, res_data, res_offsets, null_map); | 1016 | 0 | break; | 1017 | 0 | } | 1018 | | | 1019 | 0 | if (!path_const[pi]) { | 1020 | 0 | RETURN_IF_ERROR(parse_json_path(i, pi)); | 1021 | 0 | } | 1022 | | | 1023 | 0 | auto find_result = doc->getValue()->findValue(json_path_list[pi]); | 1024 | |
| 1025 | 0 | if (find_result.value) { | 1026 | 0 | if (!has_value) { | 1027 | 0 | has_value = true; | 1028 | 0 | writer->writeStartArray(); | 1029 | 0 | } | 1030 | 0 | if (find_result.value->isArray() && find_result.is_wildcard) { | 1031 | | // To avoid getting results of nested array like [[1, 2, 3], [4, 5, 6]], | 1032 | | // if value is array, we should write all items in array, instead of write the array itself. | 1033 | | // finaly we will get results like [1, 2, 3, 4, 5, 6] | 1034 | 0 | for (const auto& item : *find_result.value->unpack<ArrayVal>()) { | 1035 | 0 | writer->writeValue(&item); | 1036 | 0 | } | 1037 | 0 | } else { | 1038 | 0 | writer->writeValue(find_result.value); | 1039 | 0 | } | 1040 | 0 | } | 1041 | 0 | } | 1042 | 0 | if (has_value) { | 1043 | 0 | writer->writeEndArray(); | 1044 | 0 | StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(), | 1045 | 0 | writer->getOutput()->getSize()), | 1046 | 0 | i, res_data, res_offsets); | 1047 | 0 | } else { | 1048 | 0 | StringOP::push_null_string(i, res_data, res_offsets, null_map); | 1049 | 0 | } | 1050 | 0 | } | 1051 | 3.00k | } //for | 1052 | 1.32k | return Status::OK(); | 1053 | 1.32k | } |
_ZN5doris22JsonbExtractStringImplINS_13JsonbTypeJsonEE16vector_vector_v2EPNS_15FunctionContextERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS5_IjLm4096ES8_Lm16ELm15EEEPSA_RKbRKSt6vectorIPKNS_9ColumnStrIjEESaISM_EERKSI_ISF_SaISF_EERKSI_IbSaIbEERS9_RSC_SZ_ Line | Count | Source | 929 | 9.96k | ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets, NullMap& null_map) { | 930 | 9.96k | const size_t input_rows_count = null_map.size(); | 931 | 9.96k | res_offsets.resize(input_rows_count); | 932 | | | 933 | 9.96k | auto writer = std::make_unique<JsonbWriter>(); | 934 | 9.96k | std::unique_ptr<JsonbToJson> formater; | 935 | | | 936 | | // reuseable json path list, espacially for const path | 937 | 9.96k | std::vector<JsonbPath> json_path_list; | 938 | 9.96k | json_path_list.resize(rdata_columns.size()); | 939 | | | 940 | | // lambda function to parse json path for row i and path pi | 941 | 9.96k | auto parse_json_path = [&](size_t i, size_t pi) -> Status { | 942 | 9.96k | const auto index = index_check_const(i, path_const[pi]); | 943 | | | 944 | 9.96k | const ColumnString* path_col = rdata_columns[pi]; | 945 | 9.96k | const ColumnString::Chars& rdata = path_col->get_chars(); | 946 | 9.96k | const ColumnString::Offsets& roffsets = path_col->get_offsets(); | 947 | 9.96k | size_t r_off = roffsets[index - 1]; | 948 | 9.96k | size_t r_size = roffsets[index] - r_off; | 949 | 9.96k | const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]); | 950 | | | 951 | 9.96k | JsonbPath path; | 952 | 9.96k | if (!path.seek(r_raw, r_size)) { | 953 | 9.96k | return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}", | 954 | 9.96k | std::string_view(r_raw, r_size)); | 955 | 9.96k | } | 956 | | | 957 | 9.96k | json_path_list[pi] = std::move(path); | 958 | | | 959 | 9.96k | return Status::OK(); | 960 | 9.96k | }; | 961 | | | 962 | 20.2k | for (size_t pi = 0; pi < rdata_columns.size(); pi++) { | 963 | 
10.2k | if (path_const[pi]) { | 964 | 10.0k | if (r_null_maps[pi] && (*r_null_maps[pi])[0]) { | 965 | 40 | continue; | 966 | 40 | } | 967 | 10.0k | RETURN_IF_ERROR(parse_json_path(0, pi)); | 968 | 10.0k | } | 969 | 10.2k | } | 970 | | | 971 | 9.96k | res_data.reserve(ldata.size()); | 972 | 149k | for (size_t i = 0; i < input_rows_count; ++i) { | 973 | 139k | if (null_map[i]) { | 974 | 0 | continue; | 975 | 0 | } | 976 | | | 977 | 139k | const auto data_index = index_check_const(i, json_data_const); | 978 | 139k | if (l_null_map && (*l_null_map)[data_index]) { | 979 | 1.65k | StringOP::push_null_string(i, res_data, res_offsets, null_map); | 980 | 1.65k | continue; | 981 | 1.65k | } | 982 | | | 983 | 138k | size_t l_off = loffsets[data_index - 1]; | 984 | 138k | size_t l_size = loffsets[data_index] - l_off; | 985 | 138k | const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]); | 986 | 138k | if (rdata_columns.size() == 1) { // just return origin value | 987 | 137k | const auto path_index = index_check_const(i, path_const[0]); | 988 | 137k | if (r_null_maps[0] && (*r_null_maps[0])[path_index]) { | 989 | 14 | StringOP::push_null_string(i, res_data, res_offsets, null_map); | 990 | 14 | continue; | 991 | 14 | } | 992 | | | 993 | 137k | if (!path_const[0]) { | 994 | 268 | RETURN_IF_ERROR(parse_json_path(i, 0)); | 995 | 268 | } | 996 | | | 997 | 137k | writer->reset(); | 998 | 137k | inner_loop_impl(writer.get(), i, res_data, res_offsets, null_map, formater, l_raw, | 999 | 137k | l_size, json_path_list[0]); | 1000 | 137k | } else { // will make array string to user | 1001 | 753 | writer->reset(); | 1002 | 753 | bool has_value = false; | 1003 | | | 1004 | | // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory | 1005 | 753 | const JsonbDocument* doc = nullptr; | 1006 | 753 | auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc); | 1007 | | | 1008 | 1.83k | for (size_t pi = 0; pi < rdata_columns.size(); ++pi) { | 1009 | 
1.18k | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { | 1010 | 0 | continue; | 1011 | 0 | } | 1012 | | | 1013 | 1.18k | const auto path_index = index_check_const(i, path_const[pi]); | 1014 | 1.18k | if (r_null_maps[pi] && (*r_null_maps[pi])[path_index]) { | 1015 | 98 | StringOP::push_null_string(i, res_data, res_offsets, null_map); | 1016 | 98 | break; | 1017 | 98 | } | 1018 | | | 1019 | 1.08k | if (!path_const[pi]) { | 1020 | 22 | RETURN_IF_ERROR(parse_json_path(i, pi)); | 1021 | 22 | } | 1022 | | | 1023 | 1.08k | auto find_result = doc->getValue()->findValue(json_path_list[pi]); | 1024 | | | 1025 | 1.08k | if (find_result.value) { | 1026 | 249 | if (!has_value) { | 1027 | 138 | has_value = true; | 1028 | 138 | writer->writeStartArray(); | 1029 | 138 | } | 1030 | 249 | if (find_result.value->isArray() && find_result.is_wildcard) { | 1031 | | // To avoid getting results of nested array like [[1, 2, 3], [4, 5, 6]], | 1032 | | // if value is array, we should write all items in array, instead of write the array itself. | 1033 | | // finaly we will get results like [1, 2, 3, 4, 5, 6] | 1034 | 45 | for (const auto& item : *find_result.value->unpack<ArrayVal>()) { | 1035 | 45 | writer->writeValue(&item); | 1036 | 45 | } | 1037 | 229 | } else { | 1038 | 229 | writer->writeValue(find_result.value); | 1039 | 229 | } | 1040 | 249 | } | 1041 | 1.08k | } | 1042 | 753 | if (has_value) { | 1043 | 138 | writer->writeEndArray(); | 1044 | 138 | StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(), | 1045 | 138 | writer->getOutput()->getSize()), | 1046 | 138 | i, res_data, res_offsets); | 1047 | 615 | } else { | 1048 | 615 | StringOP::push_null_string(i, res_data, res_offsets, null_map); | 1049 | 615 | } | 1050 | 753 | } | 1051 | 138k | } //for | 1052 | 9.96k | return Status::OK(); | 1053 | 9.96k | } |
_ZN5doris22JsonbExtractStringImplINS_21JsonbTypeJsonNoQuotesEE16vector_vector_v2EPNS_15FunctionContextERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS5_IjLm4096ES8_Lm16ELm15EEEPSA_RKbRKSt6vectorIPKNS_9ColumnStrIjEESaISM_EERKSI_ISF_SaISF_EERKSI_IbSaIbEERS9_RSC_SZ_ Line | Count | Source | 929 | 6 | ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets, NullMap& null_map) { | 930 | 6 | const size_t input_rows_count = null_map.size(); | 931 | 6 | res_offsets.resize(input_rows_count); | 932 | | | 933 | 6 | auto writer = std::make_unique<JsonbWriter>(); | 934 | 6 | std::unique_ptr<JsonbToJson> formater; | 935 | | | 936 | | // reuseable json path list, espacially for const path | 937 | 6 | std::vector<JsonbPath> json_path_list; | 938 | 6 | json_path_list.resize(rdata_columns.size()); | 939 | | | 940 | | // lambda function to parse json path for row i and path pi | 941 | 6 | auto parse_json_path = [&](size_t i, size_t pi) -> Status { | 942 | 6 | const auto index = index_check_const(i, path_const[pi]); | 943 | | | 944 | 6 | const ColumnString* path_col = rdata_columns[pi]; | 945 | 6 | const ColumnString::Chars& rdata = path_col->get_chars(); | 946 | 6 | const ColumnString::Offsets& roffsets = path_col->get_offsets(); | 947 | 6 | size_t r_off = roffsets[index - 1]; | 948 | 6 | size_t r_size = roffsets[index] - r_off; | 949 | 6 | const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]); | 950 | | | 951 | 6 | JsonbPath path; | 952 | 6 | if (!path.seek(r_raw, r_size)) { | 953 | 6 | return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}", | 954 | 6 | std::string_view(r_raw, r_size)); | 955 | 6 | } | 956 | | | 957 | 6 | json_path_list[pi] = std::move(path); | 958 | | | 959 | 6 | return Status::OK(); | 960 | 6 | }; | 961 | | | 962 | 16 | for (size_t pi = 0; pi < rdata_columns.size(); pi++) { | 963 | 10 | if (path_const[pi]) { | 964 | 0 | if (r_null_maps[pi] && (*r_null_maps[pi])[0]) { 
| 965 | 0 | continue; | 966 | 0 | } | 967 | 0 | RETURN_IF_ERROR(parse_json_path(0, pi)); | 968 | 0 | } | 969 | 10 | } | 970 | | | 971 | 6 | res_data.reserve(ldata.size()); | 972 | 12 | for (size_t i = 0; i < input_rows_count; ++i) { | 973 | 6 | if (null_map[i]) { | 974 | 0 | continue; | 975 | 0 | } | 976 | | | 977 | 6 | const auto data_index = index_check_const(i, json_data_const); | 978 | 6 | if (l_null_map && (*l_null_map)[data_index]) { | 979 | 1 | StringOP::push_null_string(i, res_data, res_offsets, null_map); | 980 | 1 | continue; | 981 | 1 | } | 982 | | | 983 | 5 | size_t l_off = loffsets[data_index - 1]; | 984 | 5 | size_t l_size = loffsets[data_index] - l_off; | 985 | 5 | const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]); | 986 | 5 | if (rdata_columns.size() == 1) { // just return origin value | 987 | 2 | const auto path_index = index_check_const(i, path_const[0]); | 988 | 2 | if (r_null_maps[0] && (*r_null_maps[0])[path_index]) { | 989 | 0 | StringOP::push_null_string(i, res_data, res_offsets, null_map); | 990 | 0 | continue; | 991 | 0 | } | 992 | | | 993 | 2 | if (!path_const[0]) { | 994 | 2 | RETURN_IF_ERROR(parse_json_path(i, 0)); | 995 | 2 | } | 996 | | | 997 | 2 | writer->reset(); | 998 | 2 | inner_loop_impl(writer.get(), i, res_data, res_offsets, null_map, formater, l_raw, | 999 | 2 | l_size, json_path_list[0]); | 1000 | 3 | } else { // will make array string to user | 1001 | 3 | writer->reset(); | 1002 | 3 | bool has_value = false; | 1003 | | | 1004 | | // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory | 1005 | 3 | const JsonbDocument* doc = nullptr; | 1006 | 3 | auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc); | 1007 | | | 1008 | 9 | for (size_t pi = 0; pi < rdata_columns.size(); ++pi) { | 1009 | 7 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { | 1010 | 0 | continue; | 1011 | 0 | } | 1012 | | | 1013 | 7 | const auto path_index = index_check_const(i, path_const[pi]); | 
1014 | 7 | if (r_null_maps[pi] && (*r_null_maps[pi])[path_index]) { | 1015 | 1 | StringOP::push_null_string(i, res_data, res_offsets, null_map); | 1016 | 1 | break; | 1017 | 1 | } | 1018 | | | 1019 | 6 | if (!path_const[pi]) { | 1020 | 6 | RETURN_IF_ERROR(parse_json_path(i, pi)); | 1021 | 6 | } | 1022 | | | 1023 | 6 | auto find_result = doc->getValue()->findValue(json_path_list[pi]); | 1024 | | | 1025 | 6 | if (find_result.value) { | 1026 | 6 | if (!has_value) { | 1027 | 3 | has_value = true; | 1028 | 3 | writer->writeStartArray(); | 1029 | 3 | } | 1030 | 6 | if (find_result.value->isArray() && find_result.is_wildcard) { | 1031 | | // To avoid getting results of nested array like [[1, 2, 3], [4, 5, 6]], | 1032 | | // if value is array, we should write all items in array, instead of write the array itself. | 1033 | | // finaly we will get results like [1, 2, 3, 4, 5, 6] | 1034 | 0 | for (const auto& item : *find_result.value->unpack<ArrayVal>()) { | 1035 | 0 | writer->writeValue(&item); | 1036 | 0 | } | 1037 | 6 | } else { | 1038 | 6 | writer->writeValue(find_result.value); | 1039 | 6 | } | 1040 | 6 | } | 1041 | 6 | } | 1042 | 3 | if (has_value) { | 1043 | 3 | writer->writeEndArray(); | 1044 | 3 | StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(), | 1045 | 3 | writer->getOutput()->getSize()), | 1046 | 3 | i, res_data, res_offsets); | 1047 | 3 | } else { | 1048 | 0 | StringOP::push_null_string(i, res_data, res_offsets, null_map); | 1049 | 0 | } | 1050 | 3 | } | 1051 | 5 | } //for | 1052 | 6 | return Status::OK(); | 1053 | 6 | } |
|
1054 | | |
1055 | | static Status vector_vector(FunctionContext* context, const ColumnString::Chars& ldata, |
1056 | | const ColumnString::Offsets& loffsets, const NullMap* l_null_map, |
1057 | | const ColumnString::Chars& rdata, |
1058 | | const ColumnString::Offsets& roffsets, const NullMap* r_null_map, |
1059 | | ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets, |
1060 | | NullMap& null_map) { |
1061 | | size_t input_rows_count = loffsets.size(); |
1062 | | res_offsets.resize(input_rows_count); |
1063 | | |
1064 | | std::unique_ptr<JsonbToJson> formater; |
1065 | | |
1066 | | JsonbWriter writer; |
1067 | | for (size_t i = 0; i < input_rows_count; ++i) { |
1068 | | if (l_null_map && (*l_null_map)[i]) { |
1069 | | StringOP::push_null_string(i, res_data, res_offsets, null_map); |
1070 | | continue; |
1071 | | } |
1072 | | |
1073 | | if (r_null_map && (*r_null_map)[i]) { |
1074 | | StringOP::push_null_string(i, res_data, res_offsets, null_map); |
1075 | | continue; |
1076 | | } |
1077 | | |
1078 | | int l_size = loffsets[i] - loffsets[i - 1]; |
1079 | | const char* l_raw = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]); |
1080 | | |
1081 | | int r_size = roffsets[i] - roffsets[i - 1]; |
1082 | | const char* r_raw = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]); |
1083 | | |
1084 | | JsonbPath path; |
1085 | | if (!path.seek(r_raw, r_size)) { |
1086 | | return Status::InvalidArgument( |
1087 | | "Json path error: Invalid Json Path for value: {} at row: {}", |
1088 | | std::string_view(r_raw, r_size), i); |
1089 | | } |
1090 | | |
1091 | | writer.reset(); |
1092 | | inner_loop_impl(&writer, i, res_data, res_offsets, null_map, formater, l_raw, l_size, |
1093 | | path); |
1094 | | } //for |
1095 | | return Status::OK(); |
1096 | | } //function |
1097 | | |
1098 | | static Status vector_scalar(FunctionContext* context, const ColumnString::Chars& ldata, |
1099 | | const ColumnString::Offsets& loffsets, const NullMap* l_null_map, |
1100 | | const StringRef& rdata, ColumnString::Chars& res_data, |
1101 | | ColumnString::Offsets& res_offsets, NullMap& null_map) { |
1102 | | size_t input_rows_count = loffsets.size(); |
1103 | | res_offsets.resize(input_rows_count); |
1104 | | |
1105 | | std::unique_ptr<JsonbToJson> formater; |
1106 | | |
1107 | | JsonbPath path; |
1108 | | if (!path.seek(rdata.data, rdata.size)) { |
1109 | | return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}", |
1110 | | std::string_view(rdata.data, rdata.size)); |
1111 | | } |
1112 | | |
1113 | | JsonbWriter writer; |
1114 | | for (size_t i = 0; i < input_rows_count; ++i) { |
1115 | | if (l_null_map && (*l_null_map)[i]) { |
1116 | | StringOP::push_null_string(i, res_data, res_offsets, null_map); |
1117 | | continue; |
1118 | | } |
1119 | | |
1120 | | int l_size = loffsets[i] - loffsets[i - 1]; |
1121 | | const char* l_raw = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]); |
1122 | | |
1123 | | writer.reset(); |
1124 | | inner_loop_impl(&writer, i, res_data, res_offsets, null_map, formater, l_raw, l_size, |
1125 | | path); |
1126 | | } //for |
1127 | | return Status::OK(); |
1128 | | } //function |
1129 | | |
1130 | | static Status scalar_vector(FunctionContext* context, const StringRef& ldata, |
1131 | | const ColumnString::Chars& rdata, |
1132 | | const ColumnString::Offsets& roffsets, const NullMap* r_null_map, |
1133 | | ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets, |
1134 | | NullMap& null_map) { |
1135 | | size_t input_rows_count = roffsets.size(); |
1136 | | res_offsets.resize(input_rows_count); |
1137 | | |
1138 | | std::unique_ptr<JsonbToJson> formater; |
1139 | | |
1140 | | JsonbWriter writer; |
1141 | | |
1142 | | for (size_t i = 0; i < input_rows_count; ++i) { |
1143 | | if (r_null_map && (*r_null_map)[i]) { |
1144 | | StringOP::push_null_string(i, res_data, res_offsets, null_map); |
1145 | | continue; |
1146 | | } |
1147 | | |
1148 | | int r_size = roffsets[i] - roffsets[i - 1]; |
1149 | | const char* r_raw = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]); |
1150 | | |
1151 | | JsonbPath path; |
1152 | | if (!path.seek(r_raw, r_size)) { |
1153 | | return Status::InvalidArgument( |
1154 | | "Json path error: Invalid Json Path for value: {} at row: {}", |
1155 | | std::string_view(r_raw, r_size), i); |
1156 | | } |
1157 | | |
1158 | | writer.reset(); |
1159 | | inner_loop_impl(&writer, i, res_data, res_offsets, null_map, formater, ldata.data, |
1160 | | ldata.size, path); |
1161 | | } //for |
1162 | | return Status::OK(); |
1163 | | } //function |
1164 | | }; |
1165 | | |
1166 | | struct JsonbExtractIsnull { |
1167 | | static constexpr auto name = "json_extract_isnull"; |
1168 | | static constexpr auto alias = "jsonb_extract_isnull"; |
1169 | | |
1170 | | using ReturnType = DataTypeUInt8; |
1171 | | using ColumnType = ColumnUInt8; |
1172 | | using Container = typename ColumnType::Container; |
1173 | | |
1174 | | private: |
1175 | | static ALWAYS_INLINE void inner_loop_impl(size_t i, Container& res, NullMap& null_map, |
1176 | | const char* l_raw_str, size_t l_str_size, |
1177 | 2.97k | JsonbPath& path) { |
1178 | 2.97k | if (null_map[i]) { |
1179 | 0 | res[i] = 0; |
1180 | 0 | return; |
1181 | 0 | } |
1182 | | |
1183 | | // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory |
1184 | 2.97k | const JsonbDocument* doc = nullptr; |
1185 | 2.97k | auto st = JsonbDocument::checkAndCreateDocument(l_raw_str, l_str_size, &doc); |
1186 | 2.97k | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { |
1187 | 0 | null_map[i] = 1; |
1188 | 0 | res[i] = 0; |
1189 | 0 | return; |
1190 | 0 | } |
1191 | | |
1192 | | // value is NOT necessary to be deleted since JsonbValue will not allocate memory |
1193 | 2.97k | auto find_result = doc->getValue()->findValue(path); |
1194 | 2.97k | const auto* value = find_result.value; |
1195 | | |
1196 | 2.97k | if (UNLIKELY(!value)) { |
1197 | 2.55k | null_map[i] = 1; |
1198 | 2.55k | res[i] = 0; |
1199 | 2.55k | return; |
1200 | 2.55k | } |
1201 | | |
1202 | 417 | res[i] = value->isNull(); |
1203 | 417 | } |
1204 | | |
1205 | | public: |
1206 | | // for jsonb_extract_int/int64/double |
1207 | | static Status vector_vector(FunctionContext* context, const ColumnString::Chars& ldata, |
1208 | | const ColumnString::Offsets& loffsets, const NullMap* l_null_map, |
1209 | | const ColumnString::Chars& rdata, |
1210 | | const ColumnString::Offsets& roffsets, const NullMap* r_null_map, |
1211 | 1 | Container& res, NullMap& null_map) { |
1212 | 1 | size_t size = loffsets.size(); |
1213 | 1 | res.resize(size); |
1214 | | |
1215 | 13 | for (size_t i = 0; i < loffsets.size(); i++) { |
1216 | 12 | if ((l_null_map && (*l_null_map)[i]) || (r_null_map && (*r_null_map)[i])) { |
1217 | 8 | res[i] = 0; |
1218 | 8 | null_map[i] = 1; |
1219 | 8 | continue; |
1220 | 8 | } |
1221 | | |
1222 | 4 | const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]); |
1223 | 4 | int l_str_size = loffsets[i] - loffsets[i - 1]; |
1224 | | |
1225 | 4 | const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]); |
1226 | 4 | int r_str_size = roffsets[i] - roffsets[i - 1]; |
1227 | | |
1228 | 4 | JsonbPath path; |
1229 | 4 | if (!path.seek(r_raw_str, r_str_size)) { |
1230 | 0 | return Status::InvalidArgument( |
1231 | 0 | "Json path error: Invalid Json Path for value: {} at row: {}", |
1232 | 0 | std::string_view(r_raw_str, r_str_size), i); |
1233 | 0 | } |
1234 | | |
1235 | 4 | inner_loop_impl(i, res, null_map, l_raw_str, l_str_size, path); |
1236 | 4 | } //for |
1237 | 1 | return Status::OK(); |
1238 | 1 | } //function |
1239 | | |
1240 | | static Status scalar_vector(FunctionContext* context, const StringRef& ldata, |
1241 | | const ColumnString::Chars& rdata, |
1242 | | const ColumnString::Offsets& roffsets, const NullMap* r_null_map, |
1243 | 1 | Container& res, NullMap& null_map) { |
1244 | 1 | size_t size = roffsets.size(); |
1245 | 1 | res.resize(size); |
1246 | | |
1247 | 13 | for (size_t i = 0; i < size; i++) { |
1248 | 12 | if (r_null_map && (*r_null_map)[i]) { |
1249 | 4 | res[i] = 0; |
1250 | 4 | null_map[i] = 1; |
1251 | 4 | continue; |
1252 | 4 | } |
1253 | | |
1254 | 8 | const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]); |
1255 | 8 | int r_str_size = roffsets[i] - roffsets[i - 1]; |
1256 | | |
1257 | 8 | JsonbPath path; |
1258 | 8 | if (!path.seek(r_raw_str, r_str_size)) { |
1259 | 0 | return Status::InvalidArgument( |
1260 | 0 | "Json path error: Invalid Json Path for value: {} at row: {}", |
1261 | 0 | std::string_view(r_raw_str, r_str_size), i); |
1262 | 0 | } |
1263 | | |
1264 | 8 | inner_loop_impl(i, res, null_map, ldata.data, ldata.size, path); |
1265 | 8 | } //for |
1266 | 1 | return Status::OK(); |
1267 | 1 | } //function |
1268 | | |
1269 | | static Status vector_scalar(FunctionContext* context, const ColumnString::Chars& ldata, |
1270 | | const ColumnString::Offsets& loffsets, const NullMap* l_null_map, |
1271 | 1.32k | const StringRef& rdata, Container& res, NullMap& null_map) { |
1272 | 1.32k | size_t size = loffsets.size(); |
1273 | 1.32k | res.resize(size); |
1274 | | |
1275 | 1.32k | JsonbPath path; |
1276 | 1.32k | if (!path.seek(rdata.data, rdata.size)) { |
1277 | 0 | return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}", |
1278 | 0 | std::string_view(rdata.data, rdata.size)); |
1279 | 0 | } |
1280 | | |
1281 | 4.50k | for (size_t i = 0; i < loffsets.size(); i++) { |
1282 | 3.18k | if (l_null_map && (*l_null_map)[i]) { |
1283 | 228 | res[i] = 0; |
1284 | 228 | null_map[i] = 1; |
1285 | 228 | continue; |
1286 | 228 | } |
1287 | | |
1288 | 2.95k | const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]); |
1289 | 2.95k | int l_str_size = loffsets[i] - loffsets[i - 1]; |
1290 | | |
1291 | 2.95k | inner_loop_impl(i, res, null_map, l_raw_str, l_str_size, path); |
1292 | 2.95k | } //for |
1293 | 1.32k | return Status::OK(); |
1294 | 1.32k | } //function |
1295 | | }; |
1296 | | |
1297 | | struct JsonbTypeJson { |
1298 | | using T = std::string; |
1299 | | using ReturnType = DataTypeJsonb; |
1300 | | using ColumnType = ColumnString; |
1301 | | static const bool only_get_type = false; |
1302 | | static const bool no_quotes = false; |
1303 | | }; |
1304 | | |
1305 | | struct JsonbTypeJsonNoQuotes { |
1306 | | using T = std::string; |
1307 | | using ReturnType = DataTypeJsonb; |
1308 | | using ColumnType = ColumnString; |
1309 | | static const bool only_get_type = false; |
1310 | | static const bool no_quotes = true; |
1311 | | }; |
1312 | | |
1313 | | struct JsonbTypeType { |
1314 | | using T = std::string; |
1315 | | using ReturnType = DataTypeString; |
1316 | | using ColumnType = ColumnString; |
1317 | | static const bool only_get_type = true; |
1318 | | static const bool no_quotes = false; |
1319 | | }; |
1320 | | |
1321 | | struct JsonbExtractJsonb : public JsonbExtractStringImpl<JsonbTypeJson> { |
1322 | | static constexpr auto name = "jsonb_extract"; |
1323 | | static constexpr auto alias = "json_extract"; |
1324 | | }; |
1325 | | |
1326 | | struct JsonbExtractJsonbNoQuotes : public JsonbExtractStringImpl<JsonbTypeJsonNoQuotes> { |
1327 | | static constexpr auto name = "jsonb_extract_no_quotes"; |
1328 | | static constexpr auto alias = "json_extract_no_quotes"; |
1329 | | }; |
1330 | | |
1331 | | struct JsonbTypeImpl : public JsonbExtractStringImpl<JsonbTypeType> { |
1332 | | static constexpr auto name = "json_type"; |
1333 | | static constexpr auto alias = "jsonb_type"; |
1334 | | }; |
1335 | | |
1336 | | using FunctionJsonbExists = FunctionJsonbExtractPath; |
1337 | | using FunctionJsonbType = FunctionJsonbExtract<JsonbTypeImpl>; |
1338 | | |
1339 | | using FunctionJsonbExtractIsnull = FunctionJsonbExtract<JsonbExtractIsnull>; |
1340 | | using FunctionJsonbExtractJsonb = FunctionJsonbExtract<JsonbExtractJsonb>; |
1341 | | using FunctionJsonbExtractJsonbNoQuotes = FunctionJsonbExtract<JsonbExtractJsonbNoQuotes>; |
1342 | | |
1343 | | template <typename Impl> |
1344 | | class FunctionJsonbLength : public IFunction { |
1345 | | public: |
1346 | | static constexpr auto name = "json_length"; |
1347 | 1 | String get_name() const override { return name; } |
1348 | 44 | static FunctionPtr create() { return std::make_shared<FunctionJsonbLength<Impl>>(); } |
1349 | | |
1350 | 35 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
1351 | 35 | return make_nullable(std::make_shared<DataTypeInt32>()); |
1352 | 35 | } |
1353 | 43 | DataTypes get_variadic_argument_types_impl() const override { |
1354 | 43 | return Impl::get_variadic_argument_types(); |
1355 | 43 | } |
1356 | 35 | size_t get_number_of_arguments() const override { |
1357 | 35 | return get_variadic_argument_types_impl().size(); |
1358 | 35 | } |
1359 | | |
1360 | 124 | bool use_default_implementation_for_nulls() const override { return false; } |
1361 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
1362 | 89 | uint32_t result, size_t input_rows_count) const override { |
1363 | 89 | return Impl::execute_impl(context, block, arguments, result, input_rows_count); |
1364 | 89 | } |
1365 | | }; |
1366 | | |
1367 | | struct JsonbLengthUtil { |
1368 | | static Status jsonb_length_execute(FunctionContext* context, Block& block, |
1369 | | const ColumnNumbers& arguments, uint32_t result, |
1370 | 89 | size_t input_rows_count) { |
1371 | 89 | DCHECK_GE(arguments.size(), 2); |
1372 | 89 | ColumnPtr jsonb_data_column; |
1373 | 89 | bool jsonb_data_const = false; |
1374 | | // prepare jsonb data column |
1375 | 89 | std::tie(jsonb_data_column, jsonb_data_const) = |
1376 | 89 | unpack_if_const(block.get_by_position(arguments[0]).column); |
1377 | 89 | ColumnPtr path_column; |
1378 | 89 | bool is_const = false; |
1379 | 89 | std::tie(path_column, is_const) = |
1380 | 89 | unpack_if_const(block.get_by_position(arguments[1]).column); |
1381 | | |
1382 | 89 | auto null_map = ColumnUInt8::create(input_rows_count, 0); |
1383 | 89 | auto return_type = block.get_data_type(result); |
1384 | 89 | MutableColumnPtr res = return_type->create_column(); |
1385 | | |
1386 | 89 | JsonbPath path; |
1387 | 89 | if (is_const) { |
1388 | 61 | if (path_column->is_null_at(0)) { |
1389 | 2 | for (size_t i = 0; i < input_rows_count; ++i) { |
1390 | 1 | null_map->get_data()[i] = 1; |
1391 | 1 | res->insert_data(nullptr, 0); |
1392 | 1 | } |
1393 | | |
1394 | 1 | block.replace_by_position( |
1395 | 1 | result, ColumnNullable::create(std::move(res), std::move(null_map))); |
1396 | 1 | return Status::OK(); |
1397 | 1 | } |
1398 | | |
1399 | 60 | auto path_value = path_column->get_data_at(0); |
1400 | 60 | if (!path.seek(path_value.data, path_value.size)) { |
1401 | 0 | return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}", |
1402 | 0 | std::string_view(path_value.data, path_value.size)); |
1403 | 0 | } |
1404 | 60 | } |
1405 | | |
1406 | 263 | for (size_t i = 0; i < input_rows_count; ++i) { |
1407 | 175 | if (jsonb_data_column->is_null_at(i) || path_column->is_null_at(i) || |
1408 | 175 | (jsonb_data_column->get_data_at(i).size == 0)) { |
1409 | 18 | null_map->get_data()[i] = 1; |
1410 | 18 | res->insert_data(nullptr, 0); |
1411 | 18 | continue; |
1412 | 18 | } |
1413 | 157 | if (!is_const) { |
1414 | 25 | auto path_value = path_column->get_data_at(i); |
1415 | 25 | path.clean(); |
1416 | 25 | if (!path.seek(path_value.data, path_value.size)) { |
1417 | 0 | return Status::InvalidArgument( |
1418 | 0 | "Json path error: Invalid Json Path for value: {}", |
1419 | 0 | std::string_view(reinterpret_cast<const char*>(path_value.data), |
1420 | 0 | path_value.size)); |
1421 | 0 | } |
1422 | 25 | } |
1423 | 157 | auto jsonb_value = jsonb_data_column->get_data_at(i); |
1424 | | // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory |
1425 | 157 | const JsonbDocument* doc = nullptr; |
1426 | 157 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(jsonb_value.data, |
1427 | 157 | jsonb_value.size, &doc)); |
1428 | 157 | auto find_result = doc->getValue()->findValue(path); |
1429 | 157 | const auto* value = find_result.value; |
1430 | 157 | if (UNLIKELY(!value)) { |
1431 | 74 | null_map->get_data()[i] = 1; |
1432 | 74 | res->insert_data(nullptr, 0); |
1433 | 74 | continue; |
1434 | 74 | } |
1435 | 83 | auto length = value->numElements(); |
1436 | 83 | res->insert_data(const_cast<const char*>((char*)&length), 0); |
1437 | 83 | } |
1438 | 88 | block.replace_by_position(result, |
1439 | 88 | ColumnNullable::create(std::move(res), std::move(null_map))); |
1440 | 88 | return Status::OK(); |
1441 | 88 | } |
1442 | | }; |
1443 | | |
1444 | | struct JsonbLengthAndPathImpl { |
1445 | 43 | static DataTypes get_variadic_argument_types() { |
1446 | 43 | return {std::make_shared<DataTypeJsonb>(), std::make_shared<DataTypeString>()}; |
1447 | 43 | } |
1448 | | |
1449 | | static Status execute_impl(FunctionContext* context, Block& block, |
1450 | | const ColumnNumbers& arguments, uint32_t result, |
1451 | 89 | size_t input_rows_count) { |
1452 | 89 | return JsonbLengthUtil::jsonb_length_execute(context, block, arguments, result, |
1453 | 89 | input_rows_count); |
1454 | 89 | } |
1455 | | }; |
1456 | | |
1457 | | template <typename Impl> |
1458 | | class FunctionJsonbContains : public IFunction { |
1459 | | public: |
1460 | | static constexpr auto name = "json_contains"; |
1461 | 1 | String get_name() const override { return name; } |
1462 | 58 | static FunctionPtr create() { return std::make_shared<FunctionJsonbContains<Impl>>(); } |
1463 | | |
1464 | 49 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
1465 | 49 | return make_nullable(std::make_shared<DataTypeUInt8>()); |
1466 | 49 | } |
1467 | 57 | DataTypes get_variadic_argument_types_impl() const override { |
1468 | 57 | return Impl::get_variadic_argument_types(); |
1469 | 57 | } |
1470 | 49 | size_t get_number_of_arguments() const override { |
1471 | 49 | return get_variadic_argument_types_impl().size(); |
1472 | 49 | } |
1473 | | |
1474 | 176 | bool use_default_implementation_for_nulls() const override { return false; } |
1475 | | |
1476 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
1477 | 127 | uint32_t result, size_t input_rows_count) const override { |
1478 | 127 | return Impl::execute_impl(context, block, arguments, result, input_rows_count); |
1479 | 127 | } |
1480 | | }; |
1481 | | |
1482 | | struct JsonbContainsUtil { |
1483 | | static Status jsonb_contains_execute(FunctionContext* context, Block& block, |
1484 | | const ColumnNumbers& arguments, uint32_t result, |
1485 | 127 | size_t input_rows_count) { |
1486 | 127 | DCHECK_GE(arguments.size(), 3); |
1487 | | |
1488 | 127 | auto jsonb_data1_column = block.get_by_position(arguments[0]).column; |
1489 | 127 | auto jsonb_data2_column = block.get_by_position(arguments[1]).column; |
1490 | | |
1491 | 127 | ColumnPtr path_column; |
1492 | 127 | bool is_const = false; |
1493 | 127 | std::tie(path_column, is_const) = |
1494 | 127 | unpack_if_const(block.get_by_position(arguments[2]).column); |
1495 | | |
1496 | 127 | auto null_map = ColumnUInt8::create(input_rows_count, 0); |
1497 | 127 | auto return_type = block.get_data_type(result); |
1498 | 127 | MutableColumnPtr res = return_type->create_column(); |
1499 | | |
1500 | 127 | JsonbPath path; |
1501 | 127 | if (is_const) { |
1502 | 84 | if (path_column->is_null_at(0)) { |
1503 | 2 | for (size_t i = 0; i < input_rows_count; ++i) { |
1504 | 1 | null_map->get_data()[i] = 1; |
1505 | 1 | res->insert_data(nullptr, 0); |
1506 | 1 | } |
1507 | | |
1508 | 1 | block.replace_by_position( |
1509 | 1 | result, ColumnNullable::create(std::move(res), std::move(null_map))); |
1510 | 1 | return Status::OK(); |
1511 | 1 | } |
1512 | | |
1513 | 83 | auto path_value = path_column->get_data_at(0); |
1514 | 83 | if (!path.seek(path_value.data, path_value.size)) { |
1515 | 1 | return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}", |
1516 | 1 | std::string_view(path_value.data, path_value.size)); |
1517 | 1 | } |
1518 | 83 | } |
1519 | | |
1520 | 379 | for (size_t i = 0; i < input_rows_count; ++i) { |
1521 | 255 | if (jsonb_data1_column->is_null_at(i) || jsonb_data2_column->is_null_at(i) || |
1522 | 255 | path_column->is_null_at(i)) { |
1523 | 28 | null_map->get_data()[i] = 1; |
1524 | 28 | res->insert_data(nullptr, 0); |
1525 | 28 | continue; |
1526 | 28 | } |
1527 | | |
1528 | 227 | if (!is_const) { |
1529 | 47 | auto path_value = path_column->get_data_at(i); |
1530 | 47 | path.clean(); |
1531 | 47 | if (!path.seek(path_value.data, path_value.size)) { |
1532 | 1 | return Status::InvalidArgument( |
1533 | 1 | "Json path error: Invalid Json Path for value: {}", |
1534 | 1 | std::string_view(path_value.data, path_value.size)); |
1535 | 1 | } |
1536 | 47 | } |
1537 | | |
1538 | 226 | auto jsonb_value1 = jsonb_data1_column->get_data_at(i); |
1539 | 226 | auto jsonb_value2 = jsonb_data2_column->get_data_at(i); |
1540 | | |
1541 | 226 | if (jsonb_value1.size == 0 || jsonb_value2.size == 0) { |
1542 | 1 | null_map->get_data()[i] = 1; |
1543 | 1 | res->insert_data(nullptr, 0); |
1544 | 1 | continue; |
1545 | 1 | } |
1546 | | // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory |
1547 | 225 | const JsonbDocument* doc1 = nullptr; |
1548 | 225 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(jsonb_value1.data, |
1549 | 225 | jsonb_value1.size, &doc1)); |
1550 | 225 | const JsonbDocument* doc2 = nullptr; |
1551 | 225 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(jsonb_value2.data, |
1552 | 225 | jsonb_value2.size, &doc2)); |
1553 | | |
1554 | 225 | auto find_result = doc1->getValue()->findValue(path); |
1555 | 225 | const auto* value1 = find_result.value; |
1556 | 225 | const JsonbValue* value2 = doc2->getValue(); |
1557 | 225 | if (!value1 || !value2) { |
1558 | 45 | null_map->get_data()[i] = 1; |
1559 | 45 | res->insert_data(nullptr, 0); |
1560 | 45 | continue; |
1561 | 45 | } |
1562 | 180 | auto contains_value = value1->contains(value2); |
1563 | 180 | res->insert_data(const_cast<const char*>((char*)&contains_value), 0); |
1564 | 180 | } |
1565 | | |
1566 | 124 | block.replace_by_position(result, |
1567 | 124 | ColumnNullable::create(std::move(res), std::move(null_map))); |
1568 | 124 | return Status::OK(); |
1569 | 125 | } |
1570 | | }; |
1571 | | |
1572 | | template <bool ignore_null> |
1573 | | class FunctionJsonbArray : public IFunction { |
1574 | | public: |
1575 | | static constexpr auto name = "json_array"; |
1576 | | static constexpr auto alias = "jsonb_array"; |
1577 | | |
1578 | 48 | static FunctionPtr create() { return std::make_shared<FunctionJsonbArray>(); }_ZN5doris18FunctionJsonbArrayILb0EE6createEv Line | Count | Source | 1578 | 37 | static FunctionPtr create() { return std::make_shared<FunctionJsonbArray>(); } |
_ZN5doris18FunctionJsonbArrayILb1EE6createEv Line | Count | Source | 1578 | 11 | static FunctionPtr create() { return std::make_shared<FunctionJsonbArray>(); } |
|
1579 | | |
1580 | 0 | String get_name() const override { return name; }Unexecuted instantiation: _ZNK5doris18FunctionJsonbArrayILb0EE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris18FunctionJsonbArrayILb1EE8get_nameB5cxx11Ev |
1581 | | |
1582 | 0 | size_t get_number_of_arguments() const override { return 0; }Unexecuted instantiation: _ZNK5doris18FunctionJsonbArrayILb0EE23get_number_of_argumentsEv Unexecuted instantiation: _ZNK5doris18FunctionJsonbArrayILb1EE23get_number_of_argumentsEv |
1583 | 32 | bool is_variadic() const override { return true; }_ZNK5doris18FunctionJsonbArrayILb0EE11is_variadicEv Line | Count | Source | 1583 | 29 | bool is_variadic() const override { return true; } |
_ZNK5doris18FunctionJsonbArrayILb1EE11is_variadicEv Line | Count | Source | 1583 | 3 | bool is_variadic() const override { return true; } |
|
1584 | | |
1585 | 58 | bool use_default_implementation_for_nulls() const override { return false; }_ZNK5doris18FunctionJsonbArrayILb0EE36use_default_implementation_for_nullsEv Line | Count | Source | 1585 | 54 | bool use_default_implementation_for_nulls() const override { return false; } |
_ZNK5doris18FunctionJsonbArrayILb1EE36use_default_implementation_for_nullsEv Line | Count | Source | 1585 | 4 | bool use_default_implementation_for_nulls() const override { return false; } |
|
1586 | | |
1587 | 30 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
1588 | 30 | return std::make_shared<DataTypeJsonb>(); |
1589 | 30 | } _ZNK5doris18FunctionJsonbArrayILb0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS6_EE Line | Count | Source | 1587 | 28 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 1588 | 28 | return std::make_shared<DataTypeJsonb>(); | 1589 | 28 | } |
_ZNK5doris18FunctionJsonbArrayILb1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS6_EE Line | Count | Source | 1587 | 2 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 1588 | 2 | return std::make_shared<DataTypeJsonb>(); | 1589 | 2 | } |
|
1590 | | |
1591 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
1592 | 30 | uint32_t result, size_t input_rows_count) const override { |
1593 | 30 | auto return_data_type = std::make_shared<DataTypeJsonb>(); |
1594 | 30 | auto column = return_data_type->create_column(); |
1595 | 30 | column->reserve(input_rows_count); |
1596 | | |
1597 | 30 | JsonbWriter writer; |
1598 | 94 | for (size_t i = 0; i < input_rows_count; ++i) { |
1599 | 60 | writer.writeStartArray(); |
1600 | 173 | for (auto argument : arguments) { |
1601 | 173 | auto&& [arg_column, is_const] = |
1602 | 173 | unpack_if_const(block.get_by_position(argument).column); |
1603 | 173 | if (arg_column->is_nullable()) { |
1604 | 83 | const auto& nullable_column = |
1605 | 83 | assert_cast<const ColumnNullable&, TypeCheckOnRelease::DISABLE>( |
1606 | 83 | *arg_column); |
1607 | 83 | const auto& null_map = nullable_column.get_null_map_data(); |
1608 | 83 | const auto& nested_column = nullable_column.get_nested_column(); |
1609 | 83 | const auto& jsonb_column = |
1610 | 83 | assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>( |
1611 | 83 | nested_column); |
1612 | | |
1613 | 83 | auto index = index_check_const(i, is_const); |
1614 | 83 | if (null_map[index]) { |
1615 | 30 | if constexpr (ignore_null) { |
1616 | 4 | continue; |
1617 | 26 | } else { |
1618 | 26 | writer.writeNull(); |
1619 | 26 | } |
1620 | 53 | } else { |
1621 | 53 | auto jsonb_binary = jsonb_column.get_data_at(index); |
1622 | 53 | const JsonbDocument* doc = nullptr; |
1623 | 53 | auto st = JsonbDocument::checkAndCreateDocument(jsonb_binary.data, |
1624 | 53 | jsonb_binary.size, &doc); |
1625 | 53 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { |
1626 | 0 | if constexpr (ignore_null) { |
1627 | 0 | continue; |
1628 | 0 | } else { |
1629 | 0 | writer.writeNull(); |
1630 | 0 | } |
1631 | 53 | } else { |
1632 | 53 | writer.writeValue(doc->getValue()); |
1633 | 53 | } |
1634 | 53 | } |
1635 | 90 | } else { |
1636 | 90 | const auto& jsonb_column = |
1637 | 90 | assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>( |
1638 | 90 | *arg_column); |
1639 | | |
1640 | 90 | auto index = index_check_const(i, is_const); |
1641 | 90 | auto jsonb_binary = jsonb_column.get_data_at(index); |
1642 | 90 | const JsonbDocument* doc = nullptr; |
1643 | 90 | auto st = JsonbDocument::checkAndCreateDocument(jsonb_binary.data, |
1644 | 90 | jsonb_binary.size, &doc); |
1645 | 90 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { |
1646 | 0 | if constexpr (ignore_null) { |
1647 | 0 | continue; |
1648 | 0 | } else { |
1649 | 0 | writer.writeNull(); |
1650 | 0 | } |
1651 | 90 | } else { |
1652 | 90 | writer.writeValue(doc->getValue()); |
1653 | 90 | } |
1654 | 90 | } |
1655 | 173 | } |
1656 | 19 | writer.writeEndArray(); |
1657 | 19 | column->insert_data(writer.getOutput()->getBuffer(), writer.getOutput()->getSize()); |
1658 | 19 | writer.reset(); |
1659 | 19 | } |
1660 | | |
1661 | 6 | block.get_by_position(result).column = std::move(column); |
1662 | 6 | return Status::OK(); |
1663 | 30 | } _ZNK5doris18FunctionJsonbArrayILb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 1592 | 28 | uint32_t result, size_t input_rows_count) const override { | 1593 | 28 | auto return_data_type = std::make_shared<DataTypeJsonb>(); | 1594 | 28 | auto column = return_data_type->create_column(); | 1595 | 28 | column->reserve(input_rows_count); | 1596 | | | 1597 | 28 | JsonbWriter writer; | 1598 | 73 | for (size_t i = 0; i < input_rows_count; ++i) { | 1599 | 45 | writer.writeStartArray(); | 1600 | 143 | for (auto argument : arguments) { | 1601 | 143 | auto&& [arg_column, is_const] = | 1602 | 143 | unpack_if_const(block.get_by_position(argument).column); | 1603 | 143 | if (arg_column->is_nullable()) { | 1604 | 58 | const auto& nullable_column = | 1605 | 58 | assert_cast<const ColumnNullable&, TypeCheckOnRelease::DISABLE>( | 1606 | 58 | *arg_column); | 1607 | 58 | const auto& null_map = nullable_column.get_null_map_data(); | 1608 | 58 | const auto& nested_column = nullable_column.get_nested_column(); | 1609 | 58 | const auto& jsonb_column = | 1610 | 58 | assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>( | 1611 | 58 | nested_column); | 1612 | | | 1613 | 58 | auto index = index_check_const(i, is_const); | 1614 | 58 | if (null_map[index]) { | 1615 | | if constexpr (ignore_null) { | 1616 | | continue; | 1617 | 26 | } else { | 1618 | 26 | writer.writeNull(); | 1619 | 26 | } | 1620 | 32 | } else { | 1621 | 32 | auto jsonb_binary = jsonb_column.get_data_at(index); | 1622 | 32 | const JsonbDocument* doc = nullptr; | 1623 | 32 | auto st = JsonbDocument::checkAndCreateDocument(jsonb_binary.data, | 1624 | 32 | jsonb_binary.size, &doc); | 1625 | 32 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { | 1626 | | if constexpr (ignore_null) { | 1627 | | continue; | 1628 | 0 | } else { | 1629 | 0 | writer.writeNull(); | 1630 | 0 | } | 1631 | 32 | } else { | 1632 | 32 | writer.writeValue(doc->getValue()); | 1633 
| 32 | } | 1634 | 32 | } | 1635 | 85 | } else { | 1636 | 85 | const auto& jsonb_column = | 1637 | 85 | assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>( | 1638 | 85 | *arg_column); | 1639 | | | 1640 | 85 | auto index = index_check_const(i, is_const); | 1641 | 85 | auto jsonb_binary = jsonb_column.get_data_at(index); | 1642 | 85 | const JsonbDocument* doc = nullptr; | 1643 | 85 | auto st = JsonbDocument::checkAndCreateDocument(jsonb_binary.data, | 1644 | 85 | jsonb_binary.size, &doc); | 1645 | 85 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { | 1646 | | if constexpr (ignore_null) { | 1647 | | continue; | 1648 | 0 | } else { | 1649 | 0 | writer.writeNull(); | 1650 | 0 | } | 1651 | 85 | } else { | 1652 | 85 | writer.writeValue(doc->getValue()); | 1653 | 85 | } | 1654 | 85 | } | 1655 | 143 | } | 1656 | 45 | writer.writeEndArray(); | 1657 | 45 | column->insert_data(writer.getOutput()->getBuffer(), writer.getOutput()->getSize()); | 1658 | 45 | writer.reset(); | 1659 | 45 | } | 1660 | | | 1661 | 28 | block.get_by_position(result).column = std::move(column); | 1662 | 28 | return Status::OK(); | 1663 | 28 | } |
_ZNK5doris18FunctionJsonbArrayILb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 1592 | 2 | uint32_t result, size_t input_rows_count) const override { | 1593 | 2 | auto return_data_type = std::make_shared<DataTypeJsonb>(); | 1594 | 2 | auto column = return_data_type->create_column(); | 1595 | 2 | column->reserve(input_rows_count); | 1596 | | | 1597 | 2 | JsonbWriter writer; | 1598 | 21 | for (size_t i = 0; i < input_rows_count; ++i) { | 1599 | 15 | writer.writeStartArray(); | 1600 | 30 | for (auto argument : arguments) { | 1601 | 30 | auto&& [arg_column, is_const] = | 1602 | 30 | unpack_if_const(block.get_by_position(argument).column); | 1603 | 30 | if (arg_column->is_nullable()) { | 1604 | 25 | const auto& nullable_column = | 1605 | 25 | assert_cast<const ColumnNullable&, TypeCheckOnRelease::DISABLE>( | 1606 | 25 | *arg_column); | 1607 | 25 | const auto& null_map = nullable_column.get_null_map_data(); | 1608 | 25 | const auto& nested_column = nullable_column.get_nested_column(); | 1609 | 25 | const auto& jsonb_column = | 1610 | 25 | assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>( | 1611 | 25 | nested_column); | 1612 | | | 1613 | 25 | auto index = index_check_const(i, is_const); | 1614 | 25 | if (null_map[index]) { | 1615 | 4 | if constexpr (ignore_null) { | 1616 | 4 | continue; | 1617 | | } else { | 1618 | | writer.writeNull(); | 1619 | | } | 1620 | 21 | } else { | 1621 | 21 | auto jsonb_binary = jsonb_column.get_data_at(index); | 1622 | 21 | const JsonbDocument* doc = nullptr; | 1623 | 21 | auto st = JsonbDocument::checkAndCreateDocument(jsonb_binary.data, | 1624 | 21 | jsonb_binary.size, &doc); | 1625 | 21 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { | 1626 | 0 | if constexpr (ignore_null) { | 1627 | 0 | continue; | 1628 | | } else { | 1629 | | writer.writeNull(); | 1630 | | } | 1631 | 21 | } else { | 1632 | 21 | writer.writeValue(doc->getValue()); | 1633 | 21 | } | 1634 | 21 | } | 
1635 | 25 | } else { | 1636 | 5 | const auto& jsonb_column = | 1637 | 5 | assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>( | 1638 | 5 | *arg_column); | 1639 | | | 1640 | 5 | auto index = index_check_const(i, is_const); | 1641 | 5 | auto jsonb_binary = jsonb_column.get_data_at(index); | 1642 | 5 | const JsonbDocument* doc = nullptr; | 1643 | 5 | auto st = JsonbDocument::checkAndCreateDocument(jsonb_binary.data, | 1644 | 5 | jsonb_binary.size, &doc); | 1645 | 5 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { | 1646 | 0 | if constexpr (ignore_null) { | 1647 | 0 | continue; | 1648 | | } else { | 1649 | | writer.writeNull(); | 1650 | | } | 1651 | 5 | } else { | 1652 | 5 | writer.writeValue(doc->getValue()); | 1653 | 5 | } | 1654 | 5 | } | 1655 | 30 | } | 1656 | 19 | writer.writeEndArray(); | 1657 | 19 | column->insert_data(writer.getOutput()->getBuffer(), writer.getOutput()->getSize()); | 1658 | 19 | writer.reset(); | 1659 | 19 | } | 1660 | | | 1661 | 6 | block.get_by_position(result).column = std::move(column); | 1662 | 6 | return Status::OK(); | 1663 | 2 | } |
|
1664 | | }; |
1665 | | |
1666 | | class FunctionJsonbObject : public IFunction { |
1667 | | public: |
1668 | | static constexpr auto name = "json_object"; |
1669 | | static constexpr auto alias = "jsonb_object"; |
1670 | | |
1671 | 42 | static FunctionPtr create() { return std::make_shared<FunctionJsonbObject>(); } |
1672 | | |
1673 | 0 | String get_name() const override { return name; } |
1674 | | |
1675 | 0 | size_t get_number_of_arguments() const override { return 0; } |
1676 | 34 | bool is_variadic() const override { return true; } |
1677 | | |
1678 | 73 | bool use_default_implementation_for_nulls() const override { return false; } |
1679 | | |
1680 | 33 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
1681 | 33 | return std::make_shared<DataTypeJsonb>(); |
1682 | 33 | } |
1683 | | |
1684 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
1685 | 44 | uint32_t result, size_t input_rows_count) const override { |
1686 | 44 | if (arguments.size() % 2 != 0) { |
1687 | 0 | return Status::InvalidArgument( |
1688 | 0 | "JSON object must have an even number of arguments, but got: {}", |
1689 | 0 | arguments.size()); |
1690 | 0 | } |
1691 | | |
1692 | 44 | auto return_data_type = std::make_shared<DataTypeJsonb>(); |
1693 | | |
1694 | 44 | auto write_key = [](JsonbWriter& writer, const ColumnString& key_col, const bool is_const, |
1695 | 194 | const NullMap* null_map, const size_t arg_index, const size_t row_idx) { |
1696 | 194 | auto index = index_check_const(row_idx, is_const); |
1697 | 194 | if (null_map && (*null_map)[index]) { |
1698 | 1 | return Status::InvalidArgument( |
1699 | 1 | "JSON documents may not contain NULL member name(argument " |
1700 | 1 | "index: " |
1701 | 1 | "{}, row index: {})", |
1702 | 1 | row_idx, arg_index); |
1703 | 1 | } |
1704 | | |
1705 | 193 | auto key_string = key_col.get_data_at(index); |
1706 | 193 | if (key_string.size > 255) { |
1707 | 0 | return Status::InvalidArgument( |
1708 | 0 | "JSON object keys(argument index: {}) must be less than 256 " |
1709 | 0 | "bytes, but got size: {}", |
1710 | 0 | arg_index, key_string.size); |
1711 | 0 | } |
1712 | 193 | writer.writeKey(key_string.data, static_cast<uint8_t>(key_string.size)); |
1713 | 193 | return Status::OK(); |
1714 | 193 | }; |
1715 | | |
1716 | 44 | auto write_value = [](JsonbWriter& writer, const ColumnString& value_col, |
1717 | 44 | const bool is_const, const NullMap* null_map, const size_t arg_index, |
1718 | 193 | const size_t row_idx) { |
1719 | 193 | auto index = index_check_const(row_idx, is_const); |
1720 | 193 | if (null_map && (*null_map)[index]) { |
1721 | 46 | writer.writeNull(); |
1722 | 46 | return Status::OK(); |
1723 | 46 | } |
1724 | | |
1725 | 147 | auto value_string = value_col.get_data_at(index); |
1726 | 147 | const JsonbDocument* doc = nullptr; |
1727 | 147 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(value_string.data, |
1728 | 147 | value_string.size, &doc)); |
1729 | 147 | writer.writeValue(doc->getValue()); |
1730 | 147 | return Status::OK(); |
1731 | 147 | }; |
1732 | | |
1733 | 142 | for (size_t arg_idx = 0; arg_idx != arguments.size(); arg_idx += 2) { |
1734 | 98 | auto key_argument = arguments[arg_idx]; |
1735 | 98 | auto value_argument = arguments[arg_idx + 1]; |
1736 | | |
1737 | 98 | auto& key_data_type = block.get_by_position(key_argument).type; |
1738 | 98 | auto& value_data_type = block.get_by_position(value_argument).type; |
1739 | 98 | if (!is_string_type(key_data_type->get_primitive_type())) { |
1740 | 0 | return Status::InvalidArgument( |
1741 | 0 | "JSON object key(argument index: {}) must be String, but got type: " |
1742 | 0 | "{}(primitive type: {})", |
1743 | 0 | arg_idx, key_data_type->get_name(), |
1744 | 0 | static_cast<int>(key_data_type->get_primitive_type())); |
1745 | 0 | } |
1746 | | |
1747 | 98 | if (value_data_type->get_primitive_type() != PrimitiveType::TYPE_JSONB) { |
1748 | 0 | return Status::InvalidArgument( |
1749 | 0 | "JSON object value(argument index: {}) must be JSON, but got type: {}", |
1750 | 0 | arg_idx, value_data_type->get_name()); |
1751 | 0 | } |
1752 | 98 | } |
1753 | | |
1754 | 44 | auto column = return_data_type->create_column(); |
1755 | 44 | column->reserve(input_rows_count); |
1756 | | |
1757 | 44 | JsonbWriter writer; |
1758 | 108 | for (size_t i = 0; i != input_rows_count; ++i) { |
1759 | 65 | writer.writeStartObject(); |
1760 | 258 | for (size_t arg_idx = 0; arg_idx != arguments.size(); arg_idx += 2) { |
1761 | 194 | auto key_argument = arguments[arg_idx]; |
1762 | 194 | auto value_argument = arguments[arg_idx + 1]; |
1763 | 194 | auto&& [key_column, key_const] = |
1764 | 194 | unpack_if_const(block.get_by_position(key_argument).column); |
1765 | 194 | auto&& [value_column, value_const] = |
1766 | 194 | unpack_if_const(block.get_by_position(value_argument).column); |
1767 | | |
1768 | 194 | if (key_column->is_nullable()) { |
1769 | 3 | const auto& nullable_column = |
1770 | 3 | assert_cast<const ColumnNullable&, TypeCheckOnRelease::DISABLE>( |
1771 | 3 | *key_column); |
1772 | 3 | const auto& null_map = nullable_column.get_null_map_data(); |
1773 | 3 | const auto& nested_column = nullable_column.get_nested_column(); |
1774 | 3 | const auto& key_arg_column = |
1775 | 3 | assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>( |
1776 | 3 | nested_column); |
1777 | | |
1778 | 3 | RETURN_IF_ERROR( |
1779 | 3 | write_key(writer, key_arg_column, key_const, &null_map, arg_idx, i)); |
1780 | 191 | } else { |
1781 | 191 | const auto& key_arg_column = |
1782 | 191 | assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>( |
1783 | 191 | *key_column); |
1784 | 191 | RETURN_IF_ERROR( |
1785 | 191 | write_key(writer, key_arg_column, key_const, nullptr, arg_idx, i)); |
1786 | 191 | } |
1787 | | |
1788 | 193 | if (value_column->is_nullable()) { |
1789 | 93 | const auto& nullable_column = |
1790 | 93 | assert_cast<const ColumnNullable&, TypeCheckOnRelease::DISABLE>( |
1791 | 93 | *value_column); |
1792 | 93 | const auto& null_map = nullable_column.get_null_map_data(); |
1793 | 93 | const auto& nested_column = nullable_column.get_nested_column(); |
1794 | 93 | const auto& value_arg_column = |
1795 | 93 | assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>( |
1796 | 93 | nested_column); |
1797 | | |
1798 | 93 | RETURN_IF_ERROR(write_value(writer, value_arg_column, value_const, &null_map, |
1799 | 93 | arg_idx + 1, i)); |
1800 | 100 | } else { |
1801 | 100 | const auto& value_arg_column = |
1802 | 100 | assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>( |
1803 | 100 | *value_column); |
1804 | 100 | RETURN_IF_ERROR(write_value(writer, value_arg_column, value_const, nullptr, |
1805 | 100 | arg_idx + 1, i)); |
1806 | 100 | } |
1807 | 193 | } |
1808 | | |
1809 | 64 | writer.writeEndObject(); |
1810 | 64 | column->insert_data(writer.getOutput()->getBuffer(), writer.getOutput()->getSize()); |
1811 | 64 | writer.reset(); |
1812 | 64 | } |
1813 | | |
1814 | 43 | block.get_by_position(result).column = std::move(column); |
1815 | 43 | return Status::OK(); |
1816 | 44 | } |
1817 | | }; |
1818 | | |
/// Selects the modification semantics implemented by a FunctionJsonbModify instantiation.
/// In FunctionJsonbModify::execute_impl, Insert skips paths that already resolve to a value,
/// Replace skips paths that do not resolve, and Set handles both cases.
enum class JsonbModifyType {
    Insert,
    Set,
    Replace,
};

/// Compile-time lookup from a JsonbModifyType to the function name and its alias.
/// The primary template is the generic fallback; each enumerator has its own
/// explicit specialization below.
template <JsonbModifyType modify_type>
struct JsonbModifyName {
    static constexpr const char* name = "jsonb_modify";
    static constexpr const char* alias = "json_modify";
};

/// Names for the Insert flavour.
template <>
struct JsonbModifyName<JsonbModifyType::Insert> {
    static constexpr const char* name = "jsonb_insert";
    static constexpr const char* alias = "json_insert";
};

/// Names for the Set flavour.
template <>
struct JsonbModifyName<JsonbModifyType::Set> {
    static constexpr const char* name = "jsonb_set";
    static constexpr const char* alias = "json_set";
};

/// Names for the Replace flavour.
template <>
struct JsonbModifyName<JsonbModifyType::Replace> {
    static constexpr const char* name = "jsonb_replace";
    static constexpr const char* alias = "json_replace";
};
1842 | | |
1843 | | template <JsonbModifyType modify_type> |
1844 | | class FunctionJsonbModify : public IFunction { |
1845 | | public: |
1846 | | static constexpr auto name = JsonbModifyName<modify_type>::name; |
1847 | | static constexpr auto alias = JsonbModifyName<modify_type>::alias; |
1848 | | |
1849 | 110 | static FunctionPtr create() { return std::make_shared<FunctionJsonbModify<modify_type>>(); }_ZN5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE6createEv Line | Count | Source | 1849 | 37 | static FunctionPtr create() { return std::make_shared<FunctionJsonbModify<modify_type>>(); } |
_ZN5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE6createEv Line | Count | Source | 1849 | 36 | static FunctionPtr create() { return std::make_shared<FunctionJsonbModify<modify_type>>(); } |
_ZN5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE6createEv Line | Count | Source | 1849 | 37 | static FunctionPtr create() { return std::make_shared<FunctionJsonbModify<modify_type>>(); } |
|
1850 | | |
1851 | 0 | String get_name() const override { return name; }Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE8get_nameB5cxx11Ev |
1852 | | |
1853 | 0 | size_t get_number_of_arguments() const override { return 0; }Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE23get_number_of_argumentsEv Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE23get_number_of_argumentsEv Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE23get_number_of_argumentsEv |
1854 | 86 | bool is_variadic() const override { return true; }_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE11is_variadicEv Line | Count | Source | 1854 | 29 | bool is_variadic() const override { return true; } |
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE11is_variadicEv Line | Count | Source | 1854 | 28 | bool is_variadic() const override { return true; } |
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE11is_variadicEv Line | Count | Source | 1854 | 29 | bool is_variadic() const override { return true; } |
|
1855 | | |
1856 | 166 | bool use_default_implementation_for_nulls() const override { return false; }_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE36use_default_implementation_for_nullsEv Line | Count | Source | 1856 | 56 | bool use_default_implementation_for_nulls() const override { return false; } |
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE36use_default_implementation_for_nullsEv Line | Count | Source | 1856 | 54 | bool use_default_implementation_for_nulls() const override { return false; } |
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE36use_default_implementation_for_nullsEv Line | Count | Source | 1856 | 56 | bool use_default_implementation_for_nulls() const override { return false; } |
|
1857 | | |
1858 | 83 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
1859 | 83 | return make_nullable(std::make_shared<DataTypeJsonb>()); |
1860 | 83 | } _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 1858 | 28 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 1859 | 28 | return make_nullable(std::make_shared<DataTypeJsonb>()); | 1860 | 28 | } |
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 1858 | 27 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 1859 | 27 | return make_nullable(std::make_shared<DataTypeJsonb>()); | 1860 | 27 | } |
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 1858 | 28 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 1859 | 28 | return make_nullable(std::make_shared<DataTypeJsonb>()); | 1860 | 28 | } |
|
1861 | | |
1862 | | Status create_all_null_result(const DataTypePtr& return_data_type, Block& block, |
1863 | 0 | uint32_t result, size_t input_rows_count) const { |
1864 | 0 | auto result_column = return_data_type->create_column(); |
1865 | 0 | result_column->insert_default(); |
1866 | 0 | auto const_column = ColumnConst::create(std::move(result_column), input_rows_count); |
1867 | 0 | block.get_by_position(result).column = std::move(const_column); |
1868 | 0 | return Status::OK(); |
1869 | 0 | } Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE22create_all_null_resultERKSt10shared_ptrIKNS_9IDataTypeEERNS_5BlockEjm Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE22create_all_null_resultERKSt10shared_ptrIKNS_9IDataTypeEERNS_5BlockEjm Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE22create_all_null_resultERKSt10shared_ptrIKNS_9IDataTypeEERNS_5BlockEjm |
1870 | | |
1871 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
1872 | 83 | uint32_t result, size_t input_rows_count) const override { |
1873 | 83 | if (arguments.size() % 2 != 1 || arguments.size() < 3) { |
1874 | 0 | return Status::InvalidArgument( |
1875 | 0 | "Function {} must have an odd number of arguments and more than 2 arguments, " |
1876 | 0 | "but got: {}", |
1877 | 0 | name, arguments.size()); |
1878 | 0 | } |
1879 | | |
1880 | 83 | const size_t keys_count = (arguments.size() - 1) / 2; |
1881 | | |
1882 | 83 | auto return_data_type = make_nullable(std::make_shared<DataTypeJsonb>()); |
1883 | | |
1884 | 83 | auto result_column = return_data_type->create_column(); |
1885 | 83 | auto& result_nullable_col = assert_cast<ColumnNullable&>(*result_column); |
1886 | 83 | auto& null_map = result_nullable_col.get_null_map_data(); |
1887 | 83 | auto& res_string_column = |
1888 | 83 | assert_cast<ColumnString&>(result_nullable_col.get_nested_column()); |
1889 | 83 | auto& res_chars = res_string_column.get_chars(); |
1890 | 83 | auto& res_offsets = res_string_column.get_offsets(); |
1891 | | |
1892 | 83 | null_map.resize_fill(input_rows_count, 0); |
1893 | 83 | res_offsets.resize(input_rows_count); |
1894 | 83 | auto&& [json_data_arg_column, json_data_const] = |
1895 | 83 | unpack_if_const(block.get_by_position(arguments[0]).column); |
1896 | | |
1897 | 83 | if (json_data_const) { |
1898 | 11 | if (json_data_arg_column->is_null_at(0)) { |
1899 | 0 | return create_all_null_result(return_data_type, block, result, input_rows_count); |
1900 | 0 | } |
1901 | 11 | } |
1902 | | |
1903 | 83 | std::vector<const ColumnString*> json_path_columns(keys_count); |
1904 | 83 | std::vector<bool> json_path_constant(keys_count); |
1905 | 83 | std::vector<const NullMap*> json_path_null_maps(keys_count, nullptr); |
1906 | | |
1907 | 83 | std::vector<const ColumnString*> json_value_columns(keys_count); |
1908 | 83 | std::vector<bool> json_value_constant(keys_count); |
1909 | 83 | std::vector<const NullMap*> json_value_null_maps(keys_count, nullptr); |
1910 | | |
1911 | 83 | const NullMap* json_data_null_map = nullptr; |
1912 | 83 | const ColumnString* json_data_column; |
1913 | 83 | if (json_data_arg_column->is_nullable()) { |
1914 | 83 | const auto& nullable_column = assert_cast<const ColumnNullable&>(*json_data_arg_column); |
1915 | 83 | json_data_null_map = &nullable_column.get_null_map_data(); |
1916 | 83 | const auto& nested_column = nullable_column.get_nested_column(); |
1917 | 83 | json_data_column = assert_cast<const ColumnString*>(&nested_column); |
1918 | 83 | } else { |
1919 | 0 | json_data_column = assert_cast<const ColumnString*>(json_data_arg_column.get()); |
1920 | 0 | } |
1921 | | |
1922 | 191 | for (size_t i = 1; i < arguments.size(); i += 2) { |
1923 | 108 | auto&& [path_column, path_const] = |
1924 | 108 | unpack_if_const(block.get_by_position(arguments[i]).column); |
1925 | 108 | auto&& [value_column, value_const] = |
1926 | 108 | unpack_if_const(block.get_by_position(arguments[i + 1]).column); |
1927 | | |
1928 | 108 | if (path_const) { |
1929 | 27 | if (path_column->is_null_at(0)) { |
1930 | 0 | return create_all_null_result(return_data_type, block, result, |
1931 | 0 | input_rows_count); |
1932 | 0 | } |
1933 | 27 | } |
1934 | | |
1935 | 108 | json_path_constant[i / 2] = path_const; |
1936 | 108 | if (path_column->is_nullable()) { |
1937 | 6 | const auto& nullable_column = assert_cast<const ColumnNullable&>(*path_column); |
1938 | 6 | json_path_null_maps[i / 2] = &nullable_column.get_null_map_data(); |
1939 | 6 | const auto& nested_column = nullable_column.get_nested_column(); |
1940 | 6 | json_path_columns[i / 2] = assert_cast<const ColumnString*>(&nested_column); |
1941 | 102 | } else { |
1942 | 102 | json_path_columns[i / 2] = assert_cast<const ColumnString*>(path_column.get()); |
1943 | 102 | } |
1944 | | |
1945 | 108 | json_value_constant[i / 2] = value_const; |
1946 | 108 | if (value_column->is_nullable()) { |
1947 | 51 | const auto& nullable_column = assert_cast<const ColumnNullable&>(*value_column); |
1948 | 51 | json_value_null_maps[i / 2] = &nullable_column.get_null_map_data(); |
1949 | 51 | const auto& nested_column = nullable_column.get_nested_column(); |
1950 | 51 | json_value_columns[i / 2] = assert_cast<const ColumnString*>(&nested_column); |
1951 | 57 | } else { |
1952 | 57 | json_value_columns[i / 2] = assert_cast<const ColumnString*>(value_column.get()); |
1953 | 57 | } |
1954 | 108 | } |
1955 | | |
1956 | 83 | DorisVector<const JsonbDocument*> json_documents(input_rows_count); |
1957 | 83 | if (json_data_const) { |
1958 | 11 | auto json_data_string = json_data_column->get_data_at(0); |
1959 | 11 | const JsonbDocument* doc = nullptr; |
1960 | 11 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(json_data_string.data, |
1961 | 11 | json_data_string.size, &doc)); |
1962 | 11 | if (!doc || !doc->getValue()) [[unlikely]] { |
1963 | 0 | return create_all_null_result(return_data_type, block, result, input_rows_count); |
1964 | 0 | } |
1965 | 62 | for (size_t i = 0; i != input_rows_count; ++i) { |
1966 | 51 | json_documents[i] = doc; |
1967 | 51 | } |
1968 | 72 | } else { |
1969 | 144 | for (size_t i = 0; i != input_rows_count; ++i) { |
1970 | 72 | if (json_data_null_map && (*json_data_null_map)[i]) { |
1971 | 0 | null_map[i] = 1; |
1972 | 0 | json_documents[i] = nullptr; |
1973 | 0 | continue; |
1974 | 0 | } |
1975 | | |
1976 | 72 | auto json_data_string = json_data_column->get_data_at(i); |
1977 | 72 | const JsonbDocument* doc = nullptr; |
1978 | 72 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(json_data_string.data, |
1979 | 72 | json_data_string.size, &doc)); |
1980 | 72 | if (!doc || !doc->getValue()) [[unlikely]] { |
1981 | 0 | null_map[i] = 1; |
1982 | 0 | continue; |
1983 | 0 | } |
1984 | 72 | json_documents[i] = doc; |
1985 | 72 | } |
1986 | 72 | } |
1987 | | |
1988 | 83 | DorisVector<DorisVector<JsonbPath>> json_paths(keys_count); |
1989 | 83 | DorisVector<DorisVector<const JsonbValue*>> json_values(keys_count); |
1990 | | |
1991 | 83 | RETURN_IF_ERROR(parse_paths_and_values(json_paths, json_values, arguments, input_rows_count, |
1992 | 83 | json_path_columns, json_path_constant, |
1993 | 83 | json_path_null_maps, json_value_columns, |
1994 | 83 | json_value_constant, json_value_null_maps)); |
1995 | | |
1996 | 75 | JsonbWriter writer; |
1997 | 75 | struct DocumentBuffer { |
1998 | 75 | DorisUniqueBufferPtr<char> ptr; |
1999 | 75 | size_t size = 0; |
2000 | 75 | size_t capacity = 0; |
2001 | 75 | }; |
2002 | | |
2003 | 75 | DocumentBuffer tmp_buffer; |
2004 | | |
2005 | 210 | for (size_t row_idx = 0; row_idx != input_rows_count; ++row_idx) { |
2006 | 329 | for (size_t i = 1; i < arguments.size(); i += 2) { |
2007 | 194 | const size_t index = i / 2; |
2008 | 194 | auto& json_path = json_paths[index]; |
2009 | 194 | auto& json_value = json_values[index]; |
2010 | | |
2011 | 194 | const auto path_index = index_check_const(row_idx, json_path_constant[index]); |
2012 | 194 | const auto value_index = index_check_const(row_idx, json_value_constant[index]); |
2013 | | |
2014 | 194 | if (null_map[row_idx]) { |
2015 | 0 | continue; |
2016 | 0 | } |
2017 | | |
2018 | 194 | if (json_documents[row_idx] == nullptr) { |
2019 | 0 | null_map[row_idx] = 1; |
2020 | 0 | continue; |
2021 | 0 | } |
2022 | | |
2023 | 194 | if (json_path_null_maps[index] && (*json_path_null_maps[index])[path_index]) { |
2024 | 4 | null_map[row_idx] = 1; |
2025 | 4 | continue; |
2026 | 4 | } |
2027 | | |
2028 | 190 | auto find_result = |
2029 | 190 | json_documents[row_idx]->getValue()->findValue(json_path[path_index]); |
2030 | | |
2031 | 190 | if (find_result.is_wildcard) { |
2032 | 0 | return Status::InvalidArgument( |
2033 | 0 | " In this situation, path expressions may not contain the * and ** " |
2034 | 0 | "tokens or an array range, argument index: {}, row index: {}", |
2035 | 0 | i, row_idx); |
2036 | 0 | } |
2037 | | |
2038 | 190 | if constexpr (modify_type == JsonbModifyType::Insert) { |
2039 | 59 | if (find_result.value) { |
2040 | 18 | continue; |
2041 | 18 | } |
2042 | 67 | } else if constexpr (modify_type == JsonbModifyType::Replace) { |
2043 | 67 | if (!find_result.value) { |
2044 | 11 | continue; |
2045 | 11 | } |
2046 | 67 | } |
2047 | | |
2048 | 97 | std::vector<const JsonbValue*> parents; |
2049 | | |
2050 | 190 | bool replace = false; |
2051 | 190 | parents.emplace_back(json_documents[row_idx]->getValue()); |
2052 | 190 | if (find_result.value) { |
2053 | | // find target path, replace it with the new value. |
2054 | 100 | replace = true; |
2055 | 100 | if (!build_parents_by_path(json_documents[row_idx]->getValue(), |
2056 | 100 | json_path[path_index], parents)) { |
2057 | 0 | DCHECK(false); |
2058 | 0 | continue; |
2059 | 0 | } |
2060 | 100 | } else { |
2061 | | // does not find target path, insert the new value. |
2062 | 90 | JsonbPath new_path; |
2063 | 146 | for (size_t j = 0; j < json_path[path_index].get_leg_vector_size() - 1; ++j) { |
2064 | 56 | auto* current_leg = json_path[path_index].get_leg_from_leg_vector(j); |
2065 | 56 | std::unique_ptr<leg_info> leg = std::make_unique<leg_info>( |
2066 | 56 | current_leg->leg_ptr, current_leg->leg_len, |
2067 | 56 | current_leg->array_index, current_leg->type); |
2068 | 56 | new_path.add_leg_to_leg_vector(std::move(leg)); |
2069 | 56 | } |
2070 | | |
2071 | 90 | if (!build_parents_by_path(json_documents[row_idx]->getValue(), new_path, |
2072 | 90 | parents)) { |
2073 | 12 | continue; |
2074 | 12 | } |
2075 | 90 | } |
2076 | | |
2077 | 178 | const auto legs_count = json_path[path_index].get_leg_vector_size(); |
2078 | 178 | leg_info* last_leg = |
2079 | 178 | legs_count > 0 |
2080 | 178 | ? json_path[path_index].get_leg_from_leg_vector(legs_count - 1) |
2081 | 178 | : nullptr; |
2082 | 178 | RETURN_IF_ERROR(write_json_value(json_documents[row_idx]->getValue(), parents, 0, |
2083 | 178 | json_value[value_index], replace, last_leg, |
2084 | 178 | writer)); |
2085 | | |
2086 | 178 | auto* writer_output = writer.getOutput(); |
2087 | 178 | if (writer_output->getSize() > tmp_buffer.capacity) { |
2088 | 65 | tmp_buffer.capacity = |
2089 | 65 | ((size_t(writer_output->getSize()) + 1024 - 1) / 1024) * 1024; |
2090 | 65 | tmp_buffer.ptr = make_unique_buffer<char>(tmp_buffer.capacity); |
2091 | 65 | DCHECK_LE(writer_output->getSize(), tmp_buffer.capacity); |
2092 | 65 | } |
2093 | | |
2094 | 178 | memcpy(tmp_buffer.ptr.get(), writer_output->getBuffer(), writer_output->getSize()); |
2095 | 178 | tmp_buffer.size = writer_output->getSize(); |
2096 | | |
2097 | 178 | writer.reset(); |
2098 | | |
2099 | 178 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument( |
2100 | 178 | tmp_buffer.ptr.get(), tmp_buffer.size, &json_documents[row_idx])); |
2101 | 178 | } |
2102 | | |
2103 | 135 | if (!null_map[row_idx]) { |
2104 | 102 | const auto* jsonb_document = json_documents[row_idx]; |
2105 | 102 | const auto size = jsonb_document->numPackedBytes(); |
2106 | 102 | res_chars.insert(reinterpret_cast<const char*>(jsonb_document), |
2107 | 102 | reinterpret_cast<const char*>(jsonb_document) + size); |
2108 | 102 | } |
2109 | | |
2110 | 135 | res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size()); |
2111 | | |
2112 | 135 | if (!null_map[row_idx]) { |
2113 | 102 | auto* ptr = res_chars.data() + res_offsets[row_idx - 1]; |
2114 | 102 | auto size = res_offsets[row_idx] - res_offsets[row_idx - 1]; |
2115 | 102 | const JsonbDocument* doc = nullptr; |
2116 | 102 | THROW_IF_ERROR(JsonbDocument::checkAndCreateDocument( |
2117 | 102 | reinterpret_cast<const char*>(ptr), size, &doc)); |
2118 | 102 | } |
2119 | 135 | } |
2120 | | |
2121 | 104 | block.get_by_position(result).column = std::move(result_column); |
2122 | 104 | return Status::OK(); |
2123 | 75 | } _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 1872 | 28 | uint32_t result, size_t input_rows_count) const override { | 1873 | 28 | if (arguments.size() % 2 != 1 || arguments.size() < 3) { | 1874 | 0 | return Status::InvalidArgument( | 1875 | 0 | "Function {} must have an odd number of arguments and more than 2 arguments, " | 1876 | 0 | "but got: {}", | 1877 | 0 | name, arguments.size()); | 1878 | 0 | } | 1879 | | | 1880 | 28 | const size_t keys_count = (arguments.size() - 1) / 2; | 1881 | | | 1882 | 28 | auto return_data_type = make_nullable(std::make_shared<DataTypeJsonb>()); | 1883 | | | 1884 | 28 | auto result_column = return_data_type->create_column(); | 1885 | 28 | auto& result_nullable_col = assert_cast<ColumnNullable&>(*result_column); | 1886 | 28 | auto& null_map = result_nullable_col.get_null_map_data(); | 1887 | 28 | auto& res_string_column = | 1888 | 28 | assert_cast<ColumnString&>(result_nullable_col.get_nested_column()); | 1889 | 28 | auto& res_chars = res_string_column.get_chars(); | 1890 | 28 | auto& res_offsets = res_string_column.get_offsets(); | 1891 | | | 1892 | 28 | null_map.resize_fill(input_rows_count, 0); | 1893 | 28 | res_offsets.resize(input_rows_count); | 1894 | 28 | auto&& [json_data_arg_column, json_data_const] = | 1895 | 28 | unpack_if_const(block.get_by_position(arguments[0]).column); | 1896 | | | 1897 | 28 | if (json_data_const) { | 1898 | 5 | if (json_data_arg_column->is_null_at(0)) { | 1899 | 0 | return create_all_null_result(return_data_type, block, result, input_rows_count); | 1900 | 0 | } | 1901 | 5 | } | 1902 | | | 1903 | 28 | std::vector<const ColumnString*> json_path_columns(keys_count); | 1904 | 28 | std::vector<bool> json_path_constant(keys_count); | 1905 | 28 | std::vector<const NullMap*> json_path_null_maps(keys_count, nullptr); | 1906 | | | 1907 | 28 | std::vector<const ColumnString*> 
json_value_columns(keys_count); | 1908 | 28 | std::vector<bool> json_value_constant(keys_count); | 1909 | 28 | std::vector<const NullMap*> json_value_null_maps(keys_count, nullptr); | 1910 | | | 1911 | 28 | const NullMap* json_data_null_map = nullptr; | 1912 | 28 | const ColumnString* json_data_column; | 1913 | 28 | if (json_data_arg_column->is_nullable()) { | 1914 | 28 | const auto& nullable_column = assert_cast<const ColumnNullable&>(*json_data_arg_column); | 1915 | 28 | json_data_null_map = &nullable_column.get_null_map_data(); | 1916 | 28 | const auto& nested_column = nullable_column.get_nested_column(); | 1917 | 28 | json_data_column = assert_cast<const ColumnString*>(&nested_column); | 1918 | 28 | } else { | 1919 | 0 | json_data_column = assert_cast<const ColumnString*>(json_data_arg_column.get()); | 1920 | 0 | } | 1921 | | | 1922 | 63 | for (size_t i = 1; i < arguments.size(); i += 2) { | 1923 | 35 | auto&& [path_column, path_const] = | 1924 | 35 | unpack_if_const(block.get_by_position(arguments[i]).column); | 1925 | 35 | auto&& [value_column, value_const] = | 1926 | 35 | unpack_if_const(block.get_by_position(arguments[i + 1]).column); | 1927 | | | 1928 | 35 | if (path_const) { | 1929 | 7 | if (path_column->is_null_at(0)) { | 1930 | 0 | return create_all_null_result(return_data_type, block, result, | 1931 | 0 | input_rows_count); | 1932 | 0 | } | 1933 | 7 | } | 1934 | | | 1935 | 35 | json_path_constant[i / 2] = path_const; | 1936 | 35 | if (path_column->is_nullable()) { | 1937 | 4 | const auto& nullable_column = assert_cast<const ColumnNullable&>(*path_column); | 1938 | 4 | json_path_null_maps[i / 2] = &nullable_column.get_null_map_data(); | 1939 | 4 | const auto& nested_column = nullable_column.get_nested_column(); | 1940 | 4 | json_path_columns[i / 2] = assert_cast<const ColumnString*>(&nested_column); | 1941 | 31 | } else { | 1942 | 31 | json_path_columns[i / 2] = assert_cast<const ColumnString*>(path_column.get()); | 1943 | 31 | } | 1944 | | | 1945 | 35 
| json_value_constant[i / 2] = value_const; | 1946 | 35 | if (value_column->is_nullable()) { | 1947 | 16 | const auto& nullable_column = assert_cast<const ColumnNullable&>(*value_column); | 1948 | 16 | json_value_null_maps[i / 2] = &nullable_column.get_null_map_data(); | 1949 | 16 | const auto& nested_column = nullable_column.get_nested_column(); | 1950 | 16 | json_value_columns[i / 2] = assert_cast<const ColumnString*>(&nested_column); | 1951 | 19 | } else { | 1952 | 19 | json_value_columns[i / 2] = assert_cast<const ColumnString*>(value_column.get()); | 1953 | 19 | } | 1954 | 35 | } | 1955 | | | 1956 | 28 | DorisVector<const JsonbDocument*> json_documents(input_rows_count); | 1957 | 28 | if (json_data_const) { | 1958 | 5 | auto json_data_string = json_data_column->get_data_at(0); | 1959 | 5 | const JsonbDocument* doc = nullptr; | 1960 | 5 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(json_data_string.data, | 1961 | 5 | json_data_string.size, &doc)); | 1962 | 5 | if (!doc || !doc->getValue()) [[unlikely]] { | 1963 | 0 | return create_all_null_result(return_data_type, block, result, input_rows_count); | 1964 | 0 | } | 1965 | 30 | for (size_t i = 0; i != input_rows_count; ++i) { | 1966 | 25 | json_documents[i] = doc; | 1967 | 25 | } | 1968 | 23 | } else { | 1969 | 46 | for (size_t i = 0; i != input_rows_count; ++i) { | 1970 | 23 | if (json_data_null_map && (*json_data_null_map)[i]) { | 1971 | 0 | null_map[i] = 1; | 1972 | 0 | json_documents[i] = nullptr; | 1973 | 0 | continue; | 1974 | 0 | } | 1975 | | | 1976 | 23 | auto json_data_string = json_data_column->get_data_at(i); | 1977 | 23 | const JsonbDocument* doc = nullptr; | 1978 | 23 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(json_data_string.data, | 1979 | 23 | json_data_string.size, &doc)); | 1980 | 23 | if (!doc || !doc->getValue()) [[unlikely]] { | 1981 | 0 | null_map[i] = 1; | 1982 | 0 | continue; | 1983 | 0 | } | 1984 | 23 | json_documents[i] = doc; | 1985 | 23 | } | 1986 | 23 | } | 1987 
| | | 1988 | 28 | DorisVector<DorisVector<JsonbPath>> json_paths(keys_count); | 1989 | 28 | DorisVector<DorisVector<const JsonbValue*>> json_values(keys_count); | 1990 | | | 1991 | 28 | RETURN_IF_ERROR(parse_paths_and_values(json_paths, json_values, arguments, input_rows_count, | 1992 | 28 | json_path_columns, json_path_constant, | 1993 | 28 | json_path_null_maps, json_value_columns, | 1994 | 28 | json_value_constant, json_value_null_maps)); | 1995 | | | 1996 | 24 | JsonbWriter writer; | 1997 | 24 | struct DocumentBuffer { | 1998 | 24 | DorisUniqueBufferPtr<char> ptr; | 1999 | 24 | size_t size = 0; | 2000 | 24 | size_t capacity = 0; | 2001 | 24 | }; | 2002 | | | 2003 | 24 | DocumentBuffer tmp_buffer; | 2004 | | | 2005 | 77 | for (size_t row_idx = 0; row_idx != input_rows_count; ++row_idx) { | 2006 | 114 | for (size_t i = 1; i < arguments.size(); i += 2) { | 2007 | 61 | const size_t index = i / 2; | 2008 | 61 | auto& json_path = json_paths[index]; | 2009 | 61 | auto& json_value = json_values[index]; | 2010 | | | 2011 | 61 | const auto path_index = index_check_const(row_idx, json_path_constant[index]); | 2012 | 61 | const auto value_index = index_check_const(row_idx, json_value_constant[index]); | 2013 | | | 2014 | 61 | if (null_map[row_idx]) { | 2015 | 0 | continue; | 2016 | 0 | } | 2017 | | | 2018 | 61 | if (json_documents[row_idx] == nullptr) { | 2019 | 0 | null_map[row_idx] = 1; | 2020 | 0 | continue; | 2021 | 0 | } | 2022 | | | 2023 | 61 | if (json_path_null_maps[index] && (*json_path_null_maps[index])[path_index]) { | 2024 | 2 | null_map[row_idx] = 1; | 2025 | 2 | continue; | 2026 | 2 | } | 2027 | | | 2028 | 59 | auto find_result = | 2029 | 59 | json_documents[row_idx]->getValue()->findValue(json_path[path_index]); | 2030 | | | 2031 | 59 | if (find_result.is_wildcard) { | 2032 | 0 | return Status::InvalidArgument( | 2033 | 0 | " In this situation, path expressions may not contain the * and ** " | 2034 | 0 | "tokens or an array range, argument index: {}, row 
index: {}", | 2035 | 0 | i, row_idx); | 2036 | 0 | } | 2037 | | | 2038 | 59 | if constexpr (modify_type == JsonbModifyType::Insert) { | 2039 | 59 | if (find_result.value) { | 2040 | 18 | continue; | 2041 | 18 | } | 2042 | | } else if constexpr (modify_type == JsonbModifyType::Replace) { | 2043 | | if (!find_result.value) { | 2044 | | continue; | 2045 | | } | 2046 | | } | 2047 | | | 2048 | 41 | std::vector<const JsonbValue*> parents; | 2049 | | | 2050 | 59 | bool replace = false; | 2051 | 59 | parents.emplace_back(json_documents[row_idx]->getValue()); | 2052 | 59 | if (find_result.value) { | 2053 | | // find target path, replace it with the new value. | 2054 | 0 | replace = true; | 2055 | 0 | if (!build_parents_by_path(json_documents[row_idx]->getValue(), | 2056 | 0 | json_path[path_index], parents)) { | 2057 | 0 | DCHECK(false); | 2058 | 0 | continue; | 2059 | 0 | } | 2060 | 59 | } else { | 2061 | | // does not find target path, insert the new value. | 2062 | 59 | JsonbPath new_path; | 2063 | 98 | for (size_t j = 0; j < json_path[path_index].get_leg_vector_size() - 1; ++j) { | 2064 | 39 | auto* current_leg = json_path[path_index].get_leg_from_leg_vector(j); | 2065 | 39 | std::unique_ptr<leg_info> leg = std::make_unique<leg_info>( | 2066 | 39 | current_leg->leg_ptr, current_leg->leg_len, | 2067 | 39 | current_leg->array_index, current_leg->type); | 2068 | 39 | new_path.add_leg_to_leg_vector(std::move(leg)); | 2069 | 39 | } | 2070 | | | 2071 | 59 | if (!build_parents_by_path(json_documents[row_idx]->getValue(), new_path, | 2072 | 59 | parents)) { | 2073 | 1 | continue; | 2074 | 1 | } | 2075 | 59 | } | 2076 | | | 2077 | 58 | const auto legs_count = json_path[path_index].get_leg_vector_size(); | 2078 | 58 | leg_info* last_leg = | 2079 | 58 | legs_count > 0 | 2080 | 58 | ? 
json_path[path_index].get_leg_from_leg_vector(legs_count - 1) | 2081 | 58 | : nullptr; | 2082 | 58 | RETURN_IF_ERROR(write_json_value(json_documents[row_idx]->getValue(), parents, 0, | 2083 | 58 | json_value[value_index], replace, last_leg, | 2084 | 58 | writer)); | 2085 | | | 2086 | 58 | auto* writer_output = writer.getOutput(); | 2087 | 58 | if (writer_output->getSize() > tmp_buffer.capacity) { | 2088 | 19 | tmp_buffer.capacity = | 2089 | 19 | ((size_t(writer_output->getSize()) + 1024 - 1) / 1024) * 1024; | 2090 | 19 | tmp_buffer.ptr = make_unique_buffer<char>(tmp_buffer.capacity); | 2091 | 19 | DCHECK_LE(writer_output->getSize(), tmp_buffer.capacity); | 2092 | 19 | } | 2093 | | | 2094 | 58 | memcpy(tmp_buffer.ptr.get(), writer_output->getBuffer(), writer_output->getSize()); | 2095 | 58 | tmp_buffer.size = writer_output->getSize(); | 2096 | | | 2097 | 58 | writer.reset(); | 2098 | | | 2099 | 58 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument( | 2100 | 58 | tmp_buffer.ptr.get(), tmp_buffer.size, &json_documents[row_idx])); | 2101 | 58 | } | 2102 | | | 2103 | 53 | if (!null_map[row_idx]) { | 2104 | 33 | const auto* jsonb_document = json_documents[row_idx]; | 2105 | 33 | const auto size = jsonb_document->numPackedBytes(); | 2106 | 33 | res_chars.insert(reinterpret_cast<const char*>(jsonb_document), | 2107 | 33 | reinterpret_cast<const char*>(jsonb_document) + size); | 2108 | 33 | } | 2109 | | | 2110 | 53 | res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size()); | 2111 | | | 2112 | 53 | if (!null_map[row_idx]) { | 2113 | 33 | auto* ptr = res_chars.data() + res_offsets[row_idx - 1]; | 2114 | 33 | auto size = res_offsets[row_idx] - res_offsets[row_idx - 1]; | 2115 | 33 | const JsonbDocument* doc = nullptr; | 2116 | 33 | THROW_IF_ERROR(JsonbDocument::checkAndCreateDocument( | 2117 | 33 | reinterpret_cast<const char*>(ptr), size, &doc)); | 2118 | 33 | } | 2119 | 53 | } | 2120 | | | 2121 | 42 | block.get_by_position(result).column = 
std::move(result_column); | 2122 | 42 | return Status::OK(); | 2123 | 24 | } |
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 1872 | 27 | uint32_t result, size_t input_rows_count) const override { | 1873 | 27 | if (arguments.size() % 2 != 1 || arguments.size() < 3) { | 1874 | 0 | return Status::InvalidArgument( | 1875 | 0 | "Function {} must have an odd number of arguments and more than 2 arguments, " | 1876 | 0 | "but got: {}", | 1877 | 0 | name, arguments.size()); | 1878 | 0 | } | 1879 | | | 1880 | 27 | const size_t keys_count = (arguments.size() - 1) / 2; | 1881 | | | 1882 | 27 | auto return_data_type = make_nullable(std::make_shared<DataTypeJsonb>()); | 1883 | | | 1884 | 27 | auto result_column = return_data_type->create_column(); | 1885 | 27 | auto& result_nullable_col = assert_cast<ColumnNullable&>(*result_column); | 1886 | 27 | auto& null_map = result_nullable_col.get_null_map_data(); | 1887 | 27 | auto& res_string_column = | 1888 | 27 | assert_cast<ColumnString&>(result_nullable_col.get_nested_column()); | 1889 | 27 | auto& res_chars = res_string_column.get_chars(); | 1890 | 27 | auto& res_offsets = res_string_column.get_offsets(); | 1891 | | | 1892 | 27 | null_map.resize_fill(input_rows_count, 0); | 1893 | 27 | res_offsets.resize(input_rows_count); | 1894 | 27 | auto&& [json_data_arg_column, json_data_const] = | 1895 | 27 | unpack_if_const(block.get_by_position(arguments[0]).column); | 1896 | | | 1897 | 27 | if (json_data_const) { | 1898 | 3 | if (json_data_arg_column->is_null_at(0)) { | 1899 | 0 | return create_all_null_result(return_data_type, block, result, input_rows_count); | 1900 | 0 | } | 1901 | 3 | } | 1902 | | | 1903 | 27 | std::vector<const ColumnString*> json_path_columns(keys_count); | 1904 | 27 | std::vector<bool> json_path_constant(keys_count); | 1905 | 27 | std::vector<const NullMap*> json_path_null_maps(keys_count, nullptr); | 1906 | | | 1907 | 27 | std::vector<const ColumnString*> json_value_columns(keys_count); | 
1908 | 27 | std::vector<bool> json_value_constant(keys_count); | 1909 | 27 | std::vector<const NullMap*> json_value_null_maps(keys_count, nullptr); | 1910 | | | 1911 | 27 | const NullMap* json_data_null_map = nullptr; | 1912 | 27 | const ColumnString* json_data_column; | 1913 | 27 | if (json_data_arg_column->is_nullable()) { | 1914 | 27 | const auto& nullable_column = assert_cast<const ColumnNullable&>(*json_data_arg_column); | 1915 | 27 | json_data_null_map = &nullable_column.get_null_map_data(); | 1916 | 27 | const auto& nested_column = nullable_column.get_nested_column(); | 1917 | 27 | json_data_column = assert_cast<const ColumnString*>(&nested_column); | 1918 | 27 | } else { | 1919 | 0 | json_data_column = assert_cast<const ColumnString*>(json_data_arg_column.get()); | 1920 | 0 | } | 1921 | | | 1922 | 62 | for (size_t i = 1; i < arguments.size(); i += 2) { | 1923 | 35 | auto&& [path_column, path_const] = | 1924 | 35 | unpack_if_const(block.get_by_position(arguments[i]).column); | 1925 | 35 | auto&& [value_column, value_const] = | 1926 | 35 | unpack_if_const(block.get_by_position(arguments[i + 1]).column); | 1927 | | | 1928 | 35 | if (path_const) { | 1929 | 9 | if (path_column->is_null_at(0)) { | 1930 | 0 | return create_all_null_result(return_data_type, block, result, | 1931 | 0 | input_rows_count); | 1932 | 0 | } | 1933 | 9 | } | 1934 | | | 1935 | 35 | json_path_constant[i / 2] = path_const; | 1936 | 35 | if (path_column->is_nullable()) { | 1937 | 1 | const auto& nullable_column = assert_cast<const ColumnNullable&>(*path_column); | 1938 | 1 | json_path_null_maps[i / 2] = &nullable_column.get_null_map_data(); | 1939 | 1 | const auto& nested_column = nullable_column.get_nested_column(); | 1940 | 1 | json_path_columns[i / 2] = assert_cast<const ColumnString*>(&nested_column); | 1941 | 34 | } else { | 1942 | 34 | json_path_columns[i / 2] = assert_cast<const ColumnString*>(path_column.get()); | 1943 | 34 | } | 1944 | | | 1945 | 35 | json_value_constant[i / 2] = 
value_const; | 1946 | 35 | if (value_column->is_nullable()) { | 1947 | 16 | const auto& nullable_column = assert_cast<const ColumnNullable&>(*value_column); | 1948 | 16 | json_value_null_maps[i / 2] = &nullable_column.get_null_map_data(); | 1949 | 16 | const auto& nested_column = nullable_column.get_nested_column(); | 1950 | 16 | json_value_columns[i / 2] = assert_cast<const ColumnString*>(&nested_column); | 1951 | 19 | } else { | 1952 | 19 | json_value_columns[i / 2] = assert_cast<const ColumnString*>(value_column.get()); | 1953 | 19 | } | 1954 | 35 | } | 1955 | | | 1956 | 27 | DorisVector<const JsonbDocument*> json_documents(input_rows_count); | 1957 | 27 | if (json_data_const) { | 1958 | 3 | auto json_data_string = json_data_column->get_data_at(0); | 1959 | 3 | const JsonbDocument* doc = nullptr; | 1960 | 3 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(json_data_string.data, | 1961 | 3 | json_data_string.size, &doc)); | 1962 | 3 | if (!doc || !doc->getValue()) [[unlikely]] { | 1963 | 0 | return create_all_null_result(return_data_type, block, result, input_rows_count); | 1964 | 0 | } | 1965 | 17 | for (size_t i = 0; i != input_rows_count; ++i) { | 1966 | 14 | json_documents[i] = doc; | 1967 | 14 | } | 1968 | 24 | } else { | 1969 | 48 | for (size_t i = 0; i != input_rows_count; ++i) { | 1970 | 24 | if (json_data_null_map && (*json_data_null_map)[i]) { | 1971 | 0 | null_map[i] = 1; | 1972 | 0 | json_documents[i] = nullptr; | 1973 | 0 | continue; | 1974 | 0 | } | 1975 | | | 1976 | 24 | auto json_data_string = json_data_column->get_data_at(i); | 1977 | 24 | const JsonbDocument* doc = nullptr; | 1978 | 24 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(json_data_string.data, | 1979 | 24 | json_data_string.size, &doc)); | 1980 | 24 | if (!doc || !doc->getValue()) [[unlikely]] { | 1981 | 0 | null_map[i] = 1; | 1982 | 0 | continue; | 1983 | 0 | } | 1984 | 24 | json_documents[i] = doc; | 1985 | 24 | } | 1986 | 24 | } | 1987 | | | 1988 | 27 | 
DorisVector<DorisVector<JsonbPath>> json_paths(keys_count); | 1989 | 27 | DorisVector<DorisVector<const JsonbValue*>> json_values(keys_count); | 1990 | | | 1991 | 27 | RETURN_IF_ERROR(parse_paths_and_values(json_paths, json_values, arguments, input_rows_count, | 1992 | 27 | json_path_columns, json_path_constant, | 1993 | 27 | json_path_null_maps, json_value_columns, | 1994 | 27 | json_value_constant, json_value_null_maps)); | 1995 | | | 1996 | 25 | JsonbWriter writer; | 1997 | 25 | struct DocumentBuffer { | 1998 | 25 | DorisUniqueBufferPtr<char> ptr; | 1999 | 25 | size_t size = 0; | 2000 | 25 | size_t capacity = 0; | 2001 | 25 | }; | 2002 | | | 2003 | 25 | DocumentBuffer tmp_buffer; | 2004 | | | 2005 | 61 | for (size_t row_idx = 0; row_idx != input_rows_count; ++row_idx) { | 2006 | 101 | for (size_t i = 1; i < arguments.size(); i += 2) { | 2007 | 65 | const size_t index = i / 2; | 2008 | 65 | auto& json_path = json_paths[index]; | 2009 | 65 | auto& json_value = json_values[index]; | 2010 | | | 2011 | 65 | const auto path_index = index_check_const(row_idx, json_path_constant[index]); | 2012 | 65 | const auto value_index = index_check_const(row_idx, json_value_constant[index]); | 2013 | | | 2014 | 65 | if (null_map[row_idx]) { | 2015 | 0 | continue; | 2016 | 0 | } | 2017 | | | 2018 | 65 | if (json_documents[row_idx] == nullptr) { | 2019 | 0 | null_map[row_idx] = 1; | 2020 | 0 | continue; | 2021 | 0 | } | 2022 | | | 2023 | 65 | if (json_path_null_maps[index] && (*json_path_null_maps[index])[path_index]) { | 2024 | 1 | null_map[row_idx] = 1; | 2025 | 1 | continue; | 2026 | 1 | } | 2027 | | | 2028 | 64 | auto find_result = | 2029 | 64 | json_documents[row_idx]->getValue()->findValue(json_path[path_index]); | 2030 | | | 2031 | 64 | if (find_result.is_wildcard) { | 2032 | 0 | return Status::InvalidArgument( | 2033 | 0 | " In this situation, path expressions may not contain the * and ** " | 2034 | 0 | "tokens or an array range, argument index: {}, row index: {}", | 2035 | 
0 | i, row_idx); | 2036 | 0 | } | 2037 | | | 2038 | | if constexpr (modify_type == JsonbModifyType::Insert) { | 2039 | | if (find_result.value) { | 2040 | | continue; | 2041 | | } | 2042 | 64 | } else if constexpr (modify_type == JsonbModifyType::Replace) { | 2043 | 64 | if (!find_result.value) { | 2044 | 64 | continue; | 2045 | 64 | } | 2046 | 64 | } | 2047 | | | 2048 | 64 | std::vector<const JsonbValue*> parents; | 2049 | | | 2050 | 64 | bool replace = false; | 2051 | 64 | parents.emplace_back(json_documents[row_idx]->getValue()); | 2052 | 64 | if (find_result.value) { | 2053 | | // find target path, replace it with the new value. | 2054 | 44 | replace = true; | 2055 | 44 | if (!build_parents_by_path(json_documents[row_idx]->getValue(), | 2056 | 44 | json_path[path_index], parents)) { | 2057 | 0 | DCHECK(false); | 2058 | 0 | continue; | 2059 | 0 | } | 2060 | 44 | } else { | 2061 | | // does not find target path, insert the new value. | 2062 | 20 | JsonbPath new_path; | 2063 | 37 | for (size_t j = 0; j < json_path[path_index].get_leg_vector_size() - 1; ++j) { | 2064 | 17 | auto* current_leg = json_path[path_index].get_leg_from_leg_vector(j); | 2065 | 17 | std::unique_ptr<leg_info> leg = std::make_unique<leg_info>( | 2066 | 17 | current_leg->leg_ptr, current_leg->leg_len, | 2067 | 17 | current_leg->array_index, current_leg->type); | 2068 | 17 | new_path.add_leg_to_leg_vector(std::move(leg)); | 2069 | 17 | } | 2070 | | | 2071 | 20 | if (!build_parents_by_path(json_documents[row_idx]->getValue(), new_path, | 2072 | 20 | parents)) { | 2073 | 11 | continue; | 2074 | 11 | } | 2075 | 20 | } | 2076 | | | 2077 | 53 | const auto legs_count = json_path[path_index].get_leg_vector_size(); | 2078 | 53 | leg_info* last_leg = | 2079 | 53 | legs_count > 0 | 2080 | 53 | ? 
json_path[path_index].get_leg_from_leg_vector(legs_count - 1) | 2081 | 53 | : nullptr; | 2082 | 53 | RETURN_IF_ERROR(write_json_value(json_documents[row_idx]->getValue(), parents, 0, | 2083 | 53 | json_value[value_index], replace, last_leg, | 2084 | 53 | writer)); | 2085 | | | 2086 | 53 | auto* writer_output = writer.getOutput(); | 2087 | 53 | if (writer_output->getSize() > tmp_buffer.capacity) { | 2088 | 23 | tmp_buffer.capacity = | 2089 | 23 | ((size_t(writer_output->getSize()) + 1024 - 1) / 1024) * 1024; | 2090 | 23 | tmp_buffer.ptr = make_unique_buffer<char>(tmp_buffer.capacity); | 2091 | 23 | DCHECK_LE(writer_output->getSize(), tmp_buffer.capacity); | 2092 | 23 | } | 2093 | | | 2094 | 53 | memcpy(tmp_buffer.ptr.get(), writer_output->getBuffer(), writer_output->getSize()); | 2095 | 53 | tmp_buffer.size = writer_output->getSize(); | 2096 | | | 2097 | 53 | writer.reset(); | 2098 | | | 2099 | 53 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument( | 2100 | 53 | tmp_buffer.ptr.get(), tmp_buffer.size, &json_documents[row_idx])); | 2101 | 53 | } | 2102 | | | 2103 | 36 | if (!null_map[row_idx]) { | 2104 | 35 | const auto* jsonb_document = json_documents[row_idx]; | 2105 | 35 | const auto size = jsonb_document->numPackedBytes(); | 2106 | 35 | res_chars.insert(reinterpret_cast<const char*>(jsonb_document), | 2107 | 35 | reinterpret_cast<const char*>(jsonb_document) + size); | 2108 | 35 | } | 2109 | | | 2110 | 36 | res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size()); | 2111 | | | 2112 | 36 | if (!null_map[row_idx]) { | 2113 | 35 | auto* ptr = res_chars.data() + res_offsets[row_idx - 1]; | 2114 | 35 | auto size = res_offsets[row_idx] - res_offsets[row_idx - 1]; | 2115 | 35 | const JsonbDocument* doc = nullptr; | 2116 | 35 | THROW_IF_ERROR(JsonbDocument::checkAndCreateDocument( | 2117 | 35 | reinterpret_cast<const char*>(ptr), size, &doc)); | 2118 | 35 | } | 2119 | 36 | } | 2120 | | | 2121 | 25 | block.get_by_position(result).column = 
std::move(result_column); | 2122 | 25 | return Status::OK(); | 2123 | 25 | } |
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 1872 | 28 | uint32_t result, size_t input_rows_count) const override { | 1873 | 28 | if (arguments.size() % 2 != 1 || arguments.size() < 3) { | 1874 | 0 | return Status::InvalidArgument( | 1875 | 0 | "Function {} must have an odd number of arguments and more than 2 arguments, " | 1876 | 0 | "but got: {}", | 1877 | 0 | name, arguments.size()); | 1878 | 0 | } | 1879 | | | 1880 | 28 | const size_t keys_count = (arguments.size() - 1) / 2; | 1881 | | | 1882 | 28 | auto return_data_type = make_nullable(std::make_shared<DataTypeJsonb>()); | 1883 | | | 1884 | 28 | auto result_column = return_data_type->create_column(); | 1885 | 28 | auto& result_nullable_col = assert_cast<ColumnNullable&>(*result_column); | 1886 | 28 | auto& null_map = result_nullable_col.get_null_map_data(); | 1887 | 28 | auto& res_string_column = | 1888 | 28 | assert_cast<ColumnString&>(result_nullable_col.get_nested_column()); | 1889 | 28 | auto& res_chars = res_string_column.get_chars(); | 1890 | 28 | auto& res_offsets = res_string_column.get_offsets(); | 1891 | | | 1892 | 28 | null_map.resize_fill(input_rows_count, 0); | 1893 | 28 | res_offsets.resize(input_rows_count); | 1894 | 28 | auto&& [json_data_arg_column, json_data_const] = | 1895 | 28 | unpack_if_const(block.get_by_position(arguments[0]).column); | 1896 | | | 1897 | 28 | if (json_data_const) { | 1898 | 3 | if (json_data_arg_column->is_null_at(0)) { | 1899 | 0 | return create_all_null_result(return_data_type, block, result, input_rows_count); | 1900 | 0 | } | 1901 | 3 | } | 1902 | | | 1903 | 28 | std::vector<const ColumnString*> json_path_columns(keys_count); | 1904 | 28 | std::vector<bool> json_path_constant(keys_count); | 1905 | 28 | std::vector<const NullMap*> json_path_null_maps(keys_count, nullptr); | 1906 | | | 1907 | 28 | std::vector<const ColumnString*> json_value_columns(keys_count); | 
1908 | 28 | std::vector<bool> json_value_constant(keys_count); | 1909 | 28 | std::vector<const NullMap*> json_value_null_maps(keys_count, nullptr); | 1910 | | | 1911 | 28 | const NullMap* json_data_null_map = nullptr; | 1912 | 28 | const ColumnString* json_data_column; | 1913 | 28 | if (json_data_arg_column->is_nullable()) { | 1914 | 28 | const auto& nullable_column = assert_cast<const ColumnNullable&>(*json_data_arg_column); | 1915 | 28 | json_data_null_map = &nullable_column.get_null_map_data(); | 1916 | 28 | const auto& nested_column = nullable_column.get_nested_column(); | 1917 | 28 | json_data_column = assert_cast<const ColumnString*>(&nested_column); | 1918 | 28 | } else { | 1919 | 0 | json_data_column = assert_cast<const ColumnString*>(json_data_arg_column.get()); | 1920 | 0 | } | 1921 | | | 1922 | 66 | for (size_t i = 1; i < arguments.size(); i += 2) { | 1923 | 38 | auto&& [path_column, path_const] = | 1924 | 38 | unpack_if_const(block.get_by_position(arguments[i]).column); | 1925 | 38 | auto&& [value_column, value_const] = | 1926 | 38 | unpack_if_const(block.get_by_position(arguments[i + 1]).column); | 1927 | | | 1928 | 38 | if (path_const) { | 1929 | 11 | if (path_column->is_null_at(0)) { | 1930 | 0 | return create_all_null_result(return_data_type, block, result, | 1931 | 0 | input_rows_count); | 1932 | 0 | } | 1933 | 11 | } | 1934 | | | 1935 | 38 | json_path_constant[i / 2] = path_const; | 1936 | 38 | if (path_column->is_nullable()) { | 1937 | 1 | const auto& nullable_column = assert_cast<const ColumnNullable&>(*path_column); | 1938 | 1 | json_path_null_maps[i / 2] = &nullable_column.get_null_map_data(); | 1939 | 1 | const auto& nested_column = nullable_column.get_nested_column(); | 1940 | 1 | json_path_columns[i / 2] = assert_cast<const ColumnString*>(&nested_column); | 1941 | 37 | } else { | 1942 | 37 | json_path_columns[i / 2] = assert_cast<const ColumnString*>(path_column.get()); | 1943 | 37 | } | 1944 | | | 1945 | 38 | json_value_constant[i / 2] = 
value_const; | 1946 | 38 | if (value_column->is_nullable()) { | 1947 | 19 | const auto& nullable_column = assert_cast<const ColumnNullable&>(*value_column); | 1948 | 19 | json_value_null_maps[i / 2] = &nullable_column.get_null_map_data(); | 1949 | 19 | const auto& nested_column = nullable_column.get_nested_column(); | 1950 | 19 | json_value_columns[i / 2] = assert_cast<const ColumnString*>(&nested_column); | 1951 | 19 | } else { | 1952 | 19 | json_value_columns[i / 2] = assert_cast<const ColumnString*>(value_column.get()); | 1953 | 19 | } | 1954 | 38 | } | 1955 | | | 1956 | 28 | DorisVector<const JsonbDocument*> json_documents(input_rows_count); | 1957 | 28 | if (json_data_const) { | 1958 | 3 | auto json_data_string = json_data_column->get_data_at(0); | 1959 | 3 | const JsonbDocument* doc = nullptr; | 1960 | 3 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(json_data_string.data, | 1961 | 3 | json_data_string.size, &doc)); | 1962 | 3 | if (!doc || !doc->getValue()) [[unlikely]] { | 1963 | 0 | return create_all_null_result(return_data_type, block, result, input_rows_count); | 1964 | 0 | } | 1965 | 15 | for (size_t i = 0; i != input_rows_count; ++i) { | 1966 | 12 | json_documents[i] = doc; | 1967 | 12 | } | 1968 | 25 | } else { | 1969 | 50 | for (size_t i = 0; i != input_rows_count; ++i) { | 1970 | 25 | if (json_data_null_map && (*json_data_null_map)[i]) { | 1971 | 0 | null_map[i] = 1; | 1972 | 0 | json_documents[i] = nullptr; | 1973 | 0 | continue; | 1974 | 0 | } | 1975 | | | 1976 | 25 | auto json_data_string = json_data_column->get_data_at(i); | 1977 | 25 | const JsonbDocument* doc = nullptr; | 1978 | 25 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(json_data_string.data, | 1979 | 25 | json_data_string.size, &doc)); | 1980 | 25 | if (!doc || !doc->getValue()) [[unlikely]] { | 1981 | 0 | null_map[i] = 1; | 1982 | 0 | continue; | 1983 | 0 | } | 1984 | 25 | json_documents[i] = doc; | 1985 | 25 | } | 1986 | 25 | } | 1987 | | | 1988 | 28 | 
DorisVector<DorisVector<JsonbPath>> json_paths(keys_count); | 1989 | 28 | DorisVector<DorisVector<const JsonbValue*>> json_values(keys_count); | 1990 | | | 1991 | 28 | RETURN_IF_ERROR(parse_paths_and_values(json_paths, json_values, arguments, input_rows_count, | 1992 | 28 | json_path_columns, json_path_constant, | 1993 | 28 | json_path_null_maps, json_value_columns, | 1994 | 28 | json_value_constant, json_value_null_maps)); | 1995 | | | 1996 | 26 | JsonbWriter writer; | 1997 | 26 | struct DocumentBuffer { | 1998 | 26 | DorisUniqueBufferPtr<char> ptr; | 1999 | 26 | size_t size = 0; | 2000 | 26 | size_t capacity = 0; | 2001 | 26 | }; | 2002 | | | 2003 | 26 | DocumentBuffer tmp_buffer; | 2004 | | | 2005 | 72 | for (size_t row_idx = 0; row_idx != input_rows_count; ++row_idx) { | 2006 | 114 | for (size_t i = 1; i < arguments.size(); i += 2) { | 2007 | 68 | const size_t index = i / 2; | 2008 | 68 | auto& json_path = json_paths[index]; | 2009 | 68 | auto& json_value = json_values[index]; | 2010 | | | 2011 | 68 | const auto path_index = index_check_const(row_idx, json_path_constant[index]); | 2012 | 68 | const auto value_index = index_check_const(row_idx, json_value_constant[index]); | 2013 | | | 2014 | 68 | if (null_map[row_idx]) { | 2015 | 0 | continue; | 2016 | 0 | } | 2017 | | | 2018 | 68 | if (json_documents[row_idx] == nullptr) { | 2019 | 0 | null_map[row_idx] = 1; | 2020 | 0 | continue; | 2021 | 0 | } | 2022 | | | 2023 | 68 | if (json_path_null_maps[index] && (*json_path_null_maps[index])[path_index]) { | 2024 | 1 | null_map[row_idx] = 1; | 2025 | 1 | continue; | 2026 | 1 | } | 2027 | | | 2028 | 67 | auto find_result = | 2029 | 67 | json_documents[row_idx]->getValue()->findValue(json_path[path_index]); | 2030 | | | 2031 | 67 | if (find_result.is_wildcard) { | 2032 | 0 | return Status::InvalidArgument( | 2033 | 0 | " In this situation, path expressions may not contain the * and ** " | 2034 | 0 | "tokens or an array range, argument index: {}, row index: {}", | 2035 | 
0 | i, row_idx); | 2036 | 0 | } | 2037 | | | 2038 | | if constexpr (modify_type == JsonbModifyType::Insert) { | 2039 | | if (find_result.value) { | 2040 | | continue; | 2041 | | } | 2042 | 67 | } else if constexpr (modify_type == JsonbModifyType::Replace) { | 2043 | 67 | if (!find_result.value) { | 2044 | 11 | continue; | 2045 | 11 | } | 2046 | 67 | } | 2047 | | | 2048 | 56 | std::vector<const JsonbValue*> parents; | 2049 | | | 2050 | 67 | bool replace = false; | 2051 | 67 | parents.emplace_back(json_documents[row_idx]->getValue()); | 2052 | 67 | if (find_result.value) { | 2053 | | // find target path, replace it with the new value. | 2054 | 56 | replace = true; | 2055 | 56 | if (!build_parents_by_path(json_documents[row_idx]->getValue(), | 2056 | 56 | json_path[path_index], parents)) { | 2057 | 0 | DCHECK(false); | 2058 | 0 | continue; | 2059 | 0 | } | 2060 | 56 | } else { | 2061 | | // does not find target path, insert the new value. | 2062 | 11 | JsonbPath new_path; | 2063 | 11 | for (size_t j = 0; j < json_path[path_index].get_leg_vector_size() - 1; ++j) { | 2064 | 0 | auto* current_leg = json_path[path_index].get_leg_from_leg_vector(j); | 2065 | 0 | std::unique_ptr<leg_info> leg = std::make_unique<leg_info>( | 2066 | 0 | current_leg->leg_ptr, current_leg->leg_len, | 2067 | 0 | current_leg->array_index, current_leg->type); | 2068 | 0 | new_path.add_leg_to_leg_vector(std::move(leg)); | 2069 | 0 | } | 2070 | | | 2071 | 11 | if (!build_parents_by_path(json_documents[row_idx]->getValue(), new_path, | 2072 | 11 | parents)) { | 2073 | 0 | continue; | 2074 | 0 | } | 2075 | 11 | } | 2076 | | | 2077 | 67 | const auto legs_count = json_path[path_index].get_leg_vector_size(); | 2078 | 67 | leg_info* last_leg = | 2079 | 67 | legs_count > 0 | 2080 | 67 | ? 
json_path[path_index].get_leg_from_leg_vector(legs_count - 1) | 2081 | 67 | : nullptr; | 2082 | 67 | RETURN_IF_ERROR(write_json_value(json_documents[row_idx]->getValue(), parents, 0, | 2083 | 67 | json_value[value_index], replace, last_leg, | 2084 | 67 | writer)); | 2085 | | | 2086 | 67 | auto* writer_output = writer.getOutput(); | 2087 | 67 | if (writer_output->getSize() > tmp_buffer.capacity) { | 2088 | 23 | tmp_buffer.capacity = | 2089 | 23 | ((size_t(writer_output->getSize()) + 1024 - 1) / 1024) * 1024; | 2090 | 23 | tmp_buffer.ptr = make_unique_buffer<char>(tmp_buffer.capacity); | 2091 | 23 | DCHECK_LE(writer_output->getSize(), tmp_buffer.capacity); | 2092 | 23 | } | 2093 | | | 2094 | 67 | memcpy(tmp_buffer.ptr.get(), writer_output->getBuffer(), writer_output->getSize()); | 2095 | 67 | tmp_buffer.size = writer_output->getSize(); | 2096 | | | 2097 | 67 | writer.reset(); | 2098 | | | 2099 | 67 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument( | 2100 | 67 | tmp_buffer.ptr.get(), tmp_buffer.size, &json_documents[row_idx])); | 2101 | 67 | } | 2102 | | | 2103 | 46 | if (!null_map[row_idx]) { | 2104 | 34 | const auto* jsonb_document = json_documents[row_idx]; | 2105 | 34 | const auto size = jsonb_document->numPackedBytes(); | 2106 | 34 | res_chars.insert(reinterpret_cast<const char*>(jsonb_document), | 2107 | 34 | reinterpret_cast<const char*>(jsonb_document) + size); | 2108 | 34 | } | 2109 | | | 2110 | 46 | res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size()); | 2111 | | | 2112 | 46 | if (!null_map[row_idx]) { | 2113 | 34 | auto* ptr = res_chars.data() + res_offsets[row_idx - 1]; | 2114 | 34 | auto size = res_offsets[row_idx] - res_offsets[row_idx - 1]; | 2115 | 34 | const JsonbDocument* doc = nullptr; | 2116 | 34 | THROW_IF_ERROR(JsonbDocument::checkAndCreateDocument( | 2117 | 34 | reinterpret_cast<const char*>(ptr), size, &doc)); | 2118 | 34 | } | 2119 | 46 | } | 2120 | | | 2121 | 37 | block.get_by_position(result).column = 
std::move(result_column); | 2122 | 37 | return Status::OK(); | 2123 | 26 | } |
|
2124 | | |
2125 | | bool build_parents_by_path(const JsonbValue* root, const JsonbPath& path, |
2126 | 342 | std::vector<const JsonbValue*>& parents) const { |
2127 | 342 | const size_t index = parents.size() - 1; |
2128 | 342 | if (index == path.get_leg_vector_size()) { |
2129 | 143 | return true; |
2130 | 143 | } |
2131 | | |
2132 | 199 | JsonbPath current; |
2133 | 199 | auto* current_leg = path.get_leg_from_leg_vector(index); |
2134 | 199 | std::unique_ptr<leg_info> leg = |
2135 | 199 | std::make_unique<leg_info>(current_leg->leg_ptr, current_leg->leg_len, |
2136 | 199 | current_leg->array_index, current_leg->type); |
2137 | 199 | current.add_leg_to_leg_vector(std::move(leg)); |
2138 | | |
2139 | 199 | auto find_result = root->findValue(current); |
2140 | 199 | if (!find_result.value) { |
2141 | 12 | std::string path_string; |
2142 | 12 | current.to_string(&path_string); |
2143 | 12 | return false; |
2144 | 187 | } else if (find_result.value == root) { |
2145 | 6 | return true; |
2146 | 181 | } else { |
2147 | 181 | parents.emplace_back(find_result.value); |
2148 | 181 | } |
2149 | | |
2150 | 181 | return build_parents_by_path(find_result.value, path, parents); |
2151 | 199 | } _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE21build_parents_by_pathEPKNS_10JsonbValueERKNS_9JsonbPathERSt6vectorIS5_SaIS5_EE Line | Count | Source | 2126 | 79 | std::vector<const JsonbValue*>& parents) const { | 2127 | 79 | const size_t index = parents.size() - 1; | 2128 | 79 | if (index == path.get_leg_vector_size()) { | 2129 | 40 | return true; | 2130 | 40 | } | 2131 | | | 2132 | 39 | JsonbPath current; | 2133 | 39 | auto* current_leg = path.get_leg_from_leg_vector(index); | 2134 | 39 | std::unique_ptr<leg_info> leg = | 2135 | 39 | std::make_unique<leg_info>(current_leg->leg_ptr, current_leg->leg_len, | 2136 | 39 | current_leg->array_index, current_leg->type); | 2137 | 39 | current.add_leg_to_leg_vector(std::move(leg)); | 2138 | | | 2139 | 39 | auto find_result = root->findValue(current); | 2140 | 39 | if (!find_result.value) { | 2141 | 1 | std::string path_string; | 2142 | 1 | current.to_string(&path_string); | 2143 | 1 | return false; | 2144 | 38 | } else if (find_result.value == root) { | 2145 | 0 | return true; | 2146 | 38 | } else { | 2147 | 38 | parents.emplace_back(find_result.value); | 2148 | 38 | } | 2149 | | | 2150 | 38 | return build_parents_by_path(find_result.value, path, parents); | 2151 | 39 | } |
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE21build_parents_by_pathEPKNS_10JsonbValueERKNS_9JsonbPathERSt6vectorIS5_SaIS5_EE Line | Count | Source | 2126 | 132 | std::vector<const JsonbValue*>& parents) const { | 2127 | 132 | const size_t index = parents.size() - 1; | 2128 | 132 | if (index == path.get_leg_vector_size()) { | 2129 | 50 | return true; | 2130 | 50 | } | 2131 | | | 2132 | 82 | JsonbPath current; | 2133 | 82 | auto* current_leg = path.get_leg_from_leg_vector(index); | 2134 | 82 | std::unique_ptr<leg_info> leg = | 2135 | 82 | std::make_unique<leg_info>(current_leg->leg_ptr, current_leg->leg_len, | 2136 | 82 | current_leg->array_index, current_leg->type); | 2137 | 82 | current.add_leg_to_leg_vector(std::move(leg)); | 2138 | | | 2139 | 82 | auto find_result = root->findValue(current); | 2140 | 82 | if (!find_result.value) { | 2141 | 11 | std::string path_string; | 2142 | 11 | current.to_string(&path_string); | 2143 | 11 | return false; | 2144 | 71 | } else if (find_result.value == root) { | 2145 | 3 | return true; | 2146 | 68 | } else { | 2147 | 68 | parents.emplace_back(find_result.value); | 2148 | 68 | } | 2149 | | | 2150 | 68 | return build_parents_by_path(find_result.value, path, parents); | 2151 | 82 | } |
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE21build_parents_by_pathEPKNS_10JsonbValueERKNS_9JsonbPathERSt6vectorIS5_SaIS5_EE Line | Count | Source | 2126 | 131 | std::vector<const JsonbValue*>& parents) const { | 2127 | 131 | const size_t index = parents.size() - 1; | 2128 | 131 | if (index == path.get_leg_vector_size()) { | 2129 | 53 | return true; | 2130 | 53 | } | 2131 | | | 2132 | 78 | JsonbPath current; | 2133 | 78 | auto* current_leg = path.get_leg_from_leg_vector(index); | 2134 | 78 | std::unique_ptr<leg_info> leg = | 2135 | 78 | std::make_unique<leg_info>(current_leg->leg_ptr, current_leg->leg_len, | 2136 | 78 | current_leg->array_index, current_leg->type); | 2137 | 78 | current.add_leg_to_leg_vector(std::move(leg)); | 2138 | | | 2139 | 78 | auto find_result = root->findValue(current); | 2140 | 78 | if (!find_result.value) { | 2141 | 0 | std::string path_string; | 2142 | 0 | current.to_string(&path_string); | 2143 | 0 | return false; | 2144 | 78 | } else if (find_result.value == root) { | 2145 | 3 | return true; | 2146 | 75 | } else { | 2147 | 75 | parents.emplace_back(find_result.value); | 2148 | 75 | } | 2149 | | | 2150 | 75 | return build_parents_by_path(find_result.value, path, parents); | 2151 | 78 | } |
|
2152 | | |
2153 | | Status write_json_value(const JsonbValue* root, const std::vector<const JsonbValue*>& parents, |
2154 | | const size_t parent_index, const JsonbValue* value, const bool replace, |
2155 | 330 | const leg_info* last_leg, JsonbWriter& writer) const { |
2156 | 330 | if (parent_index >= parents.size()) { |
2157 | 0 | return Status::InvalidArgument( |
2158 | 0 | "JsonbModify: parent_index {} is out of bounds for parents size {}", |
2159 | 0 | parent_index, parents.size()); |
2160 | 0 | } |
2161 | | |
2162 | 330 | if (parents[parent_index] != root) { |
2163 | 0 | return Status::InvalidArgument( |
2164 | 0 | "JsonbModify: parent value does not match root value, parent_index: {}, " |
2165 | 0 | "parents size: {}", |
2166 | 0 | parent_index, parents.size()); |
2167 | 0 | } |
2168 | | |
2169 | 330 | if (parent_index == parents.size() - 1 && replace) { |
2170 | | // We are at the last parent, write the value directly |
2171 | 100 | if (value == nullptr) { |
2172 | 24 | writer.writeNull(); |
2173 | 76 | } else { |
2174 | 76 | writer.writeValue(value); |
2175 | 76 | } |
2176 | 100 | return Status::OK(); |
2177 | 100 | } |
2178 | | |
2179 | 230 | bool value_written = false; |
2180 | 230 | bool is_last_parent = (parent_index == parents.size() - 1); |
2181 | 230 | const auto* next_parent = is_last_parent ? nullptr : parents[parent_index + 1]; |
2182 | 230 | if (root->isArray()) { |
2183 | 21 | writer.writeStartArray(); |
2184 | 21 | const auto* array_val = root->unpack<ArrayVal>(); |
2185 | 63 | for (int i = 0; i != array_val->numElem(); ++i) { |
2186 | 42 | auto* it = array_val->get(i); |
2187 | | |
2188 | 42 | if (is_last_parent && last_leg->array_index == i) { |
2189 | 0 | value_written = true; |
2190 | 0 | writer.writeValue(value); |
2191 | 42 | } else if (it == next_parent) { |
2192 | 13 | value_written = true; |
2193 | 13 | RETURN_IF_ERROR(write_json_value(it, parents, parent_index + 1, value, replace, |
2194 | 13 | last_leg, writer)); |
2195 | 29 | } else { |
2196 | 29 | writer.writeValue(it); |
2197 | 29 | } |
2198 | 42 | } |
2199 | 21 | if (is_last_parent && !value_written) { |
2200 | 8 | value_written = true; |
2201 | 8 | writer.writeValue(value); |
2202 | 8 | } |
2203 | | |
2204 | 21 | writer.writeEndArray(); |
2205 | | |
2206 | 209 | } else { |
2207 | | /** |
2208 | | Because even for a non-array object, `$[0]` can still point to that object: |
2209 | | ``` |
2210 | | select json_extract('{"key": "value"}', '$[0]'); |
2211 | | +------------------------------------------+ |
2212 | | | json_extract('{"key": "value"}', '$[0]') | |
2213 | | +------------------------------------------+ |
2214 | | | {"key": "value"} | |
2215 | | +------------------------------------------+ |
2216 | | ``` |
2217 | | So when inserting an element into `$[1]`, even if '$' does not represent an array, |
2218 | | it should be converted to an array before insertion: |
2219 | | ``` |
2220 | | select json_insert('123','$[1]', null); |
2221 | | +---------------------------------+ |
2222 | | | json_insert('123','$[1]', null) | |
2223 | | +---------------------------------+ |
2224 | | | [123, null] | |
2225 | | +---------------------------------+ |
2226 | | ``` |
2227 | | */ |
2228 | 209 | if (is_last_parent && last_leg && last_leg->type == ARRAY_CODE) { |
2229 | 8 | writer.writeStartArray(); |
2230 | 8 | writer.writeValue(root); |
2231 | 8 | writer.writeValue(value); |
2232 | 8 | writer.writeEndArray(); |
2233 | 8 | return Status::OK(); |
2234 | 201 | } else if (root->isObject()) { |
2235 | 201 | writer.writeStartObject(); |
2236 | 201 | const auto* object_val = root->unpack<ObjectVal>(); |
2237 | 399 | for (const auto& it : *object_val) { |
2238 | 399 | writer.writeKey(it.getKeyStr(), it.klen()); |
2239 | 399 | if (it.value() == next_parent) { |
2240 | 168 | value_written = true; |
2241 | 168 | RETURN_IF_ERROR(write_json_value(it.value(), parents, parent_index + 1, |
2242 | 168 | value, replace, last_leg, writer)); |
2243 | 231 | } else { |
2244 | 231 | writer.writeValue(it.value()); |
2245 | 231 | } |
2246 | 399 | } |
2247 | | |
2248 | 201 | if (is_last_parent && !value_written) { |
2249 | 33 | value_written = true; |
2250 | 33 | writer.writeStartObject(); |
2251 | 33 | writer.writeKey(last_leg->leg_ptr, static_cast<uint8_t>(last_leg->leg_len)); |
2252 | 33 | writer.writeValue(value); |
2253 | 33 | writer.writeEndObject(); |
2254 | 33 | } |
2255 | 201 | writer.writeEndObject(); |
2256 | | |
2257 | 201 | } else { |
2258 | 0 | return Status::InvalidArgument("Cannot insert value into this type"); |
2259 | 0 | } |
2260 | 209 | } |
2261 | | |
2262 | 222 | if (!value_written) { |
2263 | 0 | return Status::InvalidArgument( |
2264 | 0 | "JsonbModify: value not written, parent_index: {}, parents size: {}", |
2265 | 0 | parent_index, parents.size()); |
2266 | 0 | } |
2267 | | |
2268 | 222 | return Status::OK(); |
2269 | 222 | } _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE16write_json_valueEPKNS_10JsonbValueERKSt6vectorIS5_SaIS5_EEmS5_bPKNS_8leg_infoERNS_12JsonbWriterTINS_14JsonbOutStreamEEE Line | Count | Source | 2155 | 78 | const leg_info* last_leg, JsonbWriter& writer) const { | 2156 | 78 | if (parent_index >= parents.size()) { | 2157 | 0 | return Status::InvalidArgument( | 2158 | 0 | "JsonbModify: parent_index {} is out of bounds for parents size {}", | 2159 | 0 | parent_index, parents.size()); | 2160 | 0 | } | 2161 | | | 2162 | 78 | if (parents[parent_index] != root) { | 2163 | 0 | return Status::InvalidArgument( | 2164 | 0 | "JsonbModify: parent value does not match root value, parent_index: {}, " | 2165 | 0 | "parents size: {}", | 2166 | 0 | parent_index, parents.size()); | 2167 | 0 | } | 2168 | | | 2169 | 78 | if (parent_index == parents.size() - 1 && replace) { | 2170 | | // We are at the last parent, write the value directly | 2171 | 0 | if (value == nullptr) { | 2172 | 0 | writer.writeNull(); | 2173 | 0 | } else { | 2174 | 0 | writer.writeValue(value); | 2175 | 0 | } | 2176 | 0 | return Status::OK(); | 2177 | 0 | } | 2178 | | | 2179 | 78 | bool value_written = false; | 2180 | 78 | bool is_last_parent = (parent_index == parents.size() - 1); | 2181 | 78 | const auto* next_parent = is_last_parent ? 
nullptr : parents[parent_index + 1]; | 2182 | 78 | if (root->isArray()) { | 2183 | 4 | writer.writeStartArray(); | 2184 | 4 | const auto* array_val = root->unpack<ArrayVal>(); | 2185 | 12 | for (int i = 0; i != array_val->numElem(); ++i) { | 2186 | 8 | auto* it = array_val->get(i); | 2187 | | | 2188 | 8 | if (is_last_parent && last_leg->array_index == i) { | 2189 | 0 | value_written = true; | 2190 | 0 | writer.writeValue(value); | 2191 | 8 | } else if (it == next_parent) { | 2192 | 0 | value_written = true; | 2193 | 0 | RETURN_IF_ERROR(write_json_value(it, parents, parent_index + 1, value, replace, | 2194 | 0 | last_leg, writer)); | 2195 | 8 | } else { | 2196 | 8 | writer.writeValue(it); | 2197 | 8 | } | 2198 | 8 | } | 2199 | 4 | if (is_last_parent && !value_written) { | 2200 | 4 | value_written = true; | 2201 | 4 | writer.writeValue(value); | 2202 | 4 | } | 2203 | | | 2204 | 4 | writer.writeEndArray(); | 2205 | | | 2206 | 74 | } else { | 2207 | | /** | 2208 | | Because even for a non-array object, `$[0]` can still point to that object: | 2209 | | ``` | 2210 | | select json_extract('{"key": "value"}', '$[0]'); | 2211 | | +------------------------------------------+ | 2212 | | | json_extract('{"key": "value"}', '$[0]') | | 2213 | | +------------------------------------------+ | 2214 | | | {"key": "value"} | | 2215 | | +------------------------------------------+ | 2216 | | ``` | 2217 | | So when inserting an element into `$[1]`, even if '$' does not represent an array, | 2218 | | it should be converted to an array before insertion: | 2219 | | ``` | 2220 | | select json_insert('123','$[1]', null); | 2221 | | +---------------------------------+ | 2222 | | | json_insert('123','$[1]', null) | | 2223 | | +---------------------------------+ | 2224 | | | [123, null] | | 2225 | | +---------------------------------+ | 2226 | | ``` | 2227 | | */ | 2228 | 74 | if (is_last_parent && last_leg && last_leg->type == ARRAY_CODE) { | 2229 | 4 | writer.writeStartArray(); | 2230 | 4 | 
writer.writeValue(root); | 2231 | 4 | writer.writeValue(value); | 2232 | 4 | writer.writeEndArray(); | 2233 | 4 | return Status::OK(); | 2234 | 70 | } else if (root->isObject()) { | 2235 | 70 | writer.writeStartObject(); | 2236 | 70 | const auto* object_val = root->unpack<ObjectVal>(); | 2237 | 70 | for (const auto& it : *object_val) { | 2238 | 68 | writer.writeKey(it.getKeyStr(), it.klen()); | 2239 | 68 | if (it.value() == next_parent) { | 2240 | 38 | value_written = true; | 2241 | 38 | RETURN_IF_ERROR(write_json_value(it.value(), parents, parent_index + 1, | 2242 | 38 | value, replace, last_leg, writer)); | 2243 | 38 | } else { | 2244 | 30 | writer.writeValue(it.value()); | 2245 | 30 | } | 2246 | 68 | } | 2247 | | | 2248 | 70 | if (is_last_parent && !value_written) { | 2249 | 32 | value_written = true; | 2250 | 32 | writer.writeStartObject(); | 2251 | 32 | writer.writeKey(last_leg->leg_ptr, static_cast<uint8_t>(last_leg->leg_len)); | 2252 | 32 | writer.writeValue(value); | 2253 | 32 | writer.writeEndObject(); | 2254 | 32 | } | 2255 | 70 | writer.writeEndObject(); | 2256 | | | 2257 | 70 | } else { | 2258 | 0 | return Status::InvalidArgument("Cannot insert value into this type"); | 2259 | 0 | } | 2260 | 74 | } | 2261 | | | 2262 | 74 | if (!value_written) { | 2263 | 0 | return Status::InvalidArgument( | 2264 | 0 | "JsonbModify: value not written, parent_index: {}, parents size: {}", | 2265 | 0 | parent_index, parents.size()); | 2266 | 0 | } | 2267 | | | 2268 | 74 | return Status::OK(); | 2269 | 74 | } |
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE16write_json_valueEPKNS_10JsonbValueERKSt6vectorIS5_SaIS5_EEmS5_bPKNS_8leg_infoERNS_12JsonbWriterTINS_14JsonbOutStreamEEE Line | Count | Source | 2155 | 121 | const leg_info* last_leg, JsonbWriter& writer) const { | 2156 | 121 | if (parent_index >= parents.size()) { | 2157 | 0 | return Status::InvalidArgument( | 2158 | 0 | "JsonbModify: parent_index {} is out of bounds for parents size {}", | 2159 | 0 | parent_index, parents.size()); | 2160 | 0 | } | 2161 | | | 2162 | 121 | if (parents[parent_index] != root) { | 2163 | 0 | return Status::InvalidArgument( | 2164 | 0 | "JsonbModify: parent value does not match root value, parent_index: {}, " | 2165 | 0 | "parents size: {}", | 2166 | 0 | parent_index, parents.size()); | 2167 | 0 | } | 2168 | | | 2169 | 121 | if (parent_index == parents.size() - 1 && replace) { | 2170 | | // We are at the last parent, write the value directly | 2171 | 44 | if (value == nullptr) { | 2172 | 10 | writer.writeNull(); | 2173 | 34 | } else { | 2174 | 34 | writer.writeValue(value); | 2175 | 34 | } | 2176 | 44 | return Status::OK(); | 2177 | 44 | } | 2178 | | | 2179 | 77 | bool value_written = false; | 2180 | 77 | bool is_last_parent = (parent_index == parents.size() - 1); | 2181 | 77 | const auto* next_parent = is_last_parent ? 
nullptr : parents[parent_index + 1]; | 2182 | 77 | if (root->isArray()) { | 2183 | 9 | writer.writeStartArray(); | 2184 | 9 | const auto* array_val = root->unpack<ArrayVal>(); | 2185 | 27 | for (int i = 0; i != array_val->numElem(); ++i) { | 2186 | 18 | auto* it = array_val->get(i); | 2187 | | | 2188 | 18 | if (is_last_parent && last_leg->array_index == i) { | 2189 | 0 | value_written = true; | 2190 | 0 | writer.writeValue(value); | 2191 | 18 | } else if (it == next_parent) { | 2192 | 5 | value_written = true; | 2193 | 5 | RETURN_IF_ERROR(write_json_value(it, parents, parent_index + 1, value, replace, | 2194 | 5 | last_leg, writer)); | 2195 | 13 | } else { | 2196 | 13 | writer.writeValue(it); | 2197 | 13 | } | 2198 | 18 | } | 2199 | 9 | if (is_last_parent && !value_written) { | 2200 | 4 | value_written = true; | 2201 | 4 | writer.writeValue(value); | 2202 | 4 | } | 2203 | | | 2204 | 9 | writer.writeEndArray(); | 2205 | | | 2206 | 68 | } else { | 2207 | | /** | 2208 | | Because even for a non-array object, `$[0]` can still point to that object: | 2209 | | ``` | 2210 | | select json_extract('{"key": "value"}', '$[0]'); | 2211 | | +------------------------------------------+ | 2212 | | | json_extract('{"key": "value"}', '$[0]') | | 2213 | | +------------------------------------------+ | 2214 | | | {"key": "value"} | | 2215 | | +------------------------------------------+ | 2216 | | ``` | 2217 | | So when inserting an element into `$[1]`, even if '$' does not represent an array, | 2218 | | it should be converted to an array before insertion: | 2219 | | ``` | 2220 | | select json_insert('123','$[1]', null); | 2221 | | +---------------------------------+ | 2222 | | | json_insert('123','$[1]', null) | | 2223 | | +---------------------------------+ | 2224 | | | [123, null] | | 2225 | | +---------------------------------+ | 2226 | | ``` | 2227 | | */ | 2228 | 68 | if (is_last_parent && last_leg && last_leg->type == ARRAY_CODE) { | 2229 | 4 | writer.writeStartArray(); | 2230 
| 4 | writer.writeValue(root); | 2231 | 4 | writer.writeValue(value); | 2232 | 4 | writer.writeEndArray(); | 2233 | 4 | return Status::OK(); | 2234 | 64 | } else if (root->isObject()) { | 2235 | 64 | writer.writeStartObject(); | 2236 | 64 | const auto* object_val = root->unpack<ObjectVal>(); | 2237 | 156 | for (const auto& it : *object_val) { | 2238 | 156 | writer.writeKey(it.getKeyStr(), it.klen()); | 2239 | 156 | if (it.value() == next_parent) { | 2240 | 63 | value_written = true; | 2241 | 63 | RETURN_IF_ERROR(write_json_value(it.value(), parents, parent_index + 1, | 2242 | 63 | value, replace, last_leg, writer)); | 2243 | 93 | } else { | 2244 | 93 | writer.writeValue(it.value()); | 2245 | 93 | } | 2246 | 156 | } | 2247 | | | 2248 | 64 | if (is_last_parent && !value_written) { | 2249 | 1 | value_written = true; | 2250 | 1 | writer.writeStartObject(); | 2251 | 1 | writer.writeKey(last_leg->leg_ptr, static_cast<uint8_t>(last_leg->leg_len)); | 2252 | 1 | writer.writeValue(value); | 2253 | 1 | writer.writeEndObject(); | 2254 | 1 | } | 2255 | 64 | writer.writeEndObject(); | 2256 | | | 2257 | 64 | } else { | 2258 | 0 | return Status::InvalidArgument("Cannot insert value into this type"); | 2259 | 0 | } | 2260 | 68 | } | 2261 | | | 2262 | 73 | if (!value_written) { | 2263 | 0 | return Status::InvalidArgument( | 2264 | 0 | "JsonbModify: value not written, parent_index: {}, parents size: {}", | 2265 | 0 | parent_index, parents.size()); | 2266 | 0 | } | 2267 | | | 2268 | 73 | return Status::OK(); | 2269 | 73 | } |
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE16write_json_valueEPKNS_10JsonbValueERKSt6vectorIS5_SaIS5_EEmS5_bPKNS_8leg_infoERNS_12JsonbWriterTINS_14JsonbOutStreamEEE Line | Count | Source | 2155 | 131 | const leg_info* last_leg, JsonbWriter& writer) const { | 2156 | 131 | if (parent_index >= parents.size()) { | 2157 | 0 | return Status::InvalidArgument( | 2158 | 0 | "JsonbModify: parent_index {} is out of bounds for parents size {}", | 2159 | 0 | parent_index, parents.size()); | 2160 | 0 | } | 2161 | | | 2162 | 131 | if (parents[parent_index] != root) { | 2163 | 0 | return Status::InvalidArgument( | 2164 | 0 | "JsonbModify: parent value does not match root value, parent_index: {}, " | 2165 | 0 | "parents size: {}", | 2166 | 0 | parent_index, parents.size()); | 2167 | 0 | } | 2168 | | | 2169 | 131 | if (parent_index == parents.size() - 1 && replace) { | 2170 | | // We are at the last parent, write the value directly | 2171 | 56 | if (value == nullptr) { | 2172 | 14 | writer.writeNull(); | 2173 | 42 | } else { | 2174 | 42 | writer.writeValue(value); | 2175 | 42 | } | 2176 | 56 | return Status::OK(); | 2177 | 56 | } | 2178 | | | 2179 | 75 | bool value_written = false; | 2180 | 75 | bool is_last_parent = (parent_index == parents.size() - 1); | 2181 | 75 | const auto* next_parent = is_last_parent ? 
nullptr : parents[parent_index + 1]; | 2182 | 75 | if (root->isArray()) { | 2183 | 8 | writer.writeStartArray(); | 2184 | 8 | const auto* array_val = root->unpack<ArrayVal>(); | 2185 | 24 | for (int i = 0; i != array_val->numElem(); ++i) { | 2186 | 16 | auto* it = array_val->get(i); | 2187 | | | 2188 | 16 | if (is_last_parent && last_leg->array_index == i) { | 2189 | 0 | value_written = true; | 2190 | 0 | writer.writeValue(value); | 2191 | 16 | } else if (it == next_parent) { | 2192 | 8 | value_written = true; | 2193 | 8 | RETURN_IF_ERROR(write_json_value(it, parents, parent_index + 1, value, replace, | 2194 | 8 | last_leg, writer)); | 2195 | 8 | } else { | 2196 | 8 | writer.writeValue(it); | 2197 | 8 | } | 2198 | 16 | } | 2199 | 8 | if (is_last_parent && !value_written) { | 2200 | 0 | value_written = true; | 2201 | 0 | writer.writeValue(value); | 2202 | 0 | } | 2203 | | | 2204 | 8 | writer.writeEndArray(); | 2205 | | | 2206 | 67 | } else { | 2207 | | /** | 2208 | | Because even for a non-array object, `$[0]` can still point to that object: | 2209 | | ``` | 2210 | | select json_extract('{"key": "value"}', '$[0]'); | 2211 | | +------------------------------------------+ | 2212 | | | json_extract('{"key": "value"}', '$[0]') | | 2213 | | +------------------------------------------+ | 2214 | | | {"key": "value"} | | 2215 | | +------------------------------------------+ | 2216 | | ``` | 2217 | | So when inserting an element into `$[1]`, even if '$' does not represent an array, | 2218 | | it should be converted to an array before insertion: | 2219 | | ``` | 2220 | | select json_insert('123','$[1]', null); | 2221 | | +---------------------------------+ | 2222 | | | json_insert('123','$[1]', null) | | 2223 | | +---------------------------------+ | 2224 | | | [123, null] | | 2225 | | +---------------------------------+ | 2226 | | ``` | 2227 | | */ | 2228 | 67 | if (is_last_parent && last_leg && last_leg->type == ARRAY_CODE) { | 2229 | 0 | writer.writeStartArray(); | 2230 | 
0 | writer.writeValue(root); | 2231 | 0 | writer.writeValue(value); | 2232 | 0 | writer.writeEndArray(); | 2233 | 0 | return Status::OK(); | 2234 | 67 | } else if (root->isObject()) { | 2235 | 67 | writer.writeStartObject(); | 2236 | 67 | const auto* object_val = root->unpack<ObjectVal>(); | 2237 | 175 | for (const auto& it : *object_val) { | 2238 | 175 | writer.writeKey(it.getKeyStr(), it.klen()); | 2239 | 175 | if (it.value() == next_parent) { | 2240 | 67 | value_written = true; | 2241 | 67 | RETURN_IF_ERROR(write_json_value(it.value(), parents, parent_index + 1, | 2242 | 67 | value, replace, last_leg, writer)); | 2243 | 108 | } else { | 2244 | 108 | writer.writeValue(it.value()); | 2245 | 108 | } | 2246 | 175 | } | 2247 | | | 2248 | 67 | if (is_last_parent && !value_written) { | 2249 | 0 | value_written = true; | 2250 | 0 | writer.writeStartObject(); | 2251 | 0 | writer.writeKey(last_leg->leg_ptr, static_cast<uint8_t>(last_leg->leg_len)); | 2252 | 0 | writer.writeValue(value); | 2253 | 0 | writer.writeEndObject(); | 2254 | 0 | } | 2255 | 67 | writer.writeEndObject(); | 2256 | | | 2257 | 67 | } else { | 2258 | 0 | return Status::InvalidArgument("Cannot insert value into this type"); | 2259 | 0 | } | 2260 | 67 | } | 2261 | | | 2262 | 75 | if (!value_written) { | 2263 | 0 | return Status::InvalidArgument( | 2264 | 0 | "JsonbModify: value not written, parent_index: {}, parents size: {}", | 2265 | 0 | parent_index, parents.size()); | 2266 | 0 | } | 2267 | | | 2268 | 75 | return Status::OK(); | 2269 | 75 | } |
|
2270 | | |
2271 | | Status parse_paths_and_values(DorisVector<DorisVector<JsonbPath>>& json_paths, |
2272 | | DorisVector<DorisVector<const JsonbValue*>>& json_values, |
2273 | | const ColumnNumbers& arguments, const size_t input_rows_count, |
2274 | | const std::vector<const ColumnString*>& json_path_columns, |
2275 | | const std::vector<bool>& json_path_constant, |
2276 | | const std::vector<const NullMap*>& json_path_null_maps, |
2277 | | const std::vector<const ColumnString*>& json_value_columns, |
2278 | | const std::vector<bool>& json_value_constant, |
2279 | 83 | const std::vector<const NullMap*>& json_value_null_maps) const { |
2280 | 183 | for (size_t i = 1; i < arguments.size(); i += 2) { |
2281 | 108 | const size_t index = i / 2; |
2282 | 108 | const auto* json_path_column = json_path_columns[index]; |
2283 | 108 | const auto* value_column = json_value_columns[index]; |
2284 | | |
2285 | 108 | json_paths[index].resize(json_path_constant[index] ? 1 : input_rows_count); |
2286 | 108 | json_values[index].resize(json_value_constant[index] ? 1 : input_rows_count, nullptr); |
2287 | | |
2288 | 217 | for (size_t row_idx = 0; row_idx != json_paths[index].size(); ++row_idx) { |
2289 | 117 | if (json_path_null_maps[index] && (*json_path_null_maps[index])[row_idx]) { |
2290 | 6 | continue; |
2291 | 6 | } |
2292 | | |
2293 | 111 | auto path_string = json_path_column->get_data_at(row_idx); |
2294 | 111 | if (!json_paths[index][row_idx].seek(path_string.data, path_string.size)) { |
2295 | 3 | return Status::InvalidArgument( |
2296 | 3 | "Json path error: Invalid Json Path for value: {}, " |
2297 | 3 | "argument " |
2298 | 3 | "index: {}, row index: {}", |
2299 | 3 | std::string_view(path_string.data, path_string.size), i, row_idx); |
2300 | 3 | } |
2301 | | |
2302 | 108 | if (json_paths[index][row_idx].is_wildcard()) { |
2303 | 5 | return Status::InvalidArgument( |
2304 | 5 | "In this situation, path expressions may not contain the * and ** " |
2305 | 5 | "tokens, argument index: {}, row index: {}", |
2306 | 5 | i, row_idx); |
2307 | 5 | } |
2308 | 108 | } |
2309 | | |
2310 | 294 | for (size_t row_idx = 0; row_idx != json_values[index].size(); ++row_idx) { |
2311 | 194 | if (json_value_null_maps[index] && (*json_value_null_maps[index])[row_idx]) { |
2312 | 48 | continue; |
2313 | 48 | } |
2314 | | |
2315 | 146 | auto value_string = value_column->get_data_at(row_idx); |
2316 | 146 | const JsonbDocument* doc = nullptr; |
2317 | 146 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(value_string.data, |
2318 | 146 | value_string.size, &doc)); |
2319 | 146 | if (doc) { |
2320 | 146 | json_values[index][row_idx] = doc->getValue(); |
2321 | 146 | } |
2322 | 146 | } |
2323 | 100 | } |
2324 | | |
2325 | 75 | return Status::OK(); |
2326 | 83 | } _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE22parse_paths_and_valuesERSt6vectorIS3_INS_9JsonbPathENS_18CustomStdAllocatorIS4_NS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEEEEENS5_ISA_S8_EEERS3_IS3_IPKNS_10JsonbValueENS5_ISG_S8_EEENS5_ISI_S8_EEERKS3_IjSaIjEEmRKS3_IPKNS_9ColumnStrIjEESaIST_EERKS3_IbSaIbEERKS3_IPKNS_8PODArrayIhLm4096ES8_Lm16ELm15EEESaIS15_EESX_S11_S19_ Line | Count | Source | 2279 | 28 | const std::vector<const NullMap*>& json_value_null_maps) const { | 2280 | 59 | for (size_t i = 1; i < arguments.size(); i += 2) { | 2281 | 35 | const size_t index = i / 2; | 2282 | 35 | const auto* json_path_column = json_path_columns[index]; | 2283 | 35 | const auto* value_column = json_value_columns[index]; | 2284 | | | 2285 | 35 | json_paths[index].resize(json_path_constant[index] ? 1 : input_rows_count); | 2286 | 35 | json_values[index].resize(json_value_constant[index] ? 1 : input_rows_count, nullptr); | 2287 | | | 2288 | 75 | for (size_t row_idx = 0; row_idx != json_paths[index].size(); ++row_idx) { | 2289 | 44 | if (json_path_null_maps[index] && (*json_path_null_maps[index])[row_idx]) { | 2290 | 4 | continue; | 2291 | 4 | } | 2292 | | | 2293 | 40 | auto path_string = json_path_column->get_data_at(row_idx); | 2294 | 40 | if (!json_paths[index][row_idx].seek(path_string.data, path_string.size)) { | 2295 | 1 | return Status::InvalidArgument( | 2296 | 1 | "Json path error: Invalid Json Path for value: {}, " | 2297 | 1 | "argument " | 2298 | 1 | "index: {}, row index: {}", | 2299 | 1 | std::string_view(path_string.data, path_string.size), i, row_idx); | 2300 | 1 | } | 2301 | | | 2302 | 39 | if (json_paths[index][row_idx].is_wildcard()) { | 2303 | 3 | return Status::InvalidArgument( | 2304 | 3 | "In this situation, path expressions may not contain the * and ** " | 2305 | 3 | "tokens, argument index: {}, row index: {}", | 2306 | 3 | i, row_idx); | 2307 | 3 | } | 2308 | 39 | } | 2309 | | | 2310 | 92 | for (size_t row_idx = 0; 
row_idx != json_values[index].size(); ++row_idx) { | 2311 | 61 | if (json_value_null_maps[index] && (*json_value_null_maps[index])[row_idx]) { | 2312 | 14 | continue; | 2313 | 14 | } | 2314 | | | 2315 | 47 | auto value_string = value_column->get_data_at(row_idx); | 2316 | 47 | const JsonbDocument* doc = nullptr; | 2317 | 47 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(value_string.data, | 2318 | 47 | value_string.size, &doc)); | 2319 | 47 | if (doc) { | 2320 | 47 | json_values[index][row_idx] = doc->getValue(); | 2321 | 47 | } | 2322 | 47 | } | 2323 | 31 | } | 2324 | | | 2325 | 24 | return Status::OK(); | 2326 | 28 | } |
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE22parse_paths_and_valuesERSt6vectorIS3_INS_9JsonbPathENS_18CustomStdAllocatorIS4_NS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEEEEENS5_ISA_S8_EEERS3_IS3_IPKNS_10JsonbValueENS5_ISG_S8_EEENS5_ISI_S8_EEERKS3_IjSaIjEEmRKS3_IPKNS_9ColumnStrIjEESaIST_EERKS3_IbSaIbEERKS3_IPKNS_8PODArrayIhLm4096ES8_Lm16ELm15EEESaIS15_EESX_S11_S19_ Line | Count | Source | 2279 | 27 | const std::vector<const NullMap*>& json_value_null_maps) const { | 2280 | 60 | for (size_t i = 1; i < arguments.size(); i += 2) { | 2281 | 35 | const size_t index = i / 2; | 2282 | 35 | const auto* json_path_column = json_path_columns[index]; | 2283 | 35 | const auto* value_column = json_value_columns[index]; | 2284 | | | 2285 | 35 | json_paths[index].resize(json_path_constant[index] ? 1 : input_rows_count); | 2286 | 35 | json_values[index].resize(json_value_constant[index] ? 1 : input_rows_count, nullptr); | 2287 | | | 2288 | 68 | for (size_t row_idx = 0; row_idx != json_paths[index].size(); ++row_idx) { | 2289 | 35 | if (json_path_null_maps[index] && (*json_path_null_maps[index])[row_idx]) { | 2290 | 1 | continue; | 2291 | 1 | } | 2292 | | | 2293 | 34 | auto path_string = json_path_column->get_data_at(row_idx); | 2294 | 34 | if (!json_paths[index][row_idx].seek(path_string.data, path_string.size)) { | 2295 | 1 | return Status::InvalidArgument( | 2296 | 1 | "Json path error: Invalid Json Path for value: {}, " | 2297 | 1 | "argument " | 2298 | 1 | "index: {}, row index: {}", | 2299 | 1 | std::string_view(path_string.data, path_string.size), i, row_idx); | 2300 | 1 | } | 2301 | | | 2302 | 33 | if (json_paths[index][row_idx].is_wildcard()) { | 2303 | 1 | return Status::InvalidArgument( | 2304 | 1 | "In this situation, path expressions may not contain the * and ** " | 2305 | 1 | "tokens, argument index: {}, row index: {}", | 2306 | 1 | i, row_idx); | 2307 | 1 | } | 2308 | 33 | } | 2309 | | | 2310 | 98 | for (size_t row_idx = 0; row_idx != 
json_values[index].size(); ++row_idx) { | 2311 | 65 | if (json_value_null_maps[index] && (*json_value_null_maps[index])[row_idx]) { | 2312 | 16 | continue; | 2313 | 16 | } | 2314 | | | 2315 | 49 | auto value_string = value_column->get_data_at(row_idx); | 2316 | 49 | const JsonbDocument* doc = nullptr; | 2317 | 49 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(value_string.data, | 2318 | 49 | value_string.size, &doc)); | 2319 | 49 | if (doc) { | 2320 | 49 | json_values[index][row_idx] = doc->getValue(); | 2321 | 49 | } | 2322 | 49 | } | 2323 | 33 | } | 2324 | | | 2325 | 25 | return Status::OK(); | 2326 | 27 | } |
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE22parse_paths_and_valuesERSt6vectorIS3_INS_9JsonbPathENS_18CustomStdAllocatorIS4_NS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEEEEENS5_ISA_S8_EEERS3_IS3_IPKNS_10JsonbValueENS5_ISG_S8_EEENS5_ISI_S8_EEERKS3_IjSaIjEEmRKS3_IPKNS_9ColumnStrIjEESaIST_EERKS3_IbSaIbEERKS3_IPKNS_8PODArrayIhLm4096ES8_Lm16ELm15EEESaIS15_EESX_S11_S19_ Line | Count | Source | 2279 | 28 | const std::vector<const NullMap*>& json_value_null_maps) const { | 2280 | 64 | for (size_t i = 1; i < arguments.size(); i += 2) { | 2281 | 38 | const size_t index = i / 2; | 2282 | 38 | const auto* json_path_column = json_path_columns[index]; | 2283 | 38 | const auto* value_column = json_value_columns[index]; | 2284 | | | 2285 | 38 | json_paths[index].resize(json_path_constant[index] ? 1 : input_rows_count); | 2286 | 38 | json_values[index].resize(json_value_constant[index] ? 1 : input_rows_count, nullptr); | 2287 | | | 2288 | 74 | for (size_t row_idx = 0; row_idx != json_paths[index].size(); ++row_idx) { | 2289 | 38 | if (json_path_null_maps[index] && (*json_path_null_maps[index])[row_idx]) { | 2290 | 1 | continue; | 2291 | 1 | } | 2292 | | | 2293 | 37 | auto path_string = json_path_column->get_data_at(row_idx); | 2294 | 37 | if (!json_paths[index][row_idx].seek(path_string.data, path_string.size)) { | 2295 | 1 | return Status::InvalidArgument( | 2296 | 1 | "Json path error: Invalid Json Path for value: {}, " | 2297 | 1 | "argument " | 2298 | 1 | "index: {}, row index: {}", | 2299 | 1 | std::string_view(path_string.data, path_string.size), i, row_idx); | 2300 | 1 | } | 2301 | | | 2302 | 36 | if (json_paths[index][row_idx].is_wildcard()) { | 2303 | 1 | return Status::InvalidArgument( | 2304 | 1 | "In this situation, path expressions may not contain the * and ** " | 2305 | 1 | "tokens, argument index: {}, row index: {}", | 2306 | 1 | i, row_idx); | 2307 | 1 | } | 2308 | 36 | } | 2309 | | | 2310 | 104 | for (size_t row_idx = 0; row_idx != 
json_values[index].size(); ++row_idx) { | 2311 | 68 | if (json_value_null_maps[index] && (*json_value_null_maps[index])[row_idx]) { | 2312 | 18 | continue; | 2313 | 18 | } | 2314 | | | 2315 | 50 | auto value_string = value_column->get_data_at(row_idx); | 2316 | 50 | const JsonbDocument* doc = nullptr; | 2317 | 50 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(value_string.data, | 2318 | 50 | value_string.size, &doc)); | 2319 | 50 | if (doc) { | 2320 | 50 | json_values[index][row_idx] = doc->getValue(); | 2321 | 50 | } | 2322 | 50 | } | 2323 | 36 | } | 2324 | | | 2325 | 26 | return Status::OK(); | 2326 | 28 | } |
|
2327 | | }; |
2328 | | |
2329 | | struct JsonbContainsAndPathImpl { |
2330 | 57 | static DataTypes get_variadic_argument_types() { |
2331 | 57 | return {std::make_shared<DataTypeJsonb>(), std::make_shared<DataTypeJsonb>(), |
2332 | 57 | std::make_shared<DataTypeString>()}; |
2333 | 57 | } |
2334 | | |
2335 | | static Status execute_impl(FunctionContext* context, Block& block, |
2336 | | const ColumnNumbers& arguments, uint32_t result, |
2337 | 127 | size_t input_rows_count) { |
2338 | 127 | return JsonbContainsUtil::jsonb_contains_execute(context, block, arguments, result, |
2339 | 127 | input_rows_count); |
2340 | 127 | } |
2341 | | }; |
2342 | | |
2343 | | class FunctionJsonSearch : public IFunction { |
2344 | | private: |
2345 | | using OneFun = std::function<Status(size_t, bool*)>; |
2346 | 65 | static Status always_one(size_t i, bool* res) { |
2347 | 65 | *res = true; |
2348 | 65 | return Status::OK(); |
2349 | 65 | } |
2350 | 48 | static Status always_all(size_t i, bool* res) { |
2351 | 48 | *res = false; |
2352 | 48 | return Status::OK(); |
2353 | 48 | } |
2354 | | |
2355 | | using CheckNullFun = std::function<bool(size_t)>; |
/// CheckNullFun-compatible callback for inputs without NULL rows: answers "not null"
/// (false) for every row index.
static bool always_not_null(size_t /*row*/) {
    return false;
}
2357 | | |
2358 | | using GetJsonStringRefFun = std::function<StringRef(size_t)>; |
2359 | | |
2360 | 329 | Status matched(const std::string_view& str, LikeState* state, unsigned char* res) const { |
2361 | 329 | StringRef pattern; // not used |
2362 | 329 | StringRef value_val(str.data(), str.size()); |
2363 | 329 | return (state->scalar_function)(&state->search_state, value_val, pattern, res); |
2364 | 329 | } |
2365 | | |
2366 | | /** |
2367 | | * Recursive search for matching string, if found, the result will be added to a vector |
2368 | | * @param element json element |
2369 | | * @param one_match |
2370 | | * @param search_str |
2371 | | * @param cur_path |
2372 | | * @param matches The path that has already been matched |
2373 | | * @return true if matched else false |
2374 | | */ |
2375 | | bool find_matches(const JsonbValue* element, const bool& one_match, LikeState* state, |
2376 | 717 | JsonbPath* cur_path, std::unordered_set<std::string>* matches) const { |
2377 | 717 | if (element->isString()) { |
2378 | 329 | const auto* json_string = element->unpack<JsonbStringVal>(); |
2379 | 329 | const std::string_view element_str(json_string->getBlob(), json_string->length()); |
2380 | 329 | unsigned char res; |
2381 | 329 | RETURN_IF_ERROR(matched(element_str, state, &res)); |
2382 | 329 | if (res) { |
2383 | 206 | std::string str; |
2384 | 206 | auto valid = cur_path->to_string(&str); |
2385 | 206 | if (!valid) { |
2386 | 0 | return false; |
2387 | 0 | } |
2388 | 206 | return matches->insert(str).second; |
2389 | 206 | } else { |
2390 | 123 | return false; |
2391 | 123 | } |
2392 | 388 | } else if (element->isObject()) { |
2393 | 195 | const auto* object = element->unpack<ObjectVal>(); |
2394 | 195 | bool find = false; |
2395 | 201 | for (const auto& item : *object) { |
2396 | 201 | Slice key(item.getKeyStr(), item.klen()); |
2397 | 201 | const auto* child_element = item.value(); |
2398 | | // construct an object member path leg. |
2399 | 201 | auto leg = std::make_unique<leg_info>(key.data, key.size, 0, MEMBER_CODE); |
2400 | 201 | cur_path->add_leg_to_leg_vector(std::move(leg)); |
2401 | 201 | find |= find_matches(child_element, one_match, state, cur_path, matches); |
2402 | 201 | cur_path->pop_leg_from_leg_vector(); |
2403 | 201 | if (one_match && find) { |
2404 | 3 | return true; |
2405 | 3 | } |
2406 | 201 | } |
2407 | 192 | return find; |
2408 | 195 | } else if (element->isArray()) { |
2409 | 193 | const auto* array = element->unpack<ArrayVal>(); |
2410 | 193 | bool find = false; |
2411 | 512 | for (int i = 0; i < array->numElem(); ++i) { |
2412 | 385 | auto leg = std::make_unique<leg_info>(nullptr, 0, i, ARRAY_CODE); |
2413 | 385 | cur_path->add_leg_to_leg_vector(std::move(leg)); |
2414 | 385 | const auto* child_element = array->get(i); |
2415 | | // construct an array cell path leg. |
2416 | 385 | find |= find_matches(child_element, one_match, state, cur_path, matches); |
2417 | 385 | cur_path->pop_leg_from_leg_vector(); |
2418 | 385 | if (one_match && find) { |
2419 | 66 | return true; |
2420 | 66 | } |
2421 | 385 | } |
2422 | 127 | return find; |
2423 | 193 | } else { |
2424 | 0 | return false; |
2425 | 0 | } |
2426 | 717 | } |
2427 | | |
2428 | | void make_result_str(JsonbWriter& writer, std::unordered_set<std::string>& matches, |
2429 | 117 | ColumnString* result_col) const { |
2430 | 117 | if (matches.size() == 1) { |
2431 | 86 | for (const auto& str_ref : matches) { |
2432 | 86 | writer.writeStartString(); |
2433 | 86 | writer.writeString(str_ref); |
2434 | 86 | writer.writeEndString(); |
2435 | 86 | } |
2436 | 86 | } else { |
2437 | 31 | writer.writeStartArray(); |
2438 | 120 | for (const auto& str_ref : matches) { |
2439 | 120 | writer.writeStartString(); |
2440 | 120 | writer.writeString(str_ref); |
2441 | 120 | writer.writeEndString(); |
2442 | 120 | } |
2443 | 31 | writer.writeEndArray(); |
2444 | 31 | } |
2445 | | |
2446 | 117 | result_col->insert_data(writer.getOutput()->getBuffer(), |
2447 | 117 | (size_t)writer.getOutput()->getSize()); |
2448 | 117 | } |
2449 | | |
2450 | | template <bool search_is_const> |
2451 | | Status execute_vector(Block& block, size_t input_rows_count, CheckNullFun json_null_check, |
2452 | | GetJsonStringRefFun col_json_string, CheckNullFun one_null_check, |
2453 | | OneFun one_check, CheckNullFun search_null_check, |
2454 | | const ColumnString* col_search_string, FunctionContext* context, |
2455 | 47 | size_t result) const { |
2456 | 47 | auto result_col = ColumnString::create(); |
2457 | 47 | auto null_map = ColumnUInt8::create(input_rows_count, 0); |
2458 | | |
2459 | 47 | std::shared_ptr<LikeState> state_ptr; |
2460 | 47 | LikeState* state = nullptr; |
2461 | 47 | if (search_is_const) { |
2462 | 8 | state = reinterpret_cast<LikeState*>( |
2463 | 8 | context->get_function_state(FunctionContext::THREAD_LOCAL)); |
2464 | 8 | } |
2465 | | |
2466 | 47 | bool is_one = false; |
2467 | | |
2468 | 47 | JsonbWriter writer; |
2469 | 204 | for (size_t i = 0; i < input_rows_count; ++i) { |
2470 | | // an error occurs if the json_doc argument is not a valid json document. |
2471 | 159 | if (json_null_check(i)) { |
2472 | 12 | null_map->get_data()[i] = 1; |
2473 | 12 | result_col->insert_data("", 0); |
2474 | 12 | continue; |
2475 | 12 | } |
2476 | 147 | const auto& json_doc_str = col_json_string(i); |
2477 | 147 | const JsonbDocument* json_doc = nullptr; |
2478 | 147 | auto st = JsonbDocument::checkAndCreateDocument(json_doc_str.data, json_doc_str.size, |
2479 | 147 | &json_doc); |
2480 | 147 | if (!st.ok()) { |
2481 | 0 | return Status::InvalidArgument( |
2482 | 0 | "the json_doc argument at row {} is not a valid json document: {}", i, |
2483 | 0 | st.to_string()); |
2484 | 0 | } |
2485 | | |
2486 | 147 | if (!one_null_check(i)) { |
2487 | 145 | RETURN_IF_ERROR(one_check(i, &is_one)); |
2488 | 145 | } |
2489 | | |
2490 | 145 | if (one_null_check(i) || search_null_check(i)) { |
2491 | 14 | null_map->get_data()[i] = 1; |
2492 | 14 | result_col->insert_data("", 0); |
2493 | 14 | continue; |
2494 | 14 | } |
2495 | | |
2496 | | // an error occurs if any path argument is not a valid path expression. |
2497 | 131 | std::string root_path_str = "$"; |
2498 | 131 | JsonbPath root_path; |
2499 | 131 | root_path.seek(root_path_str.c_str(), root_path_str.size()); |
2500 | 131 | std::vector<JsonbPath*> paths; |
2501 | 131 | paths.push_back(&root_path); |
2502 | | |
2503 | 131 | if (!search_is_const) { |
2504 | 99 | state_ptr = std::make_shared<LikeState>(); |
2505 | 99 | state_ptr->is_like_pattern = true; |
2506 | 99 | const auto& search_str = col_search_string->get_data_at(i); |
2507 | 99 | RETURN_IF_ERROR(FunctionLike::construct_like_const_state(context, search_str, |
2508 | 99 | state_ptr, false)); |
2509 | 99 | state = state_ptr.get(); |
2510 | 99 | } |
2511 | | |
2512 | | // maintain a hashset to deduplicate matches. |
2513 | 131 | std::unordered_set<std::string> matches; |
2514 | 131 | for (const auto& item : paths) { |
2515 | 131 | auto* cur_path = item; |
2516 | 131 | auto find = find_matches(json_doc->getValue(), is_one, state, cur_path, &matches); |
2517 | 131 | if (is_one && find) { |
2518 | 66 | break; |
2519 | 66 | } |
2520 | 131 | } |
2521 | 131 | if (matches.empty()) { |
2522 | | // returns NULL if the search_str is not found in the document. |
2523 | 14 | null_map->get_data()[i] = 1; |
2524 | 14 | result_col->insert_data("", 0); |
2525 | 14 | continue; |
2526 | 14 | } |
2527 | | |
2528 | 117 | writer.reset(); |
2529 | 117 | make_result_str(writer, matches, result_col.get()); |
2530 | 117 | } |
2531 | 45 | auto result_col_nullable = |
2532 | 45 | ColumnNullable::create(std::move(result_col), std::move(null_map)); |
2533 | 45 | block.replace_by_position(result, std::move(result_col_nullable)); |
2534 | 45 | return Status::OK(); |
2535 | 47 | } _ZNK5doris18FunctionJsonSearch14execute_vectorILb1EEENS_6StatusERNS_5BlockEmSt8functionIFbmEES5_IFNS_9StringRefEmEES7_S5_IFS2_mPbEES7_PKNS_9ColumnStrIjEEPNS_15FunctionContextEm Line | Count | Source | 2455 | 8 | size_t result) const { | 2456 | 8 | auto result_col = ColumnString::create(); | 2457 | 8 | auto null_map = ColumnUInt8::create(input_rows_count, 0); | 2458 | | | 2459 | 8 | std::shared_ptr<LikeState> state_ptr; | 2460 | 8 | LikeState* state = nullptr; | 2461 | 8 | if (search_is_const) { | 2462 | 8 | state = reinterpret_cast<LikeState*>( | 2463 | 8 | context->get_function_state(FunctionContext::THREAD_LOCAL)); | 2464 | 8 | } | 2465 | | | 2466 | 8 | bool is_one = false; | 2467 | | | 2468 | 8 | JsonbWriter writer; | 2469 | 44 | for (size_t i = 0; i < input_rows_count; ++i) { | 2470 | | // an error occurs if the json_doc argument is not a valid json document. | 2471 | 36 | if (json_null_check(i)) { | 2472 | 4 | null_map->get_data()[i] = 1; | 2473 | 4 | result_col->insert_data("", 0); | 2474 | 4 | continue; | 2475 | 4 | } | 2476 | 32 | const auto& json_doc_str = col_json_string(i); | 2477 | 32 | const JsonbDocument* json_doc = nullptr; | 2478 | 32 | auto st = JsonbDocument::checkAndCreateDocument(json_doc_str.data, json_doc_str.size, | 2479 | 32 | &json_doc); | 2480 | 32 | if (!st.ok()) { | 2481 | 0 | return Status::InvalidArgument( | 2482 | 0 | "the json_doc argument at row {} is not a valid json document: {}", i, | 2483 | 0 | st.to_string()); | 2484 | 0 | } | 2485 | | | 2486 | 32 | if (!one_null_check(i)) { | 2487 | 32 | RETURN_IF_ERROR(one_check(i, &is_one)); | 2488 | 32 | } | 2489 | | | 2490 | 32 | if (one_null_check(i) || search_null_check(i)) { | 2491 | 0 | null_map->get_data()[i] = 1; | 2492 | 0 | result_col->insert_data("", 0); | 2493 | 0 | continue; | 2494 | 0 | } | 2495 | | | 2496 | | // an error occurs if any path argument is not a valid path expression. 
| 2497 | 32 | std::string root_path_str = "$"; | 2498 | 32 | JsonbPath root_path; | 2499 | 32 | root_path.seek(root_path_str.c_str(), root_path_str.size()); | 2500 | 32 | std::vector<JsonbPath*> paths; | 2501 | 32 | paths.push_back(&root_path); | 2502 | | | 2503 | 32 | if (!search_is_const) { | 2504 | 0 | state_ptr = std::make_shared<LikeState>(); | 2505 | 0 | state_ptr->is_like_pattern = true; | 2506 | 0 | const auto& search_str = col_search_string->get_data_at(i); | 2507 | 0 | RETURN_IF_ERROR(FunctionLike::construct_like_const_state(context, search_str, | 2508 | 0 | state_ptr, false)); | 2509 | 0 | state = state_ptr.get(); | 2510 | 0 | } | 2511 | | | 2512 | | // maintain a hashset to deduplicate matches. | 2513 | 32 | std::unordered_set<std::string> matches; | 2514 | 32 | for (const auto& item : paths) { | 2515 | 32 | auto* cur_path = item; | 2516 | 32 | auto find = find_matches(json_doc->getValue(), is_one, state, cur_path, &matches); | 2517 | 32 | if (is_one && find) { | 2518 | 16 | break; | 2519 | 16 | } | 2520 | 32 | } | 2521 | 32 | if (matches.empty()) { | 2522 | | // returns NULL if the search_str is not found in the document. | 2523 | 0 | null_map->get_data()[i] = 1; | 2524 | 0 | result_col->insert_data("", 0); | 2525 | 0 | continue; | 2526 | 0 | } | 2527 | | | 2528 | 32 | writer.reset(); | 2529 | 32 | make_result_str(writer, matches, result_col.get()); | 2530 | 32 | } | 2531 | 8 | auto result_col_nullable = | 2532 | 8 | ColumnNullable::create(std::move(result_col), std::move(null_map)); | 2533 | 8 | block.replace_by_position(result, std::move(result_col_nullable)); | 2534 | 8 | return Status::OK(); | 2535 | 8 | } |
_ZNK5doris18FunctionJsonSearch14execute_vectorILb0EEENS_6StatusERNS_5BlockEmSt8functionIFbmEES5_IFNS_9StringRefEmEES7_S5_IFS2_mPbEES7_PKNS_9ColumnStrIjEEPNS_15FunctionContextEm Line | Count | Source | 2455 | 39 | size_t result) const { | 2456 | 39 | auto result_col = ColumnString::create(); | 2457 | 39 | auto null_map = ColumnUInt8::create(input_rows_count, 0); | 2458 | | | 2459 | 39 | std::shared_ptr<LikeState> state_ptr; | 2460 | 39 | LikeState* state = nullptr; | 2461 | 39 | if (search_is_const) { | 2462 | 0 | state = reinterpret_cast<LikeState*>( | 2463 | 0 | context->get_function_state(FunctionContext::THREAD_LOCAL)); | 2464 | 0 | } | 2465 | | | 2466 | 39 | bool is_one = false; | 2467 | | | 2468 | 39 | JsonbWriter writer; | 2469 | 160 | for (size_t i = 0; i < input_rows_count; ++i) { | 2470 | | // an error occurs if the json_doc argument is not a valid json document. | 2471 | 123 | if (json_null_check(i)) { | 2472 | 8 | null_map->get_data()[i] = 1; | 2473 | 8 | result_col->insert_data("", 0); | 2474 | 8 | continue; | 2475 | 8 | } | 2476 | 115 | const auto& json_doc_str = col_json_string(i); | 2477 | 115 | const JsonbDocument* json_doc = nullptr; | 2478 | 115 | auto st = JsonbDocument::checkAndCreateDocument(json_doc_str.data, json_doc_str.size, | 2479 | 115 | &json_doc); | 2480 | 115 | if (!st.ok()) { | 2481 | 0 | return Status::InvalidArgument( | 2482 | 0 | "the json_doc argument at row {} is not a valid json document: {}", i, | 2483 | 0 | st.to_string()); | 2484 | 0 | } | 2485 | | | 2486 | 115 | if (!one_null_check(i)) { | 2487 | 113 | RETURN_IF_ERROR(one_check(i, &is_one)); | 2488 | 113 | } | 2489 | | | 2490 | 113 | if (one_null_check(i) || search_null_check(i)) { | 2491 | 14 | null_map->get_data()[i] = 1; | 2492 | 14 | result_col->insert_data("", 0); | 2493 | 14 | continue; | 2494 | 14 | } | 2495 | | | 2496 | | // an error occurs if any path argument is not a valid path expression. 
| 2497 | 99 | std::string root_path_str = "$"; | 2498 | 99 | JsonbPath root_path; | 2499 | 99 | root_path.seek(root_path_str.c_str(), root_path_str.size()); | 2500 | 99 | std::vector<JsonbPath*> paths; | 2501 | 99 | paths.push_back(&root_path); | 2502 | | | 2503 | 99 | if (!search_is_const) { | 2504 | 99 | state_ptr = std::make_shared<LikeState>(); | 2505 | 99 | state_ptr->is_like_pattern = true; | 2506 | 99 | const auto& search_str = col_search_string->get_data_at(i); | 2507 | 99 | RETURN_IF_ERROR(FunctionLike::construct_like_const_state(context, search_str, | 2508 | 99 | state_ptr, false)); | 2509 | 99 | state = state_ptr.get(); | 2510 | 99 | } | 2511 | | | 2512 | | // maintain a hashset to deduplicate matches. | 2513 | 99 | std::unordered_set<std::string> matches; | 2514 | 99 | for (const auto& item : paths) { | 2515 | 99 | auto* cur_path = item; | 2516 | 99 | auto find = find_matches(json_doc->getValue(), is_one, state, cur_path, &matches); | 2517 | 99 | if (is_one && find) { | 2518 | 50 | break; | 2519 | 50 | } | 2520 | 99 | } | 2521 | 99 | if (matches.empty()) { | 2522 | | // returns NULL if the search_str is not found in the document. | 2523 | 14 | null_map->get_data()[i] = 1; | 2524 | 14 | result_col->insert_data("", 0); | 2525 | 14 | continue; | 2526 | 14 | } | 2527 | | | 2528 | 85 | writer.reset(); | 2529 | 85 | make_result_str(writer, matches, result_col.get()); | 2530 | 85 | } | 2531 | 37 | auto result_col_nullable = | 2532 | 37 | ColumnNullable::create(std::move(result_col), std::move(null_map)); | 2533 | 37 | block.replace_by_position(result, std::move(result_col_nullable)); | 2534 | 37 | return Status::OK(); | 2535 | 39 | } |
|
2536 | | |
2537 | | static constexpr auto one = "one"; |
2538 | | static constexpr auto all = "all"; |
2539 | | |
2540 | | public: |
2541 | | static constexpr auto name = "json_search"; |
2542 | 53 | static FunctionPtr create() { return std::make_shared<FunctionJsonSearch>(); } |
2543 | | |
2544 | 1 | String get_name() const override { return name; } |
2545 | 45 | bool is_variadic() const override { return false; } |
2546 | 44 | size_t get_number_of_arguments() const override { return 3; } |
2547 | | |
2548 | 44 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
2549 | 44 | return make_nullable(std::make_shared<DataTypeJsonb>()); |
2550 | 44 | } |
2551 | | |
2552 | 102 | bool use_default_implementation_for_nulls() const override { return false; } |
2553 | | |
2554 | 191 | Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { |
2555 | 191 | if (scope != FunctionContext::THREAD_LOCAL) { |
2556 | 44 | return Status::OK(); |
2557 | 44 | } |
2558 | 147 | if (context->is_col_constant(2)) { |
2559 | 67 | std::shared_ptr<LikeState> state = std::make_shared<LikeState>(); |
2560 | 67 | state->is_like_pattern = true; |
2561 | 67 | const auto pattern_col = context->get_constant_col(2)->column_ptr; |
2562 | 67 | const auto& pattern = pattern_col->get_data_at(0); |
2563 | 67 | RETURN_IF_ERROR( |
2564 | 67 | FunctionLike::construct_like_const_state(context, pattern, state, false)); |
2565 | 67 | context->set_function_state(scope, state); |
2566 | 67 | } |
2567 | 147 | return Status::OK(); |
2568 | 147 | } |
2569 | | |
2570 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
2571 | 58 | uint32_t result, size_t input_rows_count) const override { |
2572 | | // the json_doc, one_or_all, and search_str must be given. |
2573 | | // and we require the positions are static. |
2574 | 58 | if (arguments.size() < 3) { |
2575 | 0 | return Status::InvalidArgument("too few arguments for function {}", name); |
2576 | 0 | } |
2577 | 58 | if (arguments.size() > 3) { |
2578 | 0 | return Status::NotSupported("escape and path params are not support now"); |
2579 | 0 | } |
2580 | | |
2581 | 58 | CheckNullFun json_null_check = always_not_null; |
2582 | 58 | GetJsonStringRefFun get_json_fun; |
2583 | | // prepare jsonb data column |
2584 | 58 | auto&& [col_json, json_is_const] = |
2585 | 58 | unpack_if_const(block.get_by_position(arguments[0]).column); |
2586 | 58 | const auto* col_json_string = check_and_get_column<ColumnString>(col_json.get()); |
2587 | 58 | if (const auto* nullable = check_and_get_column<ColumnNullable>(col_json.get())) { |
2588 | 58 | col_json_string = |
2589 | 58 | check_and_get_column<ColumnString>(nullable->get_nested_column_ptr().get()); |
2590 | 58 | } |
2591 | | |
2592 | 58 | if (!col_json_string) { |
2593 | 0 | return Status::RuntimeError("Illegal arg json {} should be ColumnString", |
2594 | 0 | col_json->get_name()); |
2595 | 0 | } |
2596 | | |
2597 | 58 | auto create_all_null_result = [&]() { |
2598 | 6 | auto res_str = ColumnString::create(); |
2599 | 6 | res_str->insert_default(); |
2600 | 6 | auto res = ColumnNullable::create(std::move(res_str), ColumnUInt8::create(1, 1)); |
2601 | 6 | if (input_rows_count > 1) { |
2602 | 6 | block.get_by_position(result).column = |
2603 | 6 | ColumnConst::create(std::move(res), input_rows_count); |
2604 | 6 | } else { |
2605 | 0 | block.get_by_position(result).column = std::move(res); |
2606 | 0 | } |
2607 | 6 | return Status::OK(); |
2608 | 6 | }; |
2609 | | |
2610 | 58 | if (json_is_const) { |
2611 | 11 | if (col_json->is_null_at(0)) { |
2612 | 2 | return create_all_null_result(); |
2613 | 9 | } else { |
2614 | 9 | const auto& json_str = col_json_string->get_data_at(0); |
2615 | 36 | get_json_fun = [json_str](size_t i) { return json_str; }; |
2616 | 9 | } |
2617 | 47 | } else { |
2618 | 123 | json_null_check = [col_json](size_t i) { return col_json->is_null_at(i); }; |
2619 | 111 | get_json_fun = [col_json_string](size_t i) { return col_json_string->get_data_at(i); }; |
2620 | 47 | } |
2621 | | |
2622 | | // one_or_all |
2623 | 56 | CheckNullFun one_null_check = always_not_null; |
2624 | 56 | OneFun one_check = always_one; |
2625 | 56 | auto&& [col_one, one_is_const] = |
2626 | 56 | unpack_if_const(block.get_by_position(arguments[1]).column); |
2627 | 56 | one_is_const |= input_rows_count == 1; |
2628 | 56 | const auto* col_one_string = check_and_get_column<ColumnString>(col_one.get()); |
2629 | 56 | if (const auto* nullable = check_and_get_column<ColumnNullable>(col_one.get())) { |
2630 | 9 | col_one_string = check_and_get_column<ColumnString>(*nullable->get_nested_column_ptr()); |
2631 | 9 | } |
2632 | 56 | if (!col_one_string) { |
2633 | 0 | return Status::RuntimeError("Illegal arg one {} should be ColumnString", |
2634 | 0 | col_one->get_name()); |
2635 | 0 | } |
2636 | 56 | if (one_is_const) { |
2637 | 46 | if (col_one->is_null_at(0)) { |
2638 | 4 | return create_all_null_result(); |
2639 | 42 | } else { |
2640 | 42 | const auto& one_or_all = col_one_string->get_data_at(0); |
2641 | 42 | std::string one_or_all_str = one_or_all.to_string(); |
2642 | 42 | if (strcasecmp(one_or_all_str.c_str(), all) == 0) { |
2643 | 17 | one_check = always_all; |
2644 | 25 | } else if (strcasecmp(one_or_all_str.c_str(), one) == 0) { |
2645 | | // nothing |
2646 | 20 | } else { |
2647 | | // an error occurs if the one_or_all argument is not 'one' nor 'all'. |
2648 | 5 | return Status::InvalidArgument( |
2649 | 5 | "the one_or_all argument {} is not 'one' not 'all'", one_or_all_str); |
2650 | 5 | } |
2651 | 42 | } |
2652 | 46 | } else { |
2653 | 66 | one_null_check = [col_one](size_t i) { return col_one->is_null_at(i); }; |
2654 | 32 | one_check = [col_one_string](size_t i, bool* is_one) { |
2655 | 32 | const auto& one_or_all = col_one_string->get_data_at(i); |
2656 | 32 | std::string one_or_all_str = one_or_all.to_string(); |
2657 | 32 | if (strcasecmp(one_or_all_str.c_str(), all) == 0) { |
2658 | 18 | *is_one = false; |
2659 | 18 | } else if (strcasecmp(one_or_all_str.c_str(), one) == 0) { |
2660 | 12 | *is_one = true; |
2661 | 12 | } else { |
2662 | | // an error occurs if the one_or_all argument is not 'one' nor 'all'. |
2663 | 2 | return Status::InvalidArgument( |
2664 | 2 | "the one_or_all argument {} is not 'one' not 'all'", one_or_all_str); |
2665 | 2 | } |
2666 | 30 | return Status::OK(); |
2667 | 32 | }; |
2668 | 10 | } |
2669 | | |
2670 | | // search_str |
2671 | 47 | auto&& [col_search, search_is_const] = |
2672 | 47 | unpack_if_const(block.get_by_position(arguments[2]).column); |
2673 | | |
2674 | 47 | const auto* col_search_string = check_and_get_column<ColumnString>(col_search.get()); |
2675 | 47 | if (const auto* nullable = check_and_get_column<ColumnNullable>(col_search.get())) { |
2676 | 26 | col_search_string = |
2677 | 26 | check_and_get_column<ColumnString>(*nullable->get_nested_column_ptr()); |
2678 | 26 | } |
2679 | 47 | if (!col_search_string) { |
2680 | 0 | return Status::RuntimeError("Illegal arg pattern {} should be ColumnString", |
2681 | 0 | col_search->get_name()); |
2682 | 0 | } |
2683 | 47 | if (search_is_const) { |
2684 | 8 | CheckNullFun search_null_check = always_not_null; |
2685 | 8 | if (col_search->is_null_at(0)) { |
2686 | 0 | return create_all_null_result(); |
2687 | 0 | } |
2688 | 8 | RETURN_IF_ERROR(execute_vector<true>( |
2689 | 8 | block, input_rows_count, json_null_check, get_json_fun, one_null_check, |
2690 | 8 | one_check, search_null_check, col_search_string, context, result)); |
2691 | 39 | } else { |
2692 | 111 | CheckNullFun search_null_check = [col_search](size_t i) { |
2693 | 111 | return col_search->is_null_at(i); |
2694 | 111 | }; |
2695 | 39 | RETURN_IF_ERROR(execute_vector<false>( |
2696 | 39 | block, input_rows_count, json_null_check, get_json_fun, one_null_check, |
2697 | 39 | one_check, search_null_check, col_search_string, context, result)); |
2698 | 39 | } |
2699 | 45 | return Status::OK(); |
2700 | 47 | } |
2701 | | }; |
2702 | | |
// Owning scratch buffer for one encoded document.
struct DocumentBuffer {
    // Backing storage; empty until the first allocation.
    std::unique_ptr<char[]> ptr;
    // Number of bytes of `ptr` currently holding document data.
    size_t size {0};
    // Number of bytes allocated behind `ptr`.
    size_t capacity {0};
};
2708 | | |
2709 | | class FunctionJsonbRemove : public IFunction { |
2710 | | public: |
2711 | | static constexpr auto name = "jsonb_remove"; |
2712 | | static constexpr auto alias = "json_remove"; |
2713 | | |
2714 | 31 | static FunctionPtr create() { return std::make_shared<FunctionJsonbRemove>(); } |
2715 | | |
2716 | 0 | String get_name() const override { return name; } |
2717 | | |
2718 | 0 | size_t get_number_of_arguments() const override { return 0; } |
2719 | 23 | bool is_variadic() const override { return true; } |
2720 | | |
2721 | 44 | bool use_default_implementation_for_nulls() const override { return false; } |
2722 | | |
2723 | 22 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
2724 | 22 | return make_nullable(std::make_shared<DataTypeJsonb>()); |
2725 | 22 | } |
2726 | | |
2727 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
2728 | 22 | uint32_t result, size_t input_rows_count) const override { |
2729 | 22 | DCHECK_GE(arguments.size(), 2); |
2730 | | |
2731 | | // Check if arguments count is valid (json_doc + at least one path) |
2732 | 22 | if (arguments.size() < 2) { |
2733 | 0 | return Status::InvalidArgument("json_remove requires at least 2 arguments"); |
2734 | 0 | } |
2735 | | |
2736 | 22 | auto return_data_type = make_nullable(std::make_shared<DataTypeJsonb>()); |
2737 | 22 | auto result_column = return_data_type->create_column(); |
2738 | 22 | auto& nullable_column = assert_cast<ColumnNullable&>(*result_column); |
2739 | 22 | auto& res_chars = |
2740 | 22 | assert_cast<ColumnString&>(nullable_column.get_nested_column()).get_chars(); |
2741 | 22 | auto& res_offsets = |
2742 | 22 | assert_cast<ColumnString&>(nullable_column.get_nested_column()).get_offsets(); |
2743 | 22 | auto& null_map = nullable_column.get_null_map_data(); |
2744 | | |
2745 | 22 | res_chars.reserve(input_rows_count * 64); |
2746 | 22 | res_offsets.resize(input_rows_count); |
2747 | 22 | null_map.resize_fill(input_rows_count, 0); |
2748 | | |
2749 | | // Get JSON document column |
2750 | 22 | auto [json_column, json_const] = |
2751 | 22 | unpack_if_const(block.get_by_position(arguments[0]).column); |
2752 | 22 | const auto* json_nullable = check_and_get_column<ColumnNullable>(json_column.get()); |
2753 | 22 | const ColumnString* json_data_column = nullptr; |
2754 | 22 | const NullMap* json_null_map = nullptr; |
2755 | | |
2756 | 22 | if (json_nullable) { |
2757 | 22 | json_null_map = &json_nullable->get_null_map_data(); |
2758 | 22 | json_data_column = |
2759 | 22 | check_and_get_column<ColumnString>(&json_nullable->get_nested_column()); |
2760 | 22 | } else { |
2761 | 0 | json_data_column = check_and_get_column<ColumnString>(json_column.get()); |
2762 | 0 | } |
2763 | | |
2764 | 22 | if (!json_data_column) { |
2765 | 0 | return Status::InvalidArgument("First argument must be a JSON document"); |
2766 | 0 | } |
2767 | | |
2768 | | // Parse paths |
2769 | 22 | std::vector<const ColumnString*> path_columns; |
2770 | 22 | std::vector<const NullMap*> path_null_maps; |
2771 | 22 | std::vector<bool> path_constants; |
2772 | | |
2773 | 51 | for (size_t i = 1; i < arguments.size(); ++i) { |
2774 | 29 | auto [path_column, path_const] = |
2775 | 29 | unpack_if_const(block.get_by_position(arguments[i]).column); |
2776 | 29 | const auto* path_nullable = check_and_get_column<ColumnNullable>(path_column.get()); |
2777 | | |
2778 | 29 | if (path_nullable) { |
2779 | 6 | path_null_maps.push_back(&path_nullable->get_null_map_data()); |
2780 | 6 | path_columns.push_back( |
2781 | 6 | check_and_get_column<ColumnString>(&path_nullable->get_nested_column())); |
2782 | 23 | } else { |
2783 | 23 | path_null_maps.push_back(nullptr); |
2784 | 23 | path_columns.push_back(check_and_get_column<ColumnString>(path_column.get())); |
2785 | 23 | } |
2786 | | |
2787 | 29 | if (!path_columns.back()) { |
2788 | 0 | return Status::InvalidArgument( |
2789 | 0 | fmt::format("Argument {} must be a string path", i + 1)); |
2790 | 0 | } |
2791 | | |
2792 | 29 | path_constants.push_back(path_const); |
2793 | 29 | } |
2794 | | |
2795 | | // Reusable JsonbWriter for performance |
2796 | 22 | JsonbWriter writer; |
2797 | | |
2798 | 48 | for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) { |
2799 | 28 | size_t json_idx = index_check_const(row_idx, json_const); |
2800 | | |
2801 | | // Check if JSON document is null |
2802 | 28 | if (json_null_map && (*json_null_map)[json_idx]) { |
2803 | 2 | null_map[row_idx] = 1; |
2804 | 2 | res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size()); |
2805 | 2 | continue; |
2806 | 2 | } |
2807 | | |
2808 | | // Parse JSON document |
2809 | 26 | const auto& json_data = json_data_column->get_data_at(json_idx); |
2810 | 26 | const JsonbDocument* json_doc = nullptr; |
2811 | 26 | Status parse_status = JsonbDocument::checkAndCreateDocument(json_data.data, |
2812 | 26 | json_data.size, &json_doc); |
2813 | | |
2814 | 26 | if (!parse_status.ok() || !json_doc) { |
2815 | 0 | null_map[row_idx] = 1; |
2816 | 0 | res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size()); |
2817 | 0 | continue; |
2818 | 0 | } |
2819 | | |
2820 | | // Check if any path is null |
2821 | 26 | bool has_null_path = false; |
2822 | 59 | for (size_t path_idx = 0; path_idx < path_columns.size(); ++path_idx) { |
2823 | 35 | size_t idx = index_check_const(row_idx, path_constants[path_idx]); |
2824 | 35 | if (path_null_maps[path_idx] && (*path_null_maps[path_idx])[idx]) { |
2825 | 2 | has_null_path = true; |
2826 | 2 | break; |
2827 | 2 | } |
2828 | 35 | } |
2829 | | |
2830 | 26 | if (has_null_path) { |
2831 | 2 | null_map[row_idx] = 1; |
2832 | 2 | res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size()); |
2833 | 2 | continue; |
2834 | 2 | } |
2835 | | |
2836 | 24 | std::vector<JsonbPath> paths; |
2837 | 24 | std::vector<bool> path_constants_vec; |
2838 | | |
2839 | 54 | for (size_t path_idx = 0; path_idx < path_columns.size(); ++path_idx) { |
2840 | 32 | size_t idx = index_check_const(row_idx, path_constants[path_idx]); |
2841 | 32 | const auto& path_data = path_columns[path_idx]->get_data_at(idx); |
2842 | | |
2843 | 32 | JsonbPath path; |
2844 | 32 | if (!path.seek(path_data.data, path_data.size)) { |
2845 | 1 | return Status::InvalidArgument( |
2846 | 1 | "Json path error: Invalid Json Path for value: {} at row: {}", |
2847 | 1 | std::string_view(path_data.data, path_data.size), row_idx); |
2848 | 1 | } |
2849 | | |
2850 | 31 | if (path.is_wildcard() || path.is_supper_wildcard()) { |
2851 | 1 | return Status::InvalidArgument( |
2852 | 1 | "In this situation, path expressions may not contain the * and ** " |
2853 | 1 | "tokens or an array range, argument index: {}, row index: {}", |
2854 | 1 | path_idx + 1, row_idx); |
2855 | 1 | } |
2856 | | |
2857 | 30 | paths.push_back(std::move(path)); |
2858 | 30 | path_constants_vec.push_back(path_constants[path_idx]); |
2859 | 30 | } |
2860 | | |
2861 | 22 | const JsonbValue* current_value = json_doc->getValue(); |
2862 | | |
2863 | 22 | DocumentBuffer tmp_buffer; |
2864 | | |
2865 | 52 | for (size_t path_idx = 0; path_idx < paths.size(); ++path_idx) { |
2866 | 30 | writer.reset(); |
2867 | | |
2868 | 30 | auto find_result = current_value->findValue(paths[path_idx]); |
2869 | | |
2870 | 30 | if (find_result.is_wildcard) { |
2871 | 0 | continue; |
2872 | 0 | } |
2873 | | |
2874 | 30 | if (find_result.value) { |
2875 | 24 | RETURN_IF_ERROR(clone_without_path(current_value, paths[path_idx], writer)); |
2876 | | |
2877 | 24 | auto* writer_output = writer.getOutput(); |
2878 | 24 | if (writer_output->getSize() > tmp_buffer.capacity) { |
2879 | 17 | tmp_buffer.capacity = |
2880 | 17 | ((size_t(writer_output->getSize()) + 1024 - 1) / 1024) * 1024; |
2881 | 17 | tmp_buffer.ptr = std::make_unique<char[]>(tmp_buffer.capacity); |
2882 | 17 | DCHECK_LE(writer_output->getSize(), tmp_buffer.capacity); |
2883 | 17 | } |
2884 | | |
2885 | 24 | memcpy(tmp_buffer.ptr.get(), writer_output->getBuffer(), |
2886 | 24 | writer_output->getSize()); |
2887 | 24 | tmp_buffer.size = writer_output->getSize(); |
2888 | | |
2889 | 24 | const JsonbDocument* new_doc = nullptr; |
2890 | 24 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument( |
2891 | 24 | tmp_buffer.ptr.get(), tmp_buffer.size, &new_doc)); |
2892 | | |
2893 | 24 | current_value = new_doc->getValue(); |
2894 | 24 | } |
2895 | 30 | } |
2896 | | |
2897 | 22 | const JsonbDocument* modified_doc = nullptr; |
2898 | 22 | if (current_value != json_doc->getValue()) { |
2899 | 17 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument( |
2900 | 17 | tmp_buffer.ptr.get(), tmp_buffer.size, &modified_doc)); |
2901 | 17 | } else { |
2902 | 5 | modified_doc = json_doc; |
2903 | 5 | } |
2904 | | |
2905 | | // Write the final result |
2906 | 22 | const auto size = modified_doc->numPackedBytes(); |
2907 | 22 | res_chars.insert(reinterpret_cast<const char*>(modified_doc), |
2908 | 22 | reinterpret_cast<const char*>(modified_doc) + size); |
2909 | 22 | res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size()); |
2910 | 22 | } |
2911 | | |
2912 | 20 | block.get_by_position(result).column = std::move(result_column); |
2913 | 20 | return Status::OK(); |
2914 | 22 | } |
2915 | | |
2916 | | private: |
2917 | | Status clone_without_path(const JsonbValue* root, const JsonbPath& path, |
2918 | 24 | JsonbWriter& writer) const { |
2919 | | // Start writing at the root level |
2920 | 24 | if (root->isObject()) { |
2921 | 15 | writer.writeStartObject(); |
2922 | 15 | RETURN_IF_ERROR(clone_object_without_path(root, path, 0, writer)); |
2923 | 15 | writer.writeEndObject(); |
2924 | 15 | } else if (root->isArray()) { |
2925 | 9 | writer.writeStartArray(); |
2926 | 9 | RETURN_IF_ERROR(clone_array_without_path(root, path, 0, writer)); |
2927 | 9 | writer.writeEndArray(); |
2928 | 9 | } else { |
2929 | | // Primitive value - can't remove anything from it |
2930 | 0 | writer.writeValue(root); |
2931 | 0 | } |
2932 | 24 | return Status::OK(); |
2933 | 24 | } |
2934 | | |
2935 | | Status clone_object_without_path(const JsonbValue* obj_value, const JsonbPath& path, |
2936 | 20 | size_t depth, JsonbWriter& writer) const { |
2937 | 20 | const auto* obj = obj_value->unpack<ObjectVal>(); |
2938 | | |
2939 | 40 | for (const auto& kv : *obj) { |
2940 | 40 | std::string key(kv.getKeyStr(), kv.klen()); |
2941 | | |
2942 | 40 | if (depth < path.get_leg_vector_size()) { |
2943 | 40 | const auto* leg = path.get_leg_from_leg_vector(depth); |
2944 | 40 | if (leg->type == MEMBER_CODE) { |
2945 | 40 | std::string target_key(leg->leg_ptr, leg->leg_len); |
2946 | | |
2947 | 40 | if (key == target_key) { |
2948 | 20 | if (depth == path.get_leg_vector_size() - 1) { |
2949 | 12 | continue; |
2950 | 12 | } else { |
2951 | 8 | writer.writeKey(kv.getKeyStr(), kv.klen()); |
2952 | 8 | if (kv.value()->isObject()) { |
2953 | 3 | writer.writeStartObject(); |
2954 | 3 | RETURN_IF_ERROR(clone_object_without_path(kv.value(), path, |
2955 | 3 | depth + 1, writer)); |
2956 | 3 | writer.writeEndObject(); |
2957 | 5 | } else if (kv.value()->isArray()) { |
2958 | 5 | writer.writeStartArray(); |
2959 | 5 | RETURN_IF_ERROR(clone_array_without_path(kv.value(), path, |
2960 | 5 | depth + 1, writer)); |
2961 | 5 | writer.writeEndArray(); |
2962 | 5 | } else { |
2963 | 0 | writer.writeValue(kv.value()); |
2964 | 0 | } |
2965 | 8 | } |
2966 | 20 | } else { |
2967 | 20 | writer.writeKey(kv.getKeyStr(), kv.klen()); |
2968 | 20 | writer.writeValue(kv.value()); |
2969 | 20 | } |
2970 | 40 | } else { |
2971 | 0 | writer.writeKey(kv.getKeyStr(), kv.klen()); |
2972 | 0 | writer.writeValue(kv.value()); |
2973 | 0 | } |
2974 | 40 | } else { |
2975 | 0 | writer.writeKey(kv.getKeyStr(), kv.klen()); |
2976 | 0 | writer.writeValue(kv.value()); |
2977 | 0 | } |
2978 | 40 | } |
2979 | | |
2980 | 20 | return Status::OK(); |
2981 | 20 | } |
2982 | | |
2983 | | Status clone_array_without_path(const JsonbValue* arr_value, const JsonbPath& path, |
2984 | 17 | size_t depth, JsonbWriter& writer) const { |
2985 | 17 | const auto* arr = arr_value->unpack<ArrayVal>(); |
2986 | | |
2987 | 17 | int index = 0; |
2988 | 52 | for (const auto& element : *arr) { |
2989 | 52 | if (depth < path.get_leg_vector_size()) { |
2990 | 52 | const auto* leg = path.get_leg_from_leg_vector(depth); |
2991 | 52 | if (leg->type == ARRAY_CODE) { |
2992 | 52 | int target_index = leg->array_index; |
2993 | | |
2994 | 52 | if (index == target_index) { |
2995 | 17 | if (depth == path.get_leg_vector_size() - 1) { |
2996 | | // This is the target element to remove - skip it |
2997 | 12 | } else { |
2998 | 5 | if (element.isObject()) { |
2999 | 2 | writer.writeStartObject(); |
3000 | 2 | RETURN_IF_ERROR(clone_object_without_path(&element, path, depth + 1, |
3001 | 2 | writer)); |
3002 | 2 | writer.writeEndObject(); |
3003 | 3 | } else if (element.isArray()) { |
3004 | 3 | writer.writeStartArray(); |
3005 | 3 | RETURN_IF_ERROR(clone_array_without_path(&element, path, depth + 1, |
3006 | 3 | writer)); |
3007 | 3 | writer.writeEndArray(); |
3008 | 3 | } else { |
3009 | 0 | writer.writeValue(&element); |
3010 | 0 | } |
3011 | 5 | } |
3012 | 35 | } else { |
3013 | 35 | writer.writeValue(&element); |
3014 | 35 | } |
3015 | 52 | } else { |
3016 | 0 | writer.writeValue(&element); |
3017 | 0 | } |
3018 | 52 | } else { |
3019 | 0 | writer.writeValue(&element); |
3020 | 0 | } |
3021 | 52 | index++; |
3022 | 52 | } |
3023 | | |
3024 | 17 | return Status::OK(); |
3025 | 17 | } |
3026 | | }; |
3027 | | |
3028 | | class FunctionStripNullValue : public IFunction { |
3029 | | public: |
3030 | | static constexpr auto name = "strip_null_value"; |
3031 | 24 | static FunctionPtr create() { return std::make_shared<FunctionStripNullValue>(); } |
3032 | | |
3033 | 1 | String get_name() const override { return name; } |
3034 | 16 | bool is_variadic() const override { return false; } |
3035 | 15 | size_t get_number_of_arguments() const override { return 1; } |
3036 | | |
3037 | 30 | bool use_default_implementation_for_nulls() const override { return false; } |
3038 | | |
3039 | 15 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
3040 | 15 | return make_nullable(std::make_shared<DataTypeJsonb>()); |
3041 | 15 | } |
3042 | | |
3043 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
3044 | 15 | uint32_t result, size_t input_rows_count) const override { |
3045 | 15 | const auto& arg_column = block.get_by_position(arguments[0]).column; |
3046 | 15 | const ColumnString* json_column = nullptr; |
3047 | 15 | const NullMap* json_null_map = nullptr; |
3048 | 15 | if (arg_column->is_nullable()) { |
3049 | 15 | const auto& nullable_col = assert_cast<const ColumnNullable&>(*arg_column); |
3050 | 15 | json_column = assert_cast<const ColumnString*>(&nullable_col.get_nested_column()); |
3051 | 15 | json_null_map = &nullable_col.get_null_map_data(); |
3052 | 15 | } else { |
3053 | 0 | json_column = assert_cast<const ColumnString*>(arg_column.get()); |
3054 | 0 | } |
3055 | | |
3056 | 15 | auto return_data_type = make_nullable(std::make_shared<DataTypeJsonb>()); |
3057 | 15 | auto result_column = return_data_type->create_column(); |
3058 | | |
3059 | 15 | auto& result_nullmap = assert_cast<ColumnNullable&>(*result_column).get_null_map_data(); |
3060 | 15 | auto& result_data_col = assert_cast<ColumnString&>( |
3061 | 15 | assert_cast<ColumnNullable&>(*result_column).get_nested_column()); |
3062 | | |
3063 | 15 | result_nullmap.resize_fill(input_rows_count, 0); |
3064 | 60 | for (size_t i = 0; i != input_rows_count; ++i) { |
3065 | 45 | if (json_null_map && (*json_null_map)[i]) { |
3066 | 13 | result_nullmap[i] = 1; |
3067 | 13 | result_data_col.insert_default(); |
3068 | 13 | continue; |
3069 | 13 | } |
3070 | 32 | const JsonbDocument* json_doc = nullptr; |
3071 | 32 | const auto& json_str = json_column->get_data_at(i); |
3072 | 32 | RETURN_IF_ERROR( |
3073 | 32 | JsonbDocument::checkAndCreateDocument(json_str.data, json_str.size, &json_doc)); |
3074 | 32 | if (json_doc) [[likely]] { |
3075 | 32 | if (json_doc->getValue()->isNull()) { |
3076 | 9 | result_nullmap[i] = 1; |
3077 | 9 | result_data_col.insert_default(); |
3078 | 23 | } else { |
3079 | 23 | result_nullmap[i] = 0; |
3080 | 23 | result_data_col.insert_data(json_str.data, json_str.size); |
3081 | 23 | } |
3082 | 32 | } else { |
3083 | 0 | result_nullmap[i] = 1; |
3084 | 0 | result_data_col.insert_default(); |
3085 | 0 | } |
3086 | 32 | } |
3087 | | |
3088 | 15 | block.get_by_position(result).column = std::move(result_column); |
3089 | 15 | return Status::OK(); |
3090 | 15 | } |
3091 | | }; |
3092 | | |
3093 | 8 | void register_function_jsonb(SimpleFunctionFactory& factory) { |
3094 | 8 | factory.register_function<FunctionJsonbParse>(FunctionJsonbParse::name); |
3095 | 8 | factory.register_alias(FunctionJsonbParse::name, FunctionJsonbParse::alias); |
3096 | 8 | factory.register_function<FunctionJsonbParseErrorNull>("json_parse_error_to_null"); |
3097 | 8 | factory.register_alias("json_parse_error_to_null", "jsonb_parse_error_to_null"); |
3098 | 8 | factory.register_function<FunctionJsonbParseErrorValue>("json_parse_error_to_value"); |
3099 | 8 | factory.register_alias("json_parse_error_to_value", "jsonb_parse_error_to_value"); |
3100 | | |
3101 | 8 | factory.register_function<FunctionJsonbExists>(); |
3102 | 8 | factory.register_alias(FunctionJsonbExists::name, FunctionJsonbExists::alias); |
3103 | 8 | factory.register_function<FunctionJsonbType>(); |
3104 | 8 | factory.register_alias(FunctionJsonbType::name, FunctionJsonbType::alias); |
3105 | | |
3106 | 8 | factory.register_function<FunctionJsonbKeys>(); |
3107 | 8 | factory.register_alias(FunctionJsonbKeys::name, FunctionJsonbKeys::alias); |
3108 | | |
3109 | 8 | factory.register_function<FunctionJsonbExtractIsnull>(); |
3110 | 8 | factory.register_alias(FunctionJsonbExtractIsnull::name, FunctionJsonbExtractIsnull::alias); |
3111 | | |
3112 | 8 | factory.register_function<FunctionJsonbExtractJsonb>(); |
3113 | 8 | factory.register_alias(FunctionJsonbExtractJsonb::name, FunctionJsonbExtractJsonb::alias); |
3114 | 8 | factory.register_function<FunctionJsonbExtractJsonbNoQuotes>(); |
3115 | 8 | factory.register_alias(FunctionJsonbExtractJsonbNoQuotes::name, |
3116 | 8 | FunctionJsonbExtractJsonbNoQuotes::alias); |
3117 | | |
3118 | 8 | factory.register_function<FunctionJsonbLength<JsonbLengthAndPathImpl>>(); |
3119 | 8 | factory.register_function<FunctionJsonbContains<JsonbContainsAndPathImpl>>(); |
3120 | | |
3121 | 8 | factory.register_function<FunctionJsonSearch>(); |
3122 | | |
3123 | 8 | factory.register_function<FunctionJsonbArray<false>>(); |
3124 | 8 | factory.register_alias(FunctionJsonbArray<false>::name, FunctionJsonbArray<false>::alias); |
3125 | | |
3126 | 8 | factory.register_function<FunctionJsonbArray<true>>("json_array_ignore_null"); |
3127 | 8 | factory.register_alias("json_array_ignore_null", "jsonb_array_ignore_null"); |
3128 | | |
3129 | 8 | factory.register_function<FunctionJsonbObject>(); |
3130 | 8 | factory.register_alias(FunctionJsonbObject::name, FunctionJsonbObject::alias); |
3131 | | |
3132 | 8 | factory.register_function<FunctionJsonbModify<JsonbModifyType::Insert>>(); |
3133 | 8 | factory.register_alias(FunctionJsonbModify<JsonbModifyType::Insert>::name, |
3134 | 8 | FunctionJsonbModify<JsonbModifyType::Insert>::alias); |
3135 | 8 | factory.register_function<FunctionJsonbModify<JsonbModifyType::Set>>(); |
3136 | 8 | factory.register_alias(FunctionJsonbModify<JsonbModifyType::Set>::name, |
3137 | 8 | FunctionJsonbModify<JsonbModifyType::Set>::alias); |
3138 | 8 | factory.register_function<FunctionJsonbModify<JsonbModifyType::Replace>>(); |
3139 | 8 | factory.register_alias(FunctionJsonbModify<JsonbModifyType::Replace>::name, |
3140 | 8 | FunctionJsonbModify<JsonbModifyType::Replace>::alias); |
3141 | | |
3142 | 8 | factory.register_function<FunctionJsonbRemove>(); |
3143 | 8 | factory.register_alias(FunctionJsonbRemove::name, FunctionJsonbRemove::alias); |
3144 | | |
3145 | 8 | factory.register_function<FunctionStripNullValue>(); |
3146 | 8 | } |
3147 | | |
3148 | | } // namespace doris |