be/src/exprs/function/function_jsonb.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include <glog/logging.h> |
19 | | |
20 | | #include <algorithm> |
21 | | #include <cstdlib> |
22 | | #include <memory> |
23 | | #include <string> |
24 | | #include <string_view> |
25 | | #include <tuple> |
26 | | #include <type_traits> |
27 | | #include <utility> |
28 | | #include <variant> |
29 | | |
30 | | #include "common/compiler_util.h" // IWYU pragma: keep |
31 | | #include "common/status.h" |
32 | | #include "core/assert_cast.h" |
33 | | #include "core/block/block.h" |
34 | | #include "core/block/column_numbers.h" |
35 | | #include "core/block/column_with_type_and_name.h" |
36 | | #include "core/column/column.h" |
37 | | #include "core/column/column_array.h" |
38 | | #include "core/column/column_const.h" |
39 | | #include "core/column/column_nullable.h" |
40 | | #include "core/column/column_string.h" |
41 | | #include "core/column/column_vector.h" |
42 | | #include "core/custom_allocator.h" |
43 | | #include "core/data_type/data_type.h" |
44 | | #include "core/data_type/data_type_array.h" |
45 | | #include "core/data_type/data_type_jsonb.h" |
46 | | #include "core/data_type/data_type_nullable.h" |
47 | | #include "core/data_type/data_type_string.h" |
48 | | #include "core/data_type/define_primitive_type.h" |
49 | | #include "core/data_type/primitive_type.h" |
50 | | #include "core/string_ref.h" |
51 | | #include "core/types.h" |
52 | | #include "core/value/jsonb_value.h" |
53 | | #include "exec/common/stringop_substring.h" |
54 | | #include "exec/common/template_helpers.hpp" |
55 | | #include "exec/common/util.hpp" |
56 | | #include "exprs/aggregate/aggregate_function.h" |
57 | | #include "exprs/function/function.h" |
58 | | #include "exprs/function/like.h" |
59 | | #include "exprs/function/simple_function_factory.h" |
60 | | #include "exprs/function_context.h" |
61 | | #include "util/jsonb_document.h" |
62 | | #include "util/jsonb_stream.h" |
63 | | #include "util/jsonb_utils.h" |
64 | | #include "util/jsonb_writer.h" |
65 | | #include "util/simd/bits.h" |
66 | | |
67 | | namespace doris { |
68 | | |
69 | | enum class NullalbeMode { NULLABLE = 0, FOLLOW_INPUT }; |
70 | | |
71 | | enum class JsonbParseErrorMode { FAIL = 0, RETURN_NULL, RETURN_VALUE }; |
72 | | |
73 | | // func(string,string) -> json |
74 | | template <NullalbeMode nullable_mode, JsonbParseErrorMode parse_error_handle_mode> |
75 | | class FunctionJsonbParseBase : public IFunction { |
76 | | private: |
77 | | struct FunctionJsonbParseState { |
78 | | StringRef default_value; |
79 | | JsonBinaryValue default_value_parser; |
80 | | bool has_const_default_value = false; |
81 | | bool default_is_null = false; |
82 | | }; |
83 | | |
84 | | public: |
85 | | static constexpr auto name = "json_parse"; |
86 | | static constexpr auto alias = "jsonb_parse"; |
87 | 88 | static FunctionPtr create() { return std::make_shared<FunctionJsonbParseBase>(); }_ZN5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE6createEv Line | Count | Source | 87 | 27 | static FunctionPtr create() { return std::make_shared<FunctionJsonbParseBase>(); } |
_ZN5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE6createEv Line | Count | Source | 87 | 39 | static FunctionPtr create() { return std::make_shared<FunctionJsonbParseBase>(); } |
_ZN5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE6createEv Line | Count | Source | 87 | 22 | static FunctionPtr create() { return std::make_shared<FunctionJsonbParseBase>(); } |
|
88 | | |
89 | 4 | String get_name() const override { |
90 | 4 | String error_mode; |
91 | 4 | switch (parse_error_handle_mode) { |
92 | 1 | case JsonbParseErrorMode::FAIL: |
93 | 1 | break; |
94 | 1 | case JsonbParseErrorMode::RETURN_NULL: |
95 | 1 | error_mode = "_error_to_null"; |
96 | 1 | break; |
97 | 2 | case JsonbParseErrorMode::RETURN_VALUE: |
98 | 2 | error_mode = "_error_to_value"; |
99 | 2 | break; |
100 | 4 | } |
101 | | |
102 | 4 | return name + error_mode; |
103 | 4 | } _ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE8get_nameB5cxx11Ev Line | Count | Source | 89 | 1 | String get_name() const override { | 90 | 1 | String error_mode; | 91 | 1 | switch (parse_error_handle_mode) { | 92 | 1 | case JsonbParseErrorMode::FAIL: | 93 | 1 | break; | 94 | 0 | case JsonbParseErrorMode::RETURN_NULL: | 95 | 0 | error_mode = "_error_to_null"; | 96 | 0 | break; | 97 | 0 | case JsonbParseErrorMode::RETURN_VALUE: | 98 | 0 | error_mode = "_error_to_value"; | 99 | 0 | break; | 100 | 1 | } | 101 | | | 102 | 1 | return name + error_mode; | 103 | 1 | } |
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE8get_nameB5cxx11Ev Line | Count | Source | 89 | 1 | String get_name() const override { | 90 | 1 | String error_mode; | 91 | 1 | switch (parse_error_handle_mode) { | 92 | 0 | case JsonbParseErrorMode::FAIL: | 93 | 0 | break; | 94 | 1 | case JsonbParseErrorMode::RETURN_NULL: | 95 | 1 | error_mode = "_error_to_null"; | 96 | 1 | break; | 97 | 0 | case JsonbParseErrorMode::RETURN_VALUE: | 98 | 0 | error_mode = "_error_to_value"; | 99 | 0 | break; | 100 | 1 | } | 101 | | | 102 | 1 | return name + error_mode; | 103 | 1 | } |
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE8get_nameB5cxx11Ev Line | Count | Source | 89 | 2 | String get_name() const override { | 90 | 2 | String error_mode; | 91 | 2 | switch (parse_error_handle_mode) { | 92 | 0 | case JsonbParseErrorMode::FAIL: | 93 | 0 | break; | 94 | 0 | case JsonbParseErrorMode::RETURN_NULL: | 95 | 0 | error_mode = "_error_to_null"; | 96 | 0 | break; | 97 | 2 | case JsonbParseErrorMode::RETURN_VALUE: | 98 | 2 | error_mode = "_error_to_value"; | 99 | 2 | break; | 100 | 2 | } | 101 | | | 102 | 2 | return name + error_mode; | 103 | 2 | } |
|
104 | | |
105 | 65 | bool is_variadic() const override { |
106 | 65 | return parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE; |
107 | 65 | } _ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE11is_variadicEv Line | Count | Source | 105 | 19 | bool is_variadic() const override { | 106 | 19 | return parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE; | 107 | 19 | } |
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE11is_variadicEv Line | Count | Source | 105 | 31 | bool is_variadic() const override { | 106 | 31 | return parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE; | 107 | 31 | } |
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE11is_variadicEv Line | Count | Source | 105 | 15 | bool is_variadic() const override { | 106 | 15 | return parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE; | 107 | 15 | } |
|
108 | | |
109 | 49 | size_t get_number_of_arguments() const override { |
110 | 49 | switch (parse_error_handle_mode) { |
111 | 18 | case JsonbParseErrorMode::FAIL: |
112 | 18 | return 1; |
113 | 30 | case JsonbParseErrorMode::RETURN_NULL: |
114 | 30 | return 1; |
115 | 1 | case JsonbParseErrorMode::RETURN_VALUE: |
116 | 1 | return 0; |
117 | 49 | } |
118 | 49 | } _ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE23get_number_of_argumentsEv Line | Count | Source | 109 | 18 | size_t get_number_of_arguments() const override { | 110 | 18 | switch (parse_error_handle_mode) { | 111 | 18 | case JsonbParseErrorMode::FAIL: | 112 | 18 | return 1; | 113 | 0 | case JsonbParseErrorMode::RETURN_NULL: | 114 | 0 | return 1; | 115 | 0 | case JsonbParseErrorMode::RETURN_VALUE: | 116 | 0 | return 0; | 117 | 18 | } | 118 | 18 | } |
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE23get_number_of_argumentsEv Line | Count | Source | 109 | 30 | size_t get_number_of_arguments() const override { | 110 | 30 | switch (parse_error_handle_mode) { | 111 | 0 | case JsonbParseErrorMode::FAIL: | 112 | 0 | return 1; | 113 | 30 | case JsonbParseErrorMode::RETURN_NULL: | 114 | 30 | return 1; | 115 | 0 | case JsonbParseErrorMode::RETURN_VALUE: | 116 | 0 | return 0; | 117 | 30 | } | 118 | 30 | } |
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE23get_number_of_argumentsEv Line | Count | Source | 109 | 1 | size_t get_number_of_arguments() const override { | 110 | 1 | switch (parse_error_handle_mode) { | 111 | 0 | case JsonbParseErrorMode::FAIL: | 112 | 0 | return 1; | 113 | 0 | case JsonbParseErrorMode::RETURN_NULL: | 114 | 0 | return 1; | 115 | 1 | case JsonbParseErrorMode::RETURN_VALUE: | 116 | 1 | return 0; | 117 | 1 | } | 118 | 1 | } |
|
119 | | |
120 | 61 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
121 | 61 | bool is_nullable = false; |
122 | 61 | switch (nullable_mode) { |
123 | 30 | case NullalbeMode::NULLABLE: |
124 | 30 | is_nullable = true; |
125 | 30 | break; |
126 | 31 | case NullalbeMode::FOLLOW_INPUT: { |
127 | 43 | for (auto arg : arguments) { |
128 | 43 | is_nullable |= arg->is_nullable(); |
129 | 43 | } |
130 | 31 | break; |
131 | 0 | } |
132 | 61 | } |
133 | | |
134 | 61 | return is_nullable ? make_nullable(std::make_shared<DataTypeJsonb>()) |
135 | 61 | : std::make_shared<DataTypeJsonb>(); |
136 | 61 | } _ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE Line | Count | Source | 120 | 18 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 121 | 18 | bool is_nullable = false; | 122 | 18 | switch (nullable_mode) { | 123 | 0 | case NullalbeMode::NULLABLE: | 124 | 0 | is_nullable = true; | 125 | 0 | break; | 126 | 18 | case NullalbeMode::FOLLOW_INPUT: { | 127 | 18 | for (auto arg : arguments) { | 128 | 18 | is_nullable |= arg->is_nullable(); | 129 | 18 | } | 130 | 18 | break; | 131 | 0 | } | 132 | 18 | } | 133 | | | 134 | 18 | return is_nullable ? make_nullable(std::make_shared<DataTypeJsonb>()) | 135 | 18 | : std::make_shared<DataTypeJsonb>(); | 136 | 18 | } |
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE Line | Count | Source | 120 | 30 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 121 | 30 | bool is_nullable = false; | 122 | 30 | switch (nullable_mode) { | 123 | 30 | case NullalbeMode::NULLABLE: | 124 | 30 | is_nullable = true; | 125 | 30 | break; | 126 | 0 | case NullalbeMode::FOLLOW_INPUT: { | 127 | 0 | for (auto arg : arguments) { | 128 | 0 | is_nullable |= arg->is_nullable(); | 129 | 0 | } | 130 | 0 | break; | 131 | 0 | } | 132 | 30 | } | 133 | | | 134 | 30 | return is_nullable ? make_nullable(std::make_shared<DataTypeJsonb>()) | 135 | 30 | : std::make_shared<DataTypeJsonb>(); | 136 | 30 | } |
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE Line | Count | Source | 120 | 13 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 121 | 13 | bool is_nullable = false; | 122 | 13 | switch (nullable_mode) { | 123 | 0 | case NullalbeMode::NULLABLE: | 124 | 0 | is_nullable = true; | 125 | 0 | break; | 126 | 13 | case NullalbeMode::FOLLOW_INPUT: { | 127 | 25 | for (auto arg : arguments) { | 128 | 25 | is_nullable |= arg->is_nullable(); | 129 | 25 | } | 130 | 13 | break; | 131 | 0 | } | 132 | 13 | } | 133 | | | 134 | 13 | return is_nullable ? make_nullable(std::make_shared<DataTypeJsonb>()) | 135 | 13 | : std::make_shared<DataTypeJsonb>(); | 136 | 13 | } |
|
137 | | |
138 | 137 | bool use_default_implementation_for_nulls() const override { return false; }_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE36use_default_implementation_for_nullsEv Line | Count | Source | 138 | 44 | bool use_default_implementation_for_nulls() const override { return false; } |
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE36use_default_implementation_for_nullsEv Line | Count | Source | 138 | 64 | bool use_default_implementation_for_nulls() const override { return false; } |
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE36use_default_implementation_for_nullsEv Line | Count | Source | 138 | 29 | bool use_default_implementation_for_nulls() const override { return false; } |
|
139 | | |
140 | 334 | Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { |
141 | 334 | if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) { |
142 | 60 | std::shared_ptr<FunctionJsonbParseState> state = |
143 | 60 | std::make_shared<FunctionJsonbParseState>(); |
144 | 60 | context->set_function_state(FunctionContext::FRAGMENT_LOCAL, state); |
145 | 60 | } |
146 | 334 | if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) { |
147 | 165 | if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) { |
148 | 12 | auto* state = reinterpret_cast<FunctionJsonbParseState*>( |
149 | 12 | context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); |
150 | 12 | if (state) { |
151 | 12 | if (context->get_num_args() == 2) { |
152 | 9 | if (context->is_col_constant(1)) { |
153 | 2 | const auto default_value_col = context->get_constant_col(1)->column_ptr; |
154 | 2 | if (default_value_col->is_null_at(0)) { |
155 | 1 | state->default_is_null = true; |
156 | 1 | } else { |
157 | 1 | const auto& default_value = default_value_col->get_data_at(0); |
158 | | |
159 | 1 | state->default_value = default_value; |
160 | 1 | state->has_const_default_value = true; |
161 | 1 | } |
162 | 2 | } |
163 | 9 | } else if (context->get_num_args() == 1) { |
164 | 2 | RETURN_IF_ERROR( |
165 | 2 | state->default_value_parser.from_json_string(std::string("{}"))); |
166 | 2 | state->default_value = StringRef(state->default_value_parser.value(), |
167 | 2 | state->default_value_parser.size()); |
168 | 2 | state->has_const_default_value = true; |
169 | 2 | } |
170 | 12 | } |
171 | 12 | } |
172 | | |
173 | 165 | if (context->get_num_args() != 1 && context->get_num_args() != 2) { |
174 | 1 | return Status::InvalidArgument( |
175 | 1 | "{} function should have 1 or 2 arguments, " |
176 | 1 | "but got {}", |
177 | 1 | get_name(), context->get_num_args()); |
178 | 1 | } |
179 | 165 | } |
180 | 164 | return Status::OK(); |
181 | 334 | } _ZN5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE4openEPNS_15FunctionContextENS4_18FunctionStateScopeE Line | Count | Source | 140 | 60 | Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { | 141 | 60 | if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) { | 142 | 18 | std::shared_ptr<FunctionJsonbParseState> state = | 143 | 18 | std::make_shared<FunctionJsonbParseState>(); | 144 | 18 | context->set_function_state(FunctionContext::FRAGMENT_LOCAL, state); | 145 | 18 | } | 146 | | if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) { | 147 | | if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) { | 148 | | auto* state = reinterpret_cast<FunctionJsonbParseState*>( | 149 | | context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); | 150 | | if (state) { | 151 | | if (context->get_num_args() == 2) { | 152 | | if (context->is_col_constant(1)) { | 153 | | const auto default_value_col = context->get_constant_col(1)->column_ptr; | 154 | | if (default_value_col->is_null_at(0)) { | 155 | | state->default_is_null = true; | 156 | | } else { | 157 | | const auto& default_value = default_value_col->get_data_at(0); | 158 | | | 159 | | state->default_value = default_value; | 160 | | state->has_const_default_value = true; | 161 | | } | 162 | | } | 163 | | } else if (context->get_num_args() == 1) { | 164 | | RETURN_IF_ERROR( | 165 | | state->default_value_parser.from_json_string(std::string("{}"))); | 166 | | state->default_value = StringRef(state->default_value_parser.value(), | 167 | | state->default_value_parser.size()); | 168 | | state->has_const_default_value = true; | 169 | | } | 170 | | } | 171 | | } | 172 | | | 173 | | if (context->get_num_args() != 1 && context->get_num_args() != 2) { | 174 | | return Status::InvalidArgument( | 175 | | "{} function should have 1 or 2 arguments, " | 176 | | "but got {}", | 177 | | get_name(), 
context->get_num_args()); | 178 | | } | 179 | | } | 180 | 60 | return Status::OK(); | 181 | 60 | } |
_ZN5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE4openEPNS_15FunctionContextENS4_18FunctionStateScopeE Line | Count | Source | 140 | 109 | Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { | 141 | 109 | if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) { | 142 | 30 | std::shared_ptr<FunctionJsonbParseState> state = | 143 | 30 | std::make_shared<FunctionJsonbParseState>(); | 144 | 30 | context->set_function_state(FunctionContext::FRAGMENT_LOCAL, state); | 145 | 30 | } | 146 | | if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) { | 147 | | if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) { | 148 | | auto* state = reinterpret_cast<FunctionJsonbParseState*>( | 149 | | context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); | 150 | | if (state) { | 151 | | if (context->get_num_args() == 2) { | 152 | | if (context->is_col_constant(1)) { | 153 | | const auto default_value_col = context->get_constant_col(1)->column_ptr; | 154 | | if (default_value_col->is_null_at(0)) { | 155 | | state->default_is_null = true; | 156 | | } else { | 157 | | const auto& default_value = default_value_col->get_data_at(0); | 158 | | | 159 | | state->default_value = default_value; | 160 | | state->has_const_default_value = true; | 161 | | } | 162 | | } | 163 | | } else if (context->get_num_args() == 1) { | 164 | | RETURN_IF_ERROR( | 165 | | state->default_value_parser.from_json_string(std::string("{}"))); | 166 | | state->default_value = StringRef(state->default_value_parser.value(), | 167 | | state->default_value_parser.size()); | 168 | | state->has_const_default_value = true; | 169 | | } | 170 | | } | 171 | | } | 172 | | | 173 | | if (context->get_num_args() != 1 && context->get_num_args() != 2) { | 174 | | return Status::InvalidArgument( | 175 | | "{} function should have 1 or 2 arguments, " | 176 | | "but got {}", | 177 | | get_name(), 
context->get_num_args()); | 178 | | } | 179 | | } | 180 | 109 | return Status::OK(); | 181 | 109 | } |
_ZN5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE4openEPNS_15FunctionContextENS4_18FunctionStateScopeE Line | Count | Source | 140 | 165 | Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { | 141 | 165 | if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) { | 142 | 12 | std::shared_ptr<FunctionJsonbParseState> state = | 143 | 12 | std::make_shared<FunctionJsonbParseState>(); | 144 | 12 | context->set_function_state(FunctionContext::FRAGMENT_LOCAL, state); | 145 | 12 | } | 146 | 165 | if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) { | 147 | 165 | if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) { | 148 | 12 | auto* state = reinterpret_cast<FunctionJsonbParseState*>( | 149 | 12 | context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); | 150 | 12 | if (state) { | 151 | 12 | if (context->get_num_args() == 2) { | 152 | 9 | if (context->is_col_constant(1)) { | 153 | 2 | const auto default_value_col = context->get_constant_col(1)->column_ptr; | 154 | 2 | if (default_value_col->is_null_at(0)) { | 155 | 1 | state->default_is_null = true; | 156 | 1 | } else { | 157 | 1 | const auto& default_value = default_value_col->get_data_at(0); | 158 | | | 159 | 1 | state->default_value = default_value; | 160 | 1 | state->has_const_default_value = true; | 161 | 1 | } | 162 | 2 | } | 163 | 9 | } else if (context->get_num_args() == 1) { | 164 | 2 | RETURN_IF_ERROR( | 165 | 2 | state->default_value_parser.from_json_string(std::string("{}"))); | 166 | 2 | state->default_value = StringRef(state->default_value_parser.value(), | 167 | 2 | state->default_value_parser.size()); | 168 | 2 | state->has_const_default_value = true; | 169 | 2 | } | 170 | 12 | } | 171 | 12 | } | 172 | | | 173 | 165 | if (context->get_num_args() != 1 && context->get_num_args() != 2) { | 174 | 1 | return Status::InvalidArgument( | 175 | 1 | "{} function should have 1 or 2 
arguments, " | 176 | 1 | "but got {}", | 177 | 1 | get_name(), context->get_num_args()); | 178 | 1 | } | 179 | 165 | } | 180 | 164 | return Status::OK(); | 181 | 165 | } |
|
182 | | |
183 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
184 | 76 | uint32_t result, size_t input_rows_count) const override { |
185 | 76 | auto&& [col_from, col_from_is_const] = |
186 | 76 | unpack_if_const(block.get_by_position(arguments[0]).column); |
187 | | |
188 | 76 | if (col_from_is_const && col_from->is_null_at(0)) { |
189 | 1 | auto col_str = ColumnString::create(); |
190 | 1 | col_str->insert_default(); |
191 | 1 | auto null_map = ColumnUInt8::create(1, 1); |
192 | 1 | auto nullable_col = ColumnNullable::create(std::move(col_str), std::move(null_map)); |
193 | 1 | block.get_by_position(result).column = |
194 | 1 | ColumnConst::create(std::move(nullable_col), input_rows_count); |
195 | 1 | return Status::OK(); |
196 | 1 | } |
197 | | |
198 | 75 | auto null_map = ColumnUInt8::create(0, 0); |
199 | 75 | bool is_nullable = false; |
200 | | |
201 | 75 | switch (nullable_mode) { |
202 | 34 | case NullalbeMode::NULLABLE: { |
203 | 34 | is_nullable = true; |
204 | 34 | break; |
205 | 0 | } |
206 | 41 | case NullalbeMode::FOLLOW_INPUT: { |
207 | 54 | for (auto arg : arguments) { |
208 | 54 | is_nullable |= block.get_by_position(arg).type->is_nullable(); |
209 | 54 | } |
210 | 41 | break; |
211 | 0 | } |
212 | 75 | } |
213 | | |
214 | 75 | if (is_nullable) { |
215 | 65 | null_map = ColumnUInt8::create(input_rows_count, 0); |
216 | 65 | } |
217 | | |
218 | 60 | const ColumnString* col_from_string = nullptr; |
219 | 75 | if (const auto* nullable_col = check_and_get_column<ColumnNullable>(col_from.get())) { |
220 | 40 | VectorizedUtils::update_null_map(null_map->get_data(), |
221 | 40 | nullable_col->get_null_map_data(), col_from_is_const); |
222 | 40 | col_from_string = |
223 | 40 | assert_cast<const ColumnString*>(nullable_col->get_nested_column_ptr().get()); |
224 | 40 | } else { |
225 | 35 | col_from_string = assert_cast<const ColumnString*>(col_from.get()); |
226 | 35 | } |
227 | | |
228 | 60 | StringRef constant_default_value; |
229 | 60 | bool default_value_const = false; |
230 | 60 | bool default_value_null_const = false; |
231 | 60 | ColumnPtr default_value_col; |
232 | 60 | JsonBinaryValue default_jsonb_value_parser; |
233 | 60 | const ColumnString* default_value_str_col = nullptr; |
234 | 60 | const NullMap* default_value_nullmap = nullptr; |
235 | 60 | if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) { |
236 | 15 | auto* state = reinterpret_cast<FunctionJsonbParseState*>( |
237 | 15 | context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); |
238 | 15 | if (state && state->has_const_default_value) { |
239 | 7 | constant_default_value = state->default_value; |
240 | 7 | default_value_null_const = state->default_is_null; |
241 | 7 | default_value_const = true; |
242 | 8 | } else if (arguments.size() > 1) { |
243 | 8 | if (block.get_by_position(arguments[1]).type->get_primitive_type() != |
244 | 8 | PrimitiveType::TYPE_JSONB) { |
245 | 1 | return Status::InvalidArgument( |
246 | 1 | "{} second argument should be jsonb type, but got {}", get_name(), |
247 | 1 | block.get_by_position(arguments[1]).type->get_name()); |
248 | 1 | } |
249 | 7 | std::tie(default_value_col, default_value_const) = |
250 | 7 | unpack_if_const(block.get_by_position(arguments[1]).column); |
251 | 7 | if (default_value_const) { |
252 | 1 | const JsonbDocument* default_value_doc = nullptr; |
253 | 1 | if (default_value_col->is_null_at(0)) { |
254 | 1 | default_value_null_const = true; |
255 | 1 | } else { |
256 | 0 | auto data = default_value_col->get_data_at(0); |
257 | 0 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(data.data, data.size, |
258 | 0 | &default_value_doc)); |
259 | 0 | constant_default_value = data; |
260 | 0 | } |
261 | 6 | } else { |
262 | 6 | if (const auto* nullable_col = |
263 | 6 | check_and_get_column<ColumnNullable>(default_value_col.get())) { |
264 | 4 | default_value_str_col = assert_cast<const ColumnString*>( |
265 | 4 | nullable_col->get_nested_column_ptr().get()); |
266 | 4 | default_value_nullmap = &(nullable_col->get_null_map_data()); |
267 | 4 | } else { |
268 | 2 | default_value_str_col = |
269 | 2 | assert_cast<const ColumnString*>(default_value_col.get()); |
270 | 2 | } |
271 | 6 | } |
272 | 7 | } else if (arguments.size() == 1) { |
273 | | // parse default value '{}' should always success. |
274 | 0 | RETURN_IF_ERROR(default_jsonb_value_parser.from_json_string(std::string("{}"))); |
275 | 0 | default_value_const = true; |
276 | 0 | constant_default_value.data = default_jsonb_value_parser.value(); |
277 | 0 | constant_default_value.size = default_jsonb_value_parser.size(); |
278 | 0 | } |
279 | 15 | } |
280 | | |
281 | 14 | auto col_to = ColumnString::create(); |
282 | | |
283 | 60 | col_to->reserve(input_rows_count); |
284 | | |
285 | 60 | auto& null_map_data = null_map->get_data(); |
286 | | |
287 | | // parser can be reused for performance |
288 | 60 | JsonBinaryValue jsonb_value; |
289 | | |
290 | 1.30k | for (size_t i = 0; i < input_rows_count; ++i) { |
291 | 1.22k | if (is_nullable && null_map_data[i]) { |
292 | 13 | col_to->insert_default(); |
293 | 13 | continue; |
294 | 13 | } |
295 | | |
296 | 1.21k | auto index = index_check_const(i, col_from_is_const); |
297 | 1.21k | const auto& val = col_from_string->get_data_at(index); |
298 | 1.21k | auto st = jsonb_value.from_json_string(val.data, val.size); |
299 | 1.21k | if (st.ok()) { |
300 | | // insert jsonb format data |
301 | 1.16k | col_to->insert_data(jsonb_value.value(), jsonb_value.size()); |
302 | 1.16k | } else { |
303 | 54 | if constexpr (parse_error_handle_mode == JsonbParseErrorMode::FAIL) { |
304 | 6 | return Status::InvalidArgument( |
305 | 6 | "Parse json document failed at row {}, error: {}", i, st.to_string()); |
306 | 17 | } else if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_NULL) { |
307 | 17 | null_map_data[i] = 1; |
308 | 17 | col_to->insert_default(); |
309 | 31 | } else { |
310 | 31 | if (default_value_const) { |
311 | 9 | if (default_value_null_const) { |
312 | 3 | null_map_data[i] = 1; |
313 | 3 | col_to->insert_default(); |
314 | 6 | } else { |
315 | 6 | col_to->insert_data(constant_default_value.data, |
316 | 6 | constant_default_value.size); |
317 | 6 | } |
318 | 22 | } else { |
319 | 22 | if (default_value_nullmap && (*default_value_nullmap)[i]) { |
320 | 3 | null_map_data[i] = 1; |
321 | 3 | col_to->insert_default(); |
322 | 3 | continue; |
323 | 3 | } |
324 | 19 | auto value = default_value_str_col->get_data_at(i); |
325 | 19 | col_to->insert_data(value.data, value.size); |
326 | 19 | } |
327 | 31 | } |
328 | 54 | } |
329 | 1.21k | } |
330 | | |
331 | 78 | if (is_nullable) { |
332 | 59 | block.replace_by_position( |
333 | 59 | result, ColumnNullable::create(std::move(col_to), std::move(null_map))); |
334 | 59 | } else { |
335 | 19 | block.replace_by_position(result, std::move(col_to)); |
336 | 19 | } |
337 | | |
338 | 18 | return Status::OK(); |
339 | 15 | } _ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 184 | 26 | uint32_t result, size_t input_rows_count) const override { | 185 | 26 | auto&& [col_from, col_from_is_const] = | 186 | 26 | unpack_if_const(block.get_by_position(arguments[0]).column); | 187 | | | 188 | 26 | if (col_from_is_const && col_from->is_null_at(0)) { | 189 | 0 | auto col_str = ColumnString::create(); | 190 | 0 | col_str->insert_default(); | 191 | 0 | auto null_map = ColumnUInt8::create(1, 1); | 192 | 0 | auto nullable_col = ColumnNullable::create(std::move(col_str), std::move(null_map)); | 193 | 0 | block.get_by_position(result).column = | 194 | 0 | ColumnConst::create(std::move(nullable_col), input_rows_count); | 195 | 0 | return Status::OK(); | 196 | 0 | } | 197 | | | 198 | 26 | auto null_map = ColumnUInt8::create(0, 0); | 199 | 26 | bool is_nullable = false; | 200 | | | 201 | 26 | switch (nullable_mode) { | 202 | 0 | case NullalbeMode::NULLABLE: { | 203 | 0 | is_nullable = true; | 204 | 0 | break; | 205 | 0 | } | 206 | 26 | case NullalbeMode::FOLLOW_INPUT: { | 207 | 26 | for (auto arg : arguments) { | 208 | 26 | is_nullable |= block.get_by_position(arg).type->is_nullable(); | 209 | 26 | } | 210 | 26 | break; | 211 | 0 | } | 212 | 26 | } | 213 | | | 214 | 26 | if (is_nullable) { | 215 | 17 | null_map = ColumnUInt8::create(input_rows_count, 0); | 216 | 17 | } | 217 | | | 218 | 26 | const ColumnString* col_from_string = nullptr; | 219 | 26 | if (const auto* nullable_col = check_and_get_column<ColumnNullable>(col_from.get())) { | 220 | 17 | VectorizedUtils::update_null_map(null_map->get_data(), | 221 | 17 | nullable_col->get_null_map_data(), col_from_is_const); | 222 | 17 | col_from_string = | 223 | 17 | assert_cast<const ColumnString*>(nullable_col->get_nested_column_ptr().get()); | 224 | 17 | } else { | 225 | 9 | col_from_string = assert_cast<const 
ColumnString*>(col_from.get()); | 226 | 9 | } | 227 | | | 228 | 26 | StringRef constant_default_value; | 229 | 26 | bool default_value_const = false; | 230 | 26 | bool default_value_null_const = false; | 231 | 26 | ColumnPtr default_value_col; | 232 | 26 | JsonBinaryValue default_jsonb_value_parser; | 233 | 26 | const ColumnString* default_value_str_col = nullptr; | 234 | 26 | const NullMap* default_value_nullmap = nullptr; | 235 | | if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) { | 236 | | auto* state = reinterpret_cast<FunctionJsonbParseState*>( | 237 | | context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); | 238 | | if (state && state->has_const_default_value) { | 239 | | constant_default_value = state->default_value; | 240 | | default_value_null_const = state->default_is_null; | 241 | | default_value_const = true; | 242 | | } else if (arguments.size() > 1) { | 243 | | if (block.get_by_position(arguments[1]).type->get_primitive_type() != | 244 | | PrimitiveType::TYPE_JSONB) { | 245 | | return Status::InvalidArgument( | 246 | | "{} second argument should be jsonb type, but got {}", get_name(), | 247 | | block.get_by_position(arguments[1]).type->get_name()); | 248 | | } | 249 | | std::tie(default_value_col, default_value_const) = | 250 | | unpack_if_const(block.get_by_position(arguments[1]).column); | 251 | | if (default_value_const) { | 252 | | const JsonbDocument* default_value_doc = nullptr; | 253 | | if (default_value_col->is_null_at(0)) { | 254 | | default_value_null_const = true; | 255 | | } else { | 256 | | auto data = default_value_col->get_data_at(0); | 257 | | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(data.data, data.size, | 258 | | &default_value_doc)); | 259 | | constant_default_value = data; | 260 | | } | 261 | | } else { | 262 | | if (const auto* nullable_col = | 263 | | check_and_get_column<ColumnNullable>(default_value_col.get())) { | 264 | | default_value_str_col = assert_cast<const 
ColumnString*>( | 265 | | nullable_col->get_nested_column_ptr().get()); | 266 | | default_value_nullmap = &(nullable_col->get_null_map_data()); | 267 | | } else { | 268 | | default_value_str_col = | 269 | | assert_cast<const ColumnString*>(default_value_col.get()); | 270 | | } | 271 | | } | 272 | | } else if (arguments.size() == 1) { | 273 | | // parse default value '{}' should always success. | 274 | | RETURN_IF_ERROR(default_jsonb_value_parser.from_json_string(std::string("{}"))); | 275 | | default_value_const = true; | 276 | | constant_default_value.data = default_jsonb_value_parser.value(); | 277 | | constant_default_value.size = default_jsonb_value_parser.size(); | 278 | | } | 279 | | } | 280 | | | 281 | 26 | auto col_to = ColumnString::create(); | 282 | | | 283 | 26 | col_to->reserve(input_rows_count); | 284 | | | 285 | 26 | auto& null_map_data = null_map->get_data(); | 286 | | | 287 | | // parser can be reused for performance | 288 | 26 | JsonBinaryValue jsonb_value; | 289 | | | 290 | 68 | for (size_t i = 0; i < input_rows_count; ++i) { | 291 | 42 | if (is_nullable && null_map_data[i]) { | 292 | 1 | col_to->insert_default(); | 293 | 1 | continue; | 294 | 1 | } | 295 | | | 296 | 41 | auto index = index_check_const(i, col_from_is_const); | 297 | 41 | const auto& val = col_from_string->get_data_at(index); | 298 | 41 | auto st = jsonb_value.from_json_string(val.data, val.size); | 299 | 41 | if (st.ok()) { | 300 | | // insert jsonb format data | 301 | 35 | col_to->insert_data(jsonb_value.value(), jsonb_value.size()); | 302 | 35 | } else { | 303 | 6 | if constexpr (parse_error_handle_mode == JsonbParseErrorMode::FAIL) { | 304 | 6 | return Status::InvalidArgument( | 305 | 6 | "Parse json document failed at row {}, error: {}", i, st.to_string()); | 306 | | } else if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_NULL) { | 307 | | null_map_data[i] = 1; | 308 | | col_to->insert_default(); | 309 | | } else { | 310 | | if (default_value_const) { | 311 
| | if (default_value_null_const) { | 312 | | null_map_data[i] = 1; | 313 | | col_to->insert_default(); | 314 | | } else { | 315 | | col_to->insert_data(constant_default_value.data, | 316 | | constant_default_value.size); | 317 | | } | 318 | | } else { | 319 | | if (default_value_nullmap && (*default_value_nullmap)[i]) { | 320 | | null_map_data[i] = 1; | 321 | | col_to->insert_default(); | 322 | | continue; | 323 | | } | 324 | | auto value = default_value_str_col->get_data_at(i); | 325 | | col_to->insert_data(value.data, value.size); | 326 | | } | 327 | | } | 328 | 6 | } | 329 | 41 | } | 330 | | | 331 | 26 | if (is_nullable) { | 332 | 11 | block.replace_by_position( | 333 | 11 | result, ColumnNullable::create(std::move(col_to), std::move(null_map))); | 334 | 15 | } else { | 335 | 15 | block.replace_by_position(result, std::move(col_to)); | 336 | 15 | } | 337 | | | 338 | 26 | return Status::OK(); | 339 | 26 | } |
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 184 | 34 | uint32_t result, size_t input_rows_count) const override { | 185 | 34 | auto&& [col_from, col_from_is_const] = | 186 | 34 | unpack_if_const(block.get_by_position(arguments[0]).column); | 187 | | | 188 | 34 | if (col_from_is_const && col_from->is_null_at(0)) { | 189 | 0 | auto col_str = ColumnString::create(); | 190 | 0 | col_str->insert_default(); | 191 | 0 | auto null_map = ColumnUInt8::create(1, 1); | 192 | 0 | auto nullable_col = ColumnNullable::create(std::move(col_str), std::move(null_map)); | 193 | 0 | block.get_by_position(result).column = | 194 | 0 | ColumnConst::create(std::move(nullable_col), input_rows_count); | 195 | 0 | return Status::OK(); | 196 | 0 | } | 197 | | | 198 | 34 | auto null_map = ColumnUInt8::create(0, 0); | 199 | 34 | bool is_nullable = false; | 200 | | | 201 | 34 | switch (nullable_mode) { | 202 | 34 | case NullalbeMode::NULLABLE: { | 203 | 34 | is_nullable = true; | 204 | 34 | break; | 205 | 0 | } | 206 | 0 | case NullalbeMode::FOLLOW_INPUT: { | 207 | 0 | for (auto arg : arguments) { | 208 | 0 | is_nullable |= block.get_by_position(arg).type->is_nullable(); | 209 | 0 | } | 210 | 0 | break; | 211 | 0 | } | 212 | 34 | } | 213 | | | 214 | 34 | if (is_nullable) { | 215 | 34 | null_map = ColumnUInt8::create(input_rows_count, 0); | 216 | 34 | } | 217 | | | 218 | 34 | const ColumnString* col_from_string = nullptr; | 219 | 34 | if (const auto* nullable_col = check_and_get_column<ColumnNullable>(col_from.get())) { | 220 | 11 | VectorizedUtils::update_null_map(null_map->get_data(), | 221 | 11 | nullable_col->get_null_map_data(), col_from_is_const); | 222 | 11 | col_from_string = | 223 | 11 | assert_cast<const ColumnString*>(nullable_col->get_nested_column_ptr().get()); | 224 | 23 | } else { | 225 | 23 | col_from_string = assert_cast<const 
ColumnString*>(col_from.get()); | 226 | 23 | } | 227 | | | 228 | 34 | StringRef constant_default_value; | 229 | 34 | bool default_value_const = false; | 230 | 34 | bool default_value_null_const = false; | 231 | 34 | ColumnPtr default_value_col; | 232 | 34 | JsonBinaryValue default_jsonb_value_parser; | 233 | 34 | const ColumnString* default_value_str_col = nullptr; | 234 | 34 | const NullMap* default_value_nullmap = nullptr; | 235 | | if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) { | 236 | | auto* state = reinterpret_cast<FunctionJsonbParseState*>( | 237 | | context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); | 238 | | if (state && state->has_const_default_value) { | 239 | | constant_default_value = state->default_value; | 240 | | default_value_null_const = state->default_is_null; | 241 | | default_value_const = true; | 242 | | } else if (arguments.size() > 1) { | 243 | | if (block.get_by_position(arguments[1]).type->get_primitive_type() != | 244 | | PrimitiveType::TYPE_JSONB) { | 245 | | return Status::InvalidArgument( | 246 | | "{} second argument should be jsonb type, but got {}", get_name(), | 247 | | block.get_by_position(arguments[1]).type->get_name()); | 248 | | } | 249 | | std::tie(default_value_col, default_value_const) = | 250 | | unpack_if_const(block.get_by_position(arguments[1]).column); | 251 | | if (default_value_const) { | 252 | | const JsonbDocument* default_value_doc = nullptr; | 253 | | if (default_value_col->is_null_at(0)) { | 254 | | default_value_null_const = true; | 255 | | } else { | 256 | | auto data = default_value_col->get_data_at(0); | 257 | | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(data.data, data.size, | 258 | | &default_value_doc)); | 259 | | constant_default_value = data; | 260 | | } | 261 | | } else { | 262 | | if (const auto* nullable_col = | 263 | | check_and_get_column<ColumnNullable>(default_value_col.get())) { | 264 | | default_value_str_col = assert_cast<const 
ColumnString*>( | 265 | | nullable_col->get_nested_column_ptr().get()); | 266 | | default_value_nullmap = &(nullable_col->get_null_map_data()); | 267 | | } else { | 268 | | default_value_str_col = | 269 | | assert_cast<const ColumnString*>(default_value_col.get()); | 270 | | } | 271 | | } | 272 | | } else if (arguments.size() == 1) { | 273 | | // parse default value '{}' should always success. | 274 | | RETURN_IF_ERROR(default_jsonb_value_parser.from_json_string(std::string("{}"))); | 275 | | default_value_const = true; | 276 | | constant_default_value.data = default_jsonb_value_parser.value(); | 277 | | constant_default_value.size = default_jsonb_value_parser.size(); | 278 | | } | 279 | | } | 280 | | | 281 | 34 | auto col_to = ColumnString::create(); | 282 | | | 283 | 34 | col_to->reserve(input_rows_count); | 284 | | | 285 | 34 | auto& null_map_data = null_map->get_data(); | 286 | | | 287 | | // parser can be reused for performance | 288 | 34 | JsonBinaryValue jsonb_value; | 289 | | | 290 | 99 | for (size_t i = 0; i < input_rows_count; ++i) { | 291 | 65 | if (is_nullable && null_map_data[i]) { | 292 | 6 | col_to->insert_default(); | 293 | 6 | continue; | 294 | 6 | } | 295 | | | 296 | 59 | auto index = index_check_const(i, col_from_is_const); | 297 | 59 | const auto& val = col_from_string->get_data_at(index); | 298 | 59 | auto st = jsonb_value.from_json_string(val.data, val.size); | 299 | 59 | if (st.ok()) { | 300 | | // insert jsonb format data | 301 | 42 | col_to->insert_data(jsonb_value.value(), jsonb_value.size()); | 302 | 42 | } else { | 303 | | if constexpr (parse_error_handle_mode == JsonbParseErrorMode::FAIL) { | 304 | | return Status::InvalidArgument( | 305 | | "Parse json document failed at row {}, error: {}", i, st.to_string()); | 306 | 17 | } else if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_NULL) { | 307 | 17 | null_map_data[i] = 1; | 308 | 17 | col_to->insert_default(); | 309 | | } else { | 310 | | if (default_value_const) { | 
311 | | if (default_value_null_const) { | 312 | | null_map_data[i] = 1; | 313 | | col_to->insert_default(); | 314 | | } else { | 315 | | col_to->insert_data(constant_default_value.data, | 316 | | constant_default_value.size); | 317 | | } | 318 | | } else { | 319 | | if (default_value_nullmap && (*default_value_nullmap)[i]) { | 320 | | null_map_data[i] = 1; | 321 | | col_to->insert_default(); | 322 | | continue; | 323 | | } | 324 | | auto value = default_value_str_col->get_data_at(i); | 325 | | col_to->insert_data(value.data, value.size); | 326 | | } | 327 | | } | 328 | 17 | } | 329 | 59 | } | 330 | | | 331 | 34 | if (is_nullable) { | 332 | 34 | block.replace_by_position( | 333 | 34 | result, ColumnNullable::create(std::move(col_to), std::move(null_map))); | 334 | 34 | } else { | 335 | 0 | block.replace_by_position(result, std::move(col_to)); | 336 | 0 | } | 337 | | | 338 | 34 | return Status::OK(); | 339 | 34 | } |
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 184 | 16 | uint32_t result, size_t input_rows_count) const override { | 185 | 16 | auto&& [col_from, col_from_is_const] = | 186 | 16 | unpack_if_const(block.get_by_position(arguments[0]).column); | 187 | | | 188 | 16 | if (col_from_is_const && col_from->is_null_at(0)) { | 189 | 1 | auto col_str = ColumnString::create(); | 190 | 1 | col_str->insert_default(); | 191 | 1 | auto null_map = ColumnUInt8::create(1, 1); | 192 | 1 | auto nullable_col = ColumnNullable::create(std::move(col_str), std::move(null_map)); | 193 | 1 | block.get_by_position(result).column = | 194 | 1 | ColumnConst::create(std::move(nullable_col), input_rows_count); | 195 | 1 | return Status::OK(); | 196 | 1 | } | 197 | | | 198 | 15 | auto null_map = ColumnUInt8::create(0, 0); | 199 | 15 | bool is_nullable = false; | 200 | | | 201 | 15 | switch (nullable_mode) { | 202 | 0 | case NullalbeMode::NULLABLE: { | 203 | 0 | is_nullable = true; | 204 | 0 | break; | 205 | 0 | } | 206 | 15 | case NullalbeMode::FOLLOW_INPUT: { | 207 | 28 | for (auto arg : arguments) { | 208 | 28 | is_nullable |= block.get_by_position(arg).type->is_nullable(); | 209 | 28 | } | 210 | 15 | break; | 211 | 0 | } | 212 | 15 | } | 213 | | | 214 | 15 | if (is_nullable) { | 215 | 14 | null_map = ColumnUInt8::create(input_rows_count, 0); | 216 | 14 | } | 217 | | | 218 | 15 | const ColumnString* col_from_string = nullptr; | 219 | 15 | if (const auto* nullable_col = check_and_get_column<ColumnNullable>(col_from.get())) { | 220 | 12 | VectorizedUtils::update_null_map(null_map->get_data(), | 221 | 12 | nullable_col->get_null_map_data(), col_from_is_const); | 222 | 12 | col_from_string = | 223 | 12 | assert_cast<const ColumnString*>(nullable_col->get_nested_column_ptr().get()); | 224 | 12 | } else { | 225 | 3 | col_from_string = assert_cast<const 
ColumnString*>(col_from.get()); | 226 | 3 | } | 227 | | | 228 | 15 | StringRef constant_default_value; | 229 | 15 | bool default_value_const = false; | 230 | 15 | bool default_value_null_const = false; | 231 | 15 | ColumnPtr default_value_col; | 232 | 15 | JsonBinaryValue default_jsonb_value_parser; | 233 | 15 | const ColumnString* default_value_str_col = nullptr; | 234 | 15 | const NullMap* default_value_nullmap = nullptr; | 235 | 15 | if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) { | 236 | 15 | auto* state = reinterpret_cast<FunctionJsonbParseState*>( | 237 | 15 | context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); | 238 | 15 | if (state && state->has_const_default_value) { | 239 | 7 | constant_default_value = state->default_value; | 240 | 7 | default_value_null_const = state->default_is_null; | 241 | 7 | default_value_const = true; | 242 | 8 | } else if (arguments.size() > 1) { | 243 | 8 | if (block.get_by_position(arguments[1]).type->get_primitive_type() != | 244 | 8 | PrimitiveType::TYPE_JSONB) { | 245 | 1 | return Status::InvalidArgument( | 246 | 1 | "{} second argument should be jsonb type, but got {}", get_name(), | 247 | 1 | block.get_by_position(arguments[1]).type->get_name()); | 248 | 1 | } | 249 | 7 | std::tie(default_value_col, default_value_const) = | 250 | 7 | unpack_if_const(block.get_by_position(arguments[1]).column); | 251 | 7 | if (default_value_const) { | 252 | 1 | const JsonbDocument* default_value_doc = nullptr; | 253 | 1 | if (default_value_col->is_null_at(0)) { | 254 | 1 | default_value_null_const = true; | 255 | 1 | } else { | 256 | 0 | auto data = default_value_col->get_data_at(0); | 257 | 0 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(data.data, data.size, | 258 | 0 | &default_value_doc)); | 259 | 0 | constant_default_value = data; | 260 | 0 | } | 261 | 6 | } else { | 262 | 6 | if (const auto* nullable_col = | 263 | 6 | check_and_get_column<ColumnNullable>(default_value_col.get())) { | 
264 | 4 | default_value_str_col = assert_cast<const ColumnString*>( | 265 | 4 | nullable_col->get_nested_column_ptr().get()); | 266 | 4 | default_value_nullmap = &(nullable_col->get_null_map_data()); | 267 | 4 | } else { | 268 | 2 | default_value_str_col = | 269 | 2 | assert_cast<const ColumnString*>(default_value_col.get()); | 270 | 2 | } | 271 | 6 | } | 272 | 7 | } else if (arguments.size() == 1) { | 273 | | // parse default value '{}' should always success. | 274 | 0 | RETURN_IF_ERROR(default_jsonb_value_parser.from_json_string(std::string("{}"))); | 275 | 0 | default_value_const = true; | 276 | 0 | constant_default_value.data = default_jsonb_value_parser.value(); | 277 | 0 | constant_default_value.size = default_jsonb_value_parser.size(); | 278 | 0 | } | 279 | 15 | } | 280 | | | 281 | 14 | auto col_to = ColumnString::create(); | 282 | | | 283 | 15 | col_to->reserve(input_rows_count); | 284 | | | 285 | 15 | auto& null_map_data = null_map->get_data(); | 286 | | | 287 | | // parser can be reused for performance | 288 | 15 | JsonBinaryValue jsonb_value; | 289 | | | 290 | 1.14k | for (size_t i = 0; i < input_rows_count; ++i) { | 291 | 1.12k | if (is_nullable && null_map_data[i]) { | 292 | 6 | col_to->insert_default(); | 293 | 6 | continue; | 294 | 6 | } | 295 | | | 296 | 1.11k | auto index = index_check_const(i, col_from_is_const); | 297 | 1.11k | const auto& val = col_from_string->get_data_at(index); | 298 | 1.11k | auto st = jsonb_value.from_json_string(val.data, val.size); | 299 | 1.11k | if (st.ok()) { | 300 | | // insert jsonb format data | 301 | 1.08k | col_to->insert_data(jsonb_value.value(), jsonb_value.size()); | 302 | 1.08k | } else { | 303 | | if constexpr (parse_error_handle_mode == JsonbParseErrorMode::FAIL) { | 304 | | return Status::InvalidArgument( | 305 | | "Parse json document failed at row {}, error: {}", i, st.to_string()); | 306 | | } else if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_NULL) { | 307 | | null_map_data[i] = 
1; | 308 | | col_to->insert_default(); | 309 | 31 | } else { | 310 | 31 | if (default_value_const) { | 311 | 9 | if (default_value_null_const) { | 312 | 3 | null_map_data[i] = 1; | 313 | 3 | col_to->insert_default(); | 314 | 6 | } else { | 315 | 6 | col_to->insert_data(constant_default_value.data, | 316 | 6 | constant_default_value.size); | 317 | 6 | } | 318 | 22 | } else { | 319 | 22 | if (default_value_nullmap && (*default_value_nullmap)[i]) { | 320 | 3 | null_map_data[i] = 1; | 321 | 3 | col_to->insert_default(); | 322 | 3 | continue; | 323 | 3 | } | 324 | 19 | auto value = default_value_str_col->get_data_at(i); | 325 | 19 | col_to->insert_data(value.data, value.size); | 326 | 19 | } | 327 | 31 | } | 328 | 31 | } | 329 | 1.11k | } | 330 | | | 331 | 18 | if (is_nullable) { | 332 | 14 | block.replace_by_position( | 333 | 14 | result, ColumnNullable::create(std::move(col_to), std::move(null_map))); | 334 | 14 | } else { | 335 | 4 | block.replace_by_position(result, std::move(col_to)); | 336 | 4 | } | 337 | | | 338 | 18 | return Status::OK(); | 339 | 15 | } |
|
340 | | }; |
341 | | |
342 | | // jsonb_parse return type nullable as input |
343 | | using FunctionJsonbParse = |
344 | | FunctionJsonbParseBase<NullalbeMode::FOLLOW_INPUT, JsonbParseErrorMode::FAIL>; |
345 | | using FunctionJsonbParseErrorNull = |
346 | | FunctionJsonbParseBase<NullalbeMode::NULLABLE, JsonbParseErrorMode::RETURN_NULL>; |
347 | | using FunctionJsonbParseErrorValue = |
348 | | FunctionJsonbParseBase<NullalbeMode::FOLLOW_INPUT, JsonbParseErrorMode::RETURN_VALUE>; |
349 | | |
350 | | // func(jsonb, [varchar, varchar, ...]) -> nullable(type) |
351 | | template <typename Impl> |
352 | | class FunctionJsonbExtract : public IFunction { |
353 | | public: |
354 | | static constexpr auto name = Impl::name; |
355 | | static constexpr auto alias = Impl::alias; |
356 | 1.73k | static FunctionPtr create() { return std::make_shared<FunctionJsonbExtract>(); }_ZN5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE6createEv Line | Count | Source | 356 | 149 | static FunctionPtr create() { return std::make_shared<FunctionJsonbExtract>(); } |
_ZN5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE6createEv Line | Count | Source | 356 | 148 | static FunctionPtr create() { return std::make_shared<FunctionJsonbExtract>(); } |
_ZN5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE6createEv Line | Count | Source | 356 | 1.41k | static FunctionPtr create() { return std::make_shared<FunctionJsonbExtract>(); } |
_ZN5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE6createEv Line | Count | Source | 356 | 18 | static FunctionPtr create() { return std::make_shared<FunctionJsonbExtract>(); } |
|
357 | 0 | String get_name() const override { return name; }Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE8get_nameB5cxx11Ev |
358 | 1.70k | bool is_variadic() const override { return true; }_ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE11is_variadicEv Line | Count | Source | 358 | 141 | bool is_variadic() const override { return true; } |
_ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE11is_variadicEv Line | Count | Source | 358 | 140 | bool is_variadic() const override { return true; } |
_ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE11is_variadicEv Line | Count | Source | 358 | 1.41k | bool is_variadic() const override { return true; } |
_ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE11is_variadicEv Line | Count | Source | 358 | 10 | bool is_variadic() const override { return true; } |
|
359 | 1 | size_t get_number_of_arguments() const override { return 0; }Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE23get_number_of_argumentsEv Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE23get_number_of_argumentsEv _ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE23get_number_of_argumentsEv Line | Count | Source | 359 | 1 | size_t get_number_of_arguments() const override { return 0; } |
Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE23get_number_of_argumentsEv |
360 | 14.3k | bool use_default_implementation_for_nulls() const override { return false; }_ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE36use_default_implementation_for_nullsEv Line | Count | Source | 360 | 1.46k | bool use_default_implementation_for_nulls() const override { return false; } |
_ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE36use_default_implementation_for_nullsEv Line | Count | Source | 360 | 1.46k | bool use_default_implementation_for_nulls() const override { return false; } |
_ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE36use_default_implementation_for_nullsEv Line | Count | Source | 360 | 11.4k | bool use_default_implementation_for_nulls() const override { return false; } |
_ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE36use_default_implementation_for_nullsEv Line | Count | Source | 360 | 18 | bool use_default_implementation_for_nulls() const override { return false; } |
|
361 | 1.69k | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
362 | 1.69k | return make_nullable(std::make_shared<typename Impl::ReturnType>()); |
363 | 1.69k | } _ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 361 | 140 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 362 | 140 | return make_nullable(std::make_shared<typename Impl::ReturnType>()); | 363 | 140 | } |
_ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 361 | 139 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 362 | 139 | return make_nullable(std::make_shared<typename Impl::ReturnType>()); | 363 | 139 | } |
_ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 361 | 1.40k | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 362 | 1.40k | return make_nullable(std::make_shared<typename Impl::ReturnType>()); | 363 | 1.40k | } |
_ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 361 | 9 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 362 | 9 | return make_nullable(std::make_shared<typename Impl::ReturnType>()); | 363 | 9 | } |
|
364 | 32 | DataTypes get_variadic_argument_types_impl() const override { |
365 | | if constexpr (HasGetVariadicArgumentTypesImpl<Impl>) { |
366 | | return Impl::get_variadic_argument_types_impl(); |
367 | 32 | } else { |
368 | 32 | return {}; |
369 | 32 | } |
370 | 32 | } _ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE32get_variadic_argument_types_implEv Line | Count | Source | 364 | 8 | DataTypes get_variadic_argument_types_impl() const override { | 365 | | if constexpr (HasGetVariadicArgumentTypesImpl<Impl>) { | 366 | | return Impl::get_variadic_argument_types_impl(); | 367 | 8 | } else { | 368 | 8 | return {}; | 369 | 8 | } | 370 | 8 | } |
_ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE32get_variadic_argument_types_implEv Line | Count | Source | 364 | 8 | DataTypes get_variadic_argument_types_impl() const override { | 365 | | if constexpr (HasGetVariadicArgumentTypesImpl<Impl>) { | 366 | | return Impl::get_variadic_argument_types_impl(); | 367 | 8 | } else { | 368 | 8 | return {}; | 369 | 8 | } | 370 | 8 | } |
_ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE32get_variadic_argument_types_implEv Line | Count | Source | 364 | 8 | DataTypes get_variadic_argument_types_impl() const override { | 365 | | if constexpr (HasGetVariadicArgumentTypesImpl<Impl>) { | 366 | | return Impl::get_variadic_argument_types_impl(); | 367 | 8 | } else { | 368 | 8 | return {}; | 369 | 8 | } | 370 | 8 | } |
_ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE32get_variadic_argument_types_implEv Line | Count | Source | 364 | 8 | DataTypes get_variadic_argument_types_impl() const override { | 365 | | if constexpr (HasGetVariadicArgumentTypesImpl<Impl>) { | 366 | | return Impl::get_variadic_argument_types_impl(); | 367 | 8 | } else { | 368 | 8 | return {}; | 369 | 8 | } | 370 | 8 | } |
|
371 | | |
372 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
373 | 12.6k | uint32_t result, size_t input_rows_count) const override { |
374 | 12.6k | DORIS_CHECK_GE(arguments.size(), 2); |
375 | | |
376 | 12.6k | ColumnPtr jsonb_data_column; |
377 | 12.6k | bool jsonb_data_const = false; |
378 | 12.6k | const NullMap* data_null_map = nullptr; |
379 | | |
380 | 12.6k | if (block.get_by_position(arguments[0]).type->get_primitive_type() != |
381 | 12.6k | PrimitiveType::TYPE_JSONB) { |
382 | 1 | return Status::InvalidArgument( |
383 | 1 | "jsonb_extract first argument should be json type, but got {}", |
384 | 1 | block.get_by_position(arguments[0]).type->get_name()); |
385 | 1 | } |
386 | | |
387 | | // prepare jsonb data column |
388 | 12.6k | std::tie(jsonb_data_column, jsonb_data_const) = |
389 | 12.6k | unpack_if_const(block.get_by_position(arguments[0]).column); |
390 | 12.6k | if (const auto* nullable_column = |
391 | 12.6k | check_and_get_column<ColumnNullable>(jsonb_data_column.get())) { |
392 | 10.8k | jsonb_data_column = nullable_column->get_nested_column_ptr(); |
393 | 10.8k | data_null_map = &nullable_column->get_null_map_data(); |
394 | 10.8k | } |
395 | 12.6k | const auto& ldata = assert_cast<const ColumnString*>(jsonb_data_column.get())->get_chars(); |
396 | 12.6k | const auto& loffsets = |
397 | 12.6k | assert_cast<const ColumnString*>(jsonb_data_column.get())->get_offsets(); |
398 | | |
399 | | // prepare parse path column prepare |
400 | 12.6k | std::vector<const ColumnString*> jsonb_path_columns; |
401 | 12.6k | std::vector<bool> path_const(arguments.size() - 1); |
402 | 12.6k | std::vector<const NullMap*> path_null_maps(arguments.size() - 1, nullptr); |
403 | 25.6k | for (int i = 0; i < arguments.size() - 1; ++i) { |
404 | 13.0k | ColumnPtr path_column; |
405 | 13.0k | bool is_const = false; |
406 | 13.0k | std::tie(path_column, is_const) = |
407 | 13.0k | unpack_if_const(block.get_by_position(arguments[i + 1]).column); |
408 | 13.0k | path_const[i] = is_const; |
409 | 13.0k | if (const auto* nullable_column = |
410 | 13.0k | check_and_get_column<ColumnNullable>(path_column.get())) { |
411 | 70 | path_column = nullable_column->get_nested_column_ptr(); |
412 | 70 | path_null_maps[i] = &nullable_column->get_null_map_data(); |
413 | 70 | } |
414 | 13.0k | jsonb_path_columns.push_back(assert_cast<const ColumnString*>(path_column.get())); |
415 | 13.0k | } |
416 | | |
417 | 12.6k | auto null_map = ColumnUInt8::create(input_rows_count, 0); |
418 | 12.6k | auto res = Impl::ColumnType::create(); |
419 | | |
420 | | // execute Impl |
421 | | if constexpr (std::is_same_v<typename Impl::ReturnType, DataTypeString> || |
422 | 11.3k | std::is_same_v<typename Impl::ReturnType, DataTypeJsonb>) { |
423 | 11.3k | auto& res_data = res->get_chars(); |
424 | 11.3k | auto& res_offsets = res->get_offsets(); |
425 | 11.3k | RETURN_IF_ERROR(Impl::vector_vector_v2( |
426 | 11.3k | context, ldata, loffsets, data_null_map, jsonb_data_const, jsonb_path_columns, |
427 | 11.3k | path_null_maps, path_const, res_data, res_offsets, null_map->get_data())); |
428 | 11.3k | } else { |
429 | | // not support other extract type for now (e.g. int, double, ...) |
430 | 1.32k | DORIS_CHECK_EQ(jsonb_path_columns.size(), 1); |
431 | 1.32k | const auto& rdata = jsonb_path_columns[0]->get_chars(); |
432 | 1.32k | const auto& roffsets = jsonb_path_columns[0]->get_offsets(); |
433 | | |
434 | 1.32k | auto create_all_null_result = [&]() { |
435 | 2 | res = Impl::ColumnType::create(); |
436 | 2 | res->insert_default(); |
437 | 2 | auto nullable_column = |
438 | 2 | ColumnNullable::create(std::move(res), ColumnUInt8::create(1, 1)); |
439 | 2 | auto const_column = |
440 | 2 | ColumnConst::create(std::move(nullable_column), input_rows_count); |
441 | 2 | block.get_by_position(result).column = std::move(const_column); |
442 | 2 | return Status::OK(); |
443 | 2 | }; |
444 | | |
445 | 1.32k | if (jsonb_data_const) { |
446 | 2 | if (data_null_map && (*data_null_map)[0]) { |
447 | 1 | return create_all_null_result(); |
448 | 1 | } |
449 | | |
450 | 1 | RETURN_IF_ERROR(Impl::scalar_vector(context, jsonb_data_column->get_data_at(0), |
451 | 1 | rdata, roffsets, path_null_maps[0], |
452 | 1 | res->get_data(), null_map->get_data())); |
453 | 1.32k | } else if (path_const[0]) { |
454 | 1.32k | if (path_null_maps[0] && (*path_null_maps[0])[0]) { |
455 | 1 | return create_all_null_result(); |
456 | 1 | } |
457 | 1.32k | RETURN_IF_ERROR(Impl::vector_scalar(context, ldata, loffsets, data_null_map, |
458 | 1.32k | jsonb_path_columns[0]->get_data_at(0), |
459 | 1.32k | res->get_data(), null_map->get_data())); |
460 | 1.32k | } else { |
461 | 4 | RETURN_IF_ERROR(Impl::vector_vector(context, ldata, loffsets, data_null_map, rdata, |
462 | 4 | roffsets, path_null_maps[0], res->get_data(), |
463 | 4 | null_map->get_data())); |
464 | 4 | } |
465 | 1.32k | } |
466 | | |
467 | 12.6k | block.get_by_position(result).column = |
468 | 12.6k | ColumnNullable::create(std::move(res), std::move(null_map)); |
469 | 12.6k | return Status::OK(); |
470 | 12.6k | } _ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 373 | 1.32k | uint32_t result, size_t input_rows_count) const override { | 374 | 1.32k | DORIS_CHECK_GE(arguments.size(), 2); | 375 | | | 376 | 1.32k | ColumnPtr jsonb_data_column; | 377 | 1.32k | bool jsonb_data_const = false; | 378 | 1.32k | const NullMap* data_null_map = nullptr; | 379 | | | 380 | 1.32k | if (block.get_by_position(arguments[0]).type->get_primitive_type() != | 381 | 1.32k | PrimitiveType::TYPE_JSONB) { | 382 | 0 | return Status::InvalidArgument( | 383 | 0 | "jsonb_extract first argument should be json type, but got {}", | 384 | 0 | block.get_by_position(arguments[0]).type->get_name()); | 385 | 0 | } | 386 | | | 387 | | // prepare jsonb data column | 388 | 1.32k | std::tie(jsonb_data_column, jsonb_data_const) = | 389 | 1.32k | unpack_if_const(block.get_by_position(arguments[0]).column); | 390 | 1.32k | if (const auto* nullable_column = | 391 | 1.32k | check_and_get_column<ColumnNullable>(jsonb_data_column.get())) { | 392 | 1.14k | jsonb_data_column = nullable_column->get_nested_column_ptr(); | 393 | 1.14k | data_null_map = &nullable_column->get_null_map_data(); | 394 | 1.14k | } | 395 | 1.32k | const auto& ldata = assert_cast<const ColumnString*>(jsonb_data_column.get())->get_chars(); | 396 | 1.32k | const auto& loffsets = | 397 | 1.32k | assert_cast<const ColumnString*>(jsonb_data_column.get())->get_offsets(); | 398 | | | 399 | | // prepare parse path column prepare | 400 | 1.32k | std::vector<const ColumnString*> jsonb_path_columns; | 401 | 1.32k | std::vector<bool> path_const(arguments.size() - 1); | 402 | 1.32k | std::vector<const NullMap*> path_null_maps(arguments.size() - 1, nullptr); | 403 | 2.65k | for (int i = 0; i < arguments.size() - 1; ++i) { | 404 | 1.32k | ColumnPtr path_column; | 405 | 1.32k | bool is_const = false; | 406 | 1.32k | std::tie(path_column, is_const) = | 407 | 
1.32k | unpack_if_const(block.get_by_position(arguments[i + 1]).column); | 408 | 1.32k | path_const[i] = is_const; | 409 | 1.32k | if (const auto* nullable_column = | 410 | 1.32k | check_and_get_column<ColumnNullable>(path_column.get())) { | 411 | 5 | path_column = nullable_column->get_nested_column_ptr(); | 412 | 5 | path_null_maps[i] = &nullable_column->get_null_map_data(); | 413 | 5 | } | 414 | 1.32k | jsonb_path_columns.push_back(assert_cast<const ColumnString*>(path_column.get())); | 415 | 1.32k | } | 416 | | | 417 | 1.32k | auto null_map = ColumnUInt8::create(input_rows_count, 0); | 418 | 1.32k | auto res = Impl::ColumnType::create(); | 419 | | | 420 | | // execute Impl | 421 | | if constexpr (std::is_same_v<typename Impl::ReturnType, DataTypeString> || | 422 | 1.32k | std::is_same_v<typename Impl::ReturnType, DataTypeJsonb>) { | 423 | 1.32k | auto& res_data = res->get_chars(); | 424 | 1.32k | auto& res_offsets = res->get_offsets(); | 425 | 1.32k | RETURN_IF_ERROR(Impl::vector_vector_v2( | 426 | 1.32k | context, ldata, loffsets, data_null_map, jsonb_data_const, jsonb_path_columns, | 427 | 1.32k | path_null_maps, path_const, res_data, res_offsets, null_map->get_data())); | 428 | | } else { | 429 | | // not support other extract type for now (e.g. int, double, ...) 
| 430 | | DORIS_CHECK_EQ(jsonb_path_columns.size(), 1); | 431 | | const auto& rdata = jsonb_path_columns[0]->get_chars(); | 432 | | const auto& roffsets = jsonb_path_columns[0]->get_offsets(); | 433 | | | 434 | | auto create_all_null_result = [&]() { | 435 | | res = Impl::ColumnType::create(); | 436 | | res->insert_default(); | 437 | | auto nullable_column = | 438 | | ColumnNullable::create(std::move(res), ColumnUInt8::create(1, 1)); | 439 | | auto const_column = | 440 | | ColumnConst::create(std::move(nullable_column), input_rows_count); | 441 | | block.get_by_position(result).column = std::move(const_column); | 442 | | return Status::OK(); | 443 | | }; | 444 | | | 445 | | if (jsonb_data_const) { | 446 | | if (data_null_map && (*data_null_map)[0]) { | 447 | | return create_all_null_result(); | 448 | | } | 449 | | | 450 | | RETURN_IF_ERROR(Impl::scalar_vector(context, jsonb_data_column->get_data_at(0), | 451 | | rdata, roffsets, path_null_maps[0], | 452 | | res->get_data(), null_map->get_data())); | 453 | | } else if (path_const[0]) { | 454 | | if (path_null_maps[0] && (*path_null_maps[0])[0]) { | 455 | | return create_all_null_result(); | 456 | | } | 457 | | RETURN_IF_ERROR(Impl::vector_scalar(context, ldata, loffsets, data_null_map, | 458 | | jsonb_path_columns[0]->get_data_at(0), | 459 | | res->get_data(), null_map->get_data())); | 460 | | } else { | 461 | | RETURN_IF_ERROR(Impl::vector_vector(context, ldata, loffsets, data_null_map, rdata, | 462 | | roffsets, path_null_maps[0], res->get_data(), | 463 | | null_map->get_data())); | 464 | | } | 465 | | } | 466 | | | 467 | 1.32k | block.get_by_position(result).column = | 468 | 1.32k | ColumnNullable::create(std::move(res), std::move(null_map)); | 469 | 1.32k | return Status::OK(); | 470 | 1.32k | } |
_ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 373 | 1.32k | uint32_t result, size_t input_rows_count) const override { | 374 | 1.32k | DORIS_CHECK_GE(arguments.size(), 2); | 375 | | | 376 | 1.32k | ColumnPtr jsonb_data_column; | 377 | 1.32k | bool jsonb_data_const = false; | 378 | 1.32k | const NullMap* data_null_map = nullptr; | 379 | | | 380 | 1.32k | if (block.get_by_position(arguments[0]).type->get_primitive_type() != | 381 | 1.32k | PrimitiveType::TYPE_JSONB) { | 382 | 0 | return Status::InvalidArgument( | 383 | 0 | "jsonb_extract first argument should be json type, but got {}", | 384 | 0 | block.get_by_position(arguments[0]).type->get_name()); | 385 | 0 | } | 386 | | | 387 | | // prepare jsonb data column | 388 | 1.32k | std::tie(jsonb_data_column, jsonb_data_const) = | 389 | 1.32k | unpack_if_const(block.get_by_position(arguments[0]).column); | 390 | 1.32k | if (const auto* nullable_column = | 391 | 1.32k | check_and_get_column<ColumnNullable>(jsonb_data_column.get())) { | 392 | 1.14k | jsonb_data_column = nullable_column->get_nested_column_ptr(); | 393 | 1.14k | data_null_map = &nullable_column->get_null_map_data(); | 394 | 1.14k | } | 395 | 1.32k | const auto& ldata = assert_cast<const ColumnString*>(jsonb_data_column.get())->get_chars(); | 396 | 1.32k | const auto& loffsets = | 397 | 1.32k | assert_cast<const ColumnString*>(jsonb_data_column.get())->get_offsets(); | 398 | | | 399 | | // prepare parse path column prepare | 400 | 1.32k | std::vector<const ColumnString*> jsonb_path_columns; | 401 | 1.32k | std::vector<bool> path_const(arguments.size() - 1); | 402 | 1.32k | std::vector<const NullMap*> path_null_maps(arguments.size() - 1, nullptr); | 403 | 2.65k | for (int i = 0; i < arguments.size() - 1; ++i) { | 404 | 1.32k | ColumnPtr path_column; | 405 | 1.32k | bool is_const = false; | 406 | 1.32k | std::tie(path_column, is_const) = | 407 | 1.32k | 
unpack_if_const(block.get_by_position(arguments[i + 1]).column); | 408 | 1.32k | path_const[i] = is_const; | 409 | 1.32k | if (const auto* nullable_column = | 410 | 1.32k | check_and_get_column<ColumnNullable>(path_column.get())) { | 411 | 4 | path_column = nullable_column->get_nested_column_ptr(); | 412 | 4 | path_null_maps[i] = &nullable_column->get_null_map_data(); | 413 | 4 | } | 414 | 1.32k | jsonb_path_columns.push_back(assert_cast<const ColumnString*>(path_column.get())); | 415 | 1.32k | } | 416 | | | 417 | 1.32k | auto null_map = ColumnUInt8::create(input_rows_count, 0); | 418 | 1.32k | auto res = Impl::ColumnType::create(); | 419 | | | 420 | | // execute Impl | 421 | | if constexpr (std::is_same_v<typename Impl::ReturnType, DataTypeString> || | 422 | | std::is_same_v<typename Impl::ReturnType, DataTypeJsonb>) { | 423 | | auto& res_data = res->get_chars(); | 424 | | auto& res_offsets = res->get_offsets(); | 425 | | RETURN_IF_ERROR(Impl::vector_vector_v2( | 426 | | context, ldata, loffsets, data_null_map, jsonb_data_const, jsonb_path_columns, | 427 | | path_null_maps, path_const, res_data, res_offsets, null_map->get_data())); | 428 | 1.32k | } else { | 429 | | // not support other extract type for now (e.g. int, double, ...) 
| 430 | 1.32k | DORIS_CHECK_EQ(jsonb_path_columns.size(), 1); | 431 | 1.32k | const auto& rdata = jsonb_path_columns[0]->get_chars(); | 432 | 1.32k | const auto& roffsets = jsonb_path_columns[0]->get_offsets(); | 433 | | | 434 | 1.32k | auto create_all_null_result = [&]() { | 435 | 1.32k | res = Impl::ColumnType::create(); | 436 | 1.32k | res->insert_default(); | 437 | 1.32k | auto nullable_column = | 438 | 1.32k | ColumnNullable::create(std::move(res), ColumnUInt8::create(1, 1)); | 439 | 1.32k | auto const_column = | 440 | 1.32k | ColumnConst::create(std::move(nullable_column), input_rows_count); | 441 | 1.32k | block.get_by_position(result).column = std::move(const_column); | 442 | 1.32k | return Status::OK(); | 443 | 1.32k | }; | 444 | | | 445 | 1.32k | if (jsonb_data_const) { | 446 | 2 | if (data_null_map && (*data_null_map)[0]) { | 447 | 1 | return create_all_null_result(); | 448 | 1 | } | 449 | | | 450 | 1 | RETURN_IF_ERROR(Impl::scalar_vector(context, jsonb_data_column->get_data_at(0), | 451 | 1 | rdata, roffsets, path_null_maps[0], | 452 | 1 | res->get_data(), null_map->get_data())); | 453 | 1.32k | } else if (path_const[0]) { | 454 | 1.32k | if (path_null_maps[0] && (*path_null_maps[0])[0]) { | 455 | 1 | return create_all_null_result(); | 456 | 1 | } | 457 | 1.32k | RETURN_IF_ERROR(Impl::vector_scalar(context, ldata, loffsets, data_null_map, | 458 | 1.32k | jsonb_path_columns[0]->get_data_at(0), | 459 | 1.32k | res->get_data(), null_map->get_data())); | 460 | 1.32k | } else { | 461 | 4 | RETURN_IF_ERROR(Impl::vector_vector(context, ldata, loffsets, data_null_map, rdata, | 462 | 4 | roffsets, path_null_maps[0], res->get_data(), | 463 | 4 | null_map->get_data())); | 464 | 4 | } | 465 | 1.32k | } | 466 | | | 467 | 1.32k | block.get_by_position(result).column = | 468 | 1.32k | ColumnNullable::create(std::move(res), std::move(null_map)); | 469 | 1.32k | return Status::OK(); | 470 | 1.32k | } |
_ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 373 | 10.0k | uint32_t result, size_t input_rows_count) const override { | 374 | 10.0k | DORIS_CHECK_GE(arguments.size(), 2); | 375 | | | 376 | 10.0k | ColumnPtr jsonb_data_column; | 377 | 10.0k | bool jsonb_data_const = false; | 378 | 10.0k | const NullMap* data_null_map = nullptr; | 379 | | | 380 | 10.0k | if (block.get_by_position(arguments[0]).type->get_primitive_type() != | 381 | 10.0k | PrimitiveType::TYPE_JSONB) { | 382 | 1 | return Status::InvalidArgument( | 383 | 1 | "jsonb_extract first argument should be json type, but got {}", | 384 | 1 | block.get_by_position(arguments[0]).type->get_name()); | 385 | 1 | } | 386 | | | 387 | | // prepare jsonb data column | 388 | 10.0k | std::tie(jsonb_data_column, jsonb_data_const) = | 389 | 10.0k | unpack_if_const(block.get_by_position(arguments[0]).column); | 390 | 10.0k | if (const auto* nullable_column = | 391 | 10.0k | check_and_get_column<ColumnNullable>(jsonb_data_column.get())) { | 392 | 8.53k | jsonb_data_column = nullable_column->get_nested_column_ptr(); | 393 | 8.53k | data_null_map = &nullable_column->get_null_map_data(); | 394 | 8.53k | } | 395 | 10.0k | const auto& ldata = assert_cast<const ColumnString*>(jsonb_data_column.get())->get_chars(); | 396 | 10.0k | const auto& loffsets = | 397 | 10.0k | assert_cast<const ColumnString*>(jsonb_data_column.get())->get_offsets(); | 398 | | | 399 | | // prepare parse path column prepare | 400 | 10.0k | std::vector<const ColumnString*> jsonb_path_columns; | 401 | 10.0k | std::vector<bool> path_const(arguments.size() - 1); | 402 | 10.0k | std::vector<const NullMap*> path_null_maps(arguments.size() - 1, nullptr); | 403 | 20.3k | for (int i = 0; i < arguments.size() - 1; ++i) { | 404 | 10.3k | ColumnPtr path_column; | 405 | 10.3k | bool is_const = false; | 406 | 10.3k | std::tie(path_column, is_const) = | 407 | 10.3k | 
unpack_if_const(block.get_by_position(arguments[i + 1]).column); | 408 | 10.3k | path_const[i] = is_const; | 409 | 10.3k | if (const auto* nullable_column = | 410 | 10.3k | check_and_get_column<ColumnNullable>(path_column.get())) { | 411 | 60 | path_column = nullable_column->get_nested_column_ptr(); | 412 | 60 | path_null_maps[i] = &nullable_column->get_null_map_data(); | 413 | 60 | } | 414 | 10.3k | jsonb_path_columns.push_back(assert_cast<const ColumnString*>(path_column.get())); | 415 | 10.3k | } | 416 | | | 417 | 10.0k | auto null_map = ColumnUInt8::create(input_rows_count, 0); | 418 | 10.0k | auto res = Impl::ColumnType::create(); | 419 | | | 420 | | // execute Impl | 421 | | if constexpr (std::is_same_v<typename Impl::ReturnType, DataTypeString> || | 422 | 10.0k | std::is_same_v<typename Impl::ReturnType, DataTypeJsonb>) { | 423 | 10.0k | auto& res_data = res->get_chars(); | 424 | 10.0k | auto& res_offsets = res->get_offsets(); | 425 | 10.0k | RETURN_IF_ERROR(Impl::vector_vector_v2( | 426 | 10.0k | context, ldata, loffsets, data_null_map, jsonb_data_const, jsonb_path_columns, | 427 | 10.0k | path_null_maps, path_const, res_data, res_offsets, null_map->get_data())); | 428 | | } else { | 429 | | // not support other extract type for now (e.g. int, double, ...) 
| 430 | | DORIS_CHECK_EQ(jsonb_path_columns.size(), 1); | 431 | | const auto& rdata = jsonb_path_columns[0]->get_chars(); | 432 | | const auto& roffsets = jsonb_path_columns[0]->get_offsets(); | 433 | | | 434 | | auto create_all_null_result = [&]() { | 435 | | res = Impl::ColumnType::create(); | 436 | | res->insert_default(); | 437 | | auto nullable_column = | 438 | | ColumnNullable::create(std::move(res), ColumnUInt8::create(1, 1)); | 439 | | auto const_column = | 440 | | ColumnConst::create(std::move(nullable_column), input_rows_count); | 441 | | block.get_by_position(result).column = std::move(const_column); | 442 | | return Status::OK(); | 443 | | }; | 444 | | | 445 | | if (jsonb_data_const) { | 446 | | if (data_null_map && (*data_null_map)[0]) { | 447 | | return create_all_null_result(); | 448 | | } | 449 | | | 450 | | RETURN_IF_ERROR(Impl::scalar_vector(context, jsonb_data_column->get_data_at(0), | 451 | | rdata, roffsets, path_null_maps[0], | 452 | | res->get_data(), null_map->get_data())); | 453 | | } else if (path_const[0]) { | 454 | | if (path_null_maps[0] && (*path_null_maps[0])[0]) { | 455 | | return create_all_null_result(); | 456 | | } | 457 | | RETURN_IF_ERROR(Impl::vector_scalar(context, ldata, loffsets, data_null_map, | 458 | | jsonb_path_columns[0]->get_data_at(0), | 459 | | res->get_data(), null_map->get_data())); | 460 | | } else { | 461 | | RETURN_IF_ERROR(Impl::vector_vector(context, ldata, loffsets, data_null_map, rdata, | 462 | | roffsets, path_null_maps[0], res->get_data(), | 463 | | null_map->get_data())); | 464 | | } | 465 | | } | 466 | | | 467 | 9.99k | block.get_by_position(result).column = | 468 | 10.0k | ColumnNullable::create(std::move(res), std::move(null_map)); | 469 | 10.0k | return Status::OK(); | 470 | 10.0k | } |
_ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 373 | 9 | uint32_t result, size_t input_rows_count) const override { | 374 | 9 | DORIS_CHECK_GE(arguments.size(), 2); | 375 | | | 376 | 9 | ColumnPtr jsonb_data_column; | 377 | 9 | bool jsonb_data_const = false; | 378 | 9 | const NullMap* data_null_map = nullptr; | 379 | | | 380 | 9 | if (block.get_by_position(arguments[0]).type->get_primitive_type() != | 381 | 9 | PrimitiveType::TYPE_JSONB) { | 382 | 0 | return Status::InvalidArgument( | 383 | 0 | "jsonb_extract first argument should be json type, but got {}", | 384 | 0 | block.get_by_position(arguments[0]).type->get_name()); | 385 | 0 | } | 386 | | | 387 | | // prepare jsonb data column | 388 | 9 | std::tie(jsonb_data_column, jsonb_data_const) = | 389 | 9 | unpack_if_const(block.get_by_position(arguments[0]).column); | 390 | 9 | if (const auto* nullable_column = | 391 | 9 | check_and_get_column<ColumnNullable>(jsonb_data_column.get())) { | 392 | 9 | jsonb_data_column = nullable_column->get_nested_column_ptr(); | 393 | 9 | data_null_map = &nullable_column->get_null_map_data(); | 394 | 9 | } | 395 | 9 | const auto& ldata = assert_cast<const ColumnString*>(jsonb_data_column.get())->get_chars(); | 396 | 9 | const auto& loffsets = | 397 | 9 | assert_cast<const ColumnString*>(jsonb_data_column.get())->get_offsets(); | 398 | | | 399 | | // prepare parse path column prepare | 400 | 9 | std::vector<const ColumnString*> jsonb_path_columns; | 401 | 9 | std::vector<bool> path_const(arguments.size() - 1); | 402 | 9 | std::vector<const NullMap*> path_null_maps(arguments.size() - 1, nullptr); | 403 | 22 | for (int i = 0; i < arguments.size() - 1; ++i) { | 404 | 13 | ColumnPtr path_column; | 405 | 13 | bool is_const = false; | 406 | 13 | std::tie(path_column, is_const) = | 407 | 13 | unpack_if_const(block.get_by_position(arguments[i + 1]).column); | 408 | 13 | 
path_const[i] = is_const; | 409 | 13 | if (const auto* nullable_column = | 410 | 13 | check_and_get_column<ColumnNullable>(path_column.get())) { | 411 | 1 | path_column = nullable_column->get_nested_column_ptr(); | 412 | 1 | path_null_maps[i] = &nullable_column->get_null_map_data(); | 413 | 1 | } | 414 | 13 | jsonb_path_columns.push_back(assert_cast<const ColumnString*>(path_column.get())); | 415 | 13 | } | 416 | | | 417 | 9 | auto null_map = ColumnUInt8::create(input_rows_count, 0); | 418 | 9 | auto res = Impl::ColumnType::create(); | 419 | | | 420 | | // execute Impl | 421 | | if constexpr (std::is_same_v<typename Impl::ReturnType, DataTypeString> || | 422 | 9 | std::is_same_v<typename Impl::ReturnType, DataTypeJsonb>) { | 423 | 9 | auto& res_data = res->get_chars(); | 424 | 9 | auto& res_offsets = res->get_offsets(); | 425 | 9 | RETURN_IF_ERROR(Impl::vector_vector_v2( | 426 | 9 | context, ldata, loffsets, data_null_map, jsonb_data_const, jsonb_path_columns, | 427 | 9 | path_null_maps, path_const, res_data, res_offsets, null_map->get_data())); | 428 | | } else { | 429 | | // not support other extract type for now (e.g. int, double, ...) 
| 430 | | DORIS_CHECK_EQ(jsonb_path_columns.size(), 1); | 431 | | const auto& rdata = jsonb_path_columns[0]->get_chars(); | 432 | | const auto& roffsets = jsonb_path_columns[0]->get_offsets(); | 433 | | | 434 | | auto create_all_null_result = [&]() { | 435 | | res = Impl::ColumnType::create(); | 436 | | res->insert_default(); | 437 | | auto nullable_column = | 438 | | ColumnNullable::create(std::move(res), ColumnUInt8::create(1, 1)); | 439 | | auto const_column = | 440 | | ColumnConst::create(std::move(nullable_column), input_rows_count); | 441 | | block.get_by_position(result).column = std::move(const_column); | 442 | | return Status::OK(); | 443 | | }; | 444 | | | 445 | | if (jsonb_data_const) { | 446 | | if (data_null_map && (*data_null_map)[0]) { | 447 | | return create_all_null_result(); | 448 | | } | 449 | | | 450 | | RETURN_IF_ERROR(Impl::scalar_vector(context, jsonb_data_column->get_data_at(0), | 451 | | rdata, roffsets, path_null_maps[0], | 452 | | res->get_data(), null_map->get_data())); | 453 | | } else if (path_const[0]) { | 454 | | if (path_null_maps[0] && (*path_null_maps[0])[0]) { | 455 | | return create_all_null_result(); | 456 | | } | 457 | | RETURN_IF_ERROR(Impl::vector_scalar(context, ldata, loffsets, data_null_map, | 458 | | jsonb_path_columns[0]->get_data_at(0), | 459 | | res->get_data(), null_map->get_data())); | 460 | | } else { | 461 | | RETURN_IF_ERROR(Impl::vector_vector(context, ldata, loffsets, data_null_map, rdata, | 462 | | roffsets, path_null_maps[0], res->get_data(), | 463 | | null_map->get_data())); | 464 | | } | 465 | | } | 466 | | | 467 | 9 | block.get_by_position(result).column = | 468 | 9 | ColumnNullable::create(std::move(res), std::move(null_map)); | 469 | 9 | return Status::OK(); | 470 | 9 | } |
|
471 | | }; |
472 | | |
473 | | class FunctionJsonbKeys : public IFunction { |
474 | | public: |
475 | | static constexpr auto name = "json_keys"; |
476 | | static constexpr auto alias = "jsonb_keys"; |
477 | 52 | static FunctionPtr create() { return std::make_shared<FunctionJsonbKeys>(); } |
478 | 0 | String get_name() const override { return name; } |
479 | 44 | bool is_variadic() const override { return true; } |
480 | 0 | size_t get_number_of_arguments() const override { return 0; } |
481 | | |
482 | 148 | bool use_default_implementation_for_nulls() const override { return false; } |
483 | | |
484 | 43 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
485 | 43 | return make_nullable( |
486 | 43 | std::make_shared<DataTypeArray>(make_nullable(std::make_shared<DataTypeString>()))); |
487 | 43 | } |
488 | | |
489 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
490 | 105 | uint32_t result, size_t input_rows_count) const override { |
491 | 105 | DORIS_CHECK_GE(arguments.size(), 1); |
492 | 105 | DORIS_CHECK(arguments.size() == 1 || arguments.size() == 2) |
493 | 0 | << "json_keys should have 1 or 2 arguments, but got " << arguments.size(); |
494 | | |
495 | 105 | const NullMap* data_null_map = nullptr; |
496 | 105 | const ColumnString* col_from_string = nullptr; |
497 | | // prepare jsonb data column |
498 | 105 | auto&& [jsonb_data_column, json_data_const] = |
499 | 105 | unpack_if_const(block.get_by_position(arguments[0]).column); |
500 | 105 | if (const auto* nullable = check_and_get_column<ColumnNullable>(jsonb_data_column.get())) { |
501 | 99 | col_from_string = |
502 | 99 | assert_cast<const ColumnString*>(nullable->get_nested_column_ptr().get()); |
503 | 99 | data_null_map = &nullable->get_null_map_data(); |
504 | 99 | } else { |
505 | 6 | col_from_string = assert_cast<const ColumnString*>(jsonb_data_column.get()); |
506 | 6 | } |
507 | | |
508 | | // prepare parse path column prepare, maybe we do not have path column |
509 | 105 | ColumnPtr jsonb_path_column = nullptr; |
510 | 105 | const ColumnString* jsonb_path_col = nullptr; |
511 | 105 | bool path_const = false; |
512 | 105 | const NullMap* path_null_map = nullptr; |
513 | 105 | if (arguments.size() == 2) { |
514 | | // we have should have a ColumnString for path |
515 | 78 | std::tie(jsonb_path_column, path_const) = |
516 | 78 | unpack_if_const(block.get_by_position(arguments[1]).column); |
517 | 78 | if (const auto* nullable = |
518 | 78 | check_and_get_column<ColumnNullable>(jsonb_path_column.get())) { |
519 | 10 | jsonb_path_column = nullable->get_nested_column_ptr(); |
520 | 10 | path_null_map = &nullable->get_null_map_data(); |
521 | 10 | } |
522 | 78 | jsonb_path_col = check_and_get_column<ColumnString>(jsonb_path_column.get()); |
523 | 78 | } |
524 | | |
525 | 105 | auto null_map = ColumnUInt8::create(input_rows_count, 0); |
526 | 105 | NullMap& res_null_map = null_map->get_data(); |
527 | | |
528 | 105 | auto dst_arr = ColumnArray::create( |
529 | 105 | ColumnNullable::create(ColumnString::create(), ColumnUInt8::create()), |
530 | 105 | ColumnArray::ColumnOffsets::create()); |
531 | 105 | auto& dst_nested_column = assert_cast<ColumnNullable&>(dst_arr->get_data()); |
532 | | |
533 | 105 | Status st = std::visit( |
534 | 105 | [&](auto data_const, auto has_path, auto path_const) { |
535 | 105 | return inner_loop_impl<data_const, has_path, path_const>( |
536 | 105 | input_rows_count, *dst_arr, dst_nested_column, res_null_map, |
537 | 105 | *col_from_string, data_null_map, jsonb_path_col, path_null_map); |
538 | 105 | }, _ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESG_SG_EEDaSA_SB_SC_ Line | Count | Source | 534 | 27 | [&](auto data_const, auto has_path, auto path_const) { | 535 | 27 | return inner_loop_impl<data_const, has_path, path_const>( | 536 | 27 | input_rows_count, *dst_arr, dst_nested_column, res_null_map, | 537 | 27 | *col_from_string, data_null_map, jsonb_path_col, path_null_map); | 538 | 27 | }, |
Unexecuted instantiation: _ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESG_SF_IbLb1EEEEDaSA_SB_SC_ _ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESF_IbLb1EESG_EEDaSA_SB_SC_ Line | Count | Source | 534 | 28 | [&](auto data_const, auto has_path, auto path_const) { | 535 | 28 | return inner_loop_impl<data_const, has_path, path_const>( | 536 | 28 | input_rows_count, *dst_arr, dst_nested_column, res_null_map, | 537 | 28 | *col_from_string, data_null_map, jsonb_path_col, path_null_map); | 538 | 28 | }, |
_ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESF_IbLb1EESH_EEDaSA_SB_SC_ Line | Count | Source | 534 | 48 | [&](auto data_const, auto has_path, auto path_const) { | 535 | 48 | return inner_loop_impl<data_const, has_path, path_const>( | 536 | 48 | input_rows_count, *dst_arr, dst_nested_column, res_null_map, | 537 | 48 | *col_from_string, data_null_map, jsonb_path_col, path_null_map); | 538 | 48 | }, |
Unexecuted instantiation: _ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESF_IbLb0EESH_EEDaSA_SB_SC_ Unexecuted instantiation: _ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESF_IbLb0EESG_EEDaSA_SB_SC_ _ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESG_SF_IbLb0EEEEDaSA_SB_SC_ Line | Count | Source | 534 | 2 | [&](auto data_const, auto has_path, auto path_const) { | 535 | 2 | return inner_loop_impl<data_const, has_path, path_const>( | 536 | 2 | input_rows_count, *dst_arr, dst_nested_column, res_null_map, | 537 | 2 | *col_from_string, data_null_map, jsonb_path_col, path_null_map); | 538 | 2 | }, |
Unexecuted instantiation: _ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESG_SG_EEDaSA_SB_SC_ |
539 | 105 | make_bool_variant(json_data_const), make_bool_variant(jsonb_path_column), |
540 | 105 | make_bool_variant(path_const)); |
541 | 105 | if (!st.ok()) { |
542 | 12 | return st; |
543 | 12 | } |
544 | 93 | block.get_by_position(result).column = |
545 | 93 | ColumnNullable::create(std::move(dst_arr), std::move(null_map)); |
546 | 93 | return st; |
547 | 105 | } |
548 | | |
549 | | private: |
550 | | template <bool JSONB_DATA_CONST, bool JSONB_PATH_PARAM, bool JSON_PATH_CONST> |
551 | | static ALWAYS_INLINE Status inner_loop_impl(size_t input_rows_count, ColumnArray& dst_arr, |
552 | | ColumnNullable& dst_nested_column, |
553 | | NullMap& res_null_map, |
554 | | const ColumnString& col_from_string, |
555 | | const NullMap* jsonb_data_nullmap, |
556 | | const ColumnString* jsonb_path_column, |
557 | 105 | const NullMap* path_null_map) { |
558 | | // if path is const, we just need to parse it once |
559 | 105 | JsonbPath const_path; |
560 | 105 | if constexpr (JSONB_PATH_PARAM && JSON_PATH_CONST) { |
561 | 48 | StringRef r_raw_ref = jsonb_path_column->get_data_at(0); |
562 | 48 | if (!const_path.seek(r_raw_ref.data, r_raw_ref.size)) { |
563 | 1 | return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}", |
564 | 1 | r_raw_ref.to_string()); |
565 | 1 | } |
566 | | |
567 | 47 | if (const_path.is_wildcard() || const_path.is_supper_wildcard()) { |
568 | 2 | return Status::InvalidJsonPath( |
569 | 2 | "In this situation, path expressions may not contain the * and ** tokens " |
570 | 2 | "or an array range."); |
571 | 2 | } |
572 | 47 | } |
573 | | |
574 | 385 | for (size_t i = 0; i < input_rows_count; ++i) { |
575 | 272 | auto index = index_check_const(i, JSONB_DATA_CONST); |
576 | | // if jsonb data is null or path column is null , we should return null |
577 | 272 | if (jsonb_data_nullmap && (*jsonb_data_nullmap)[index]) { |
578 | 23 | res_null_map[i] = 1; |
579 | 23 | dst_arr.insert_default(); |
580 | 23 | continue; |
581 | 23 | } |
582 | 249 | if constexpr (JSONB_PATH_PARAM && !JSON_PATH_CONST) { |
583 | 73 | if (path_null_map && (*path_null_map)[i]) { |
584 | 8 | res_null_map[i] = 1; |
585 | 8 | dst_arr.insert_default(); |
586 | 8 | continue; |
587 | 8 | } |
588 | 73 | } |
589 | | |
590 | 65 | auto json_data = col_from_string.get_data_at(index); |
591 | 249 | const JsonbDocument* doc = nullptr; |
592 | 249 | auto st = JsonbDocument::checkAndCreateDocument(json_data.data, json_data.size, &doc); |
593 | 249 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { |
594 | 0 | dst_arr.clear(); |
595 | 0 | return Status::InvalidArgument("jsonb data is invalid"); |
596 | 0 | } |
597 | 249 | const JsonbValue* obj_val; |
598 | 249 | JsonbFindResult find_result; |
599 | 249 | if constexpr (JSONB_PATH_PARAM) { |
600 | 195 | if constexpr (!JSON_PATH_CONST) { |
601 | 73 | auto data = jsonb_path_column->get_data_at(i); |
602 | 73 | JsonbPath path; |
603 | 73 | if (!path.seek(data.data, data.size)) { |
604 | 5 | return Status::InvalidArgument( |
605 | 5 | "Json path error: Invalid Json Path for value: {} at row: {}", |
606 | 5 | std::string_view(data.data, data.size), i); |
607 | 5 | } |
608 | | |
609 | 68 | if (path.is_wildcard() || path.is_supper_wildcard()) { |
610 | 4 | return Status::InvalidJsonPath( |
611 | 4 | "In this situation, path expressions may not contain the * and ** " |
612 | 4 | "tokens " |
613 | 4 | "or an array range. at row: {}", |
614 | 4 | i); |
615 | 4 | } |
616 | 64 | find_result = doc->getValue()->findValue(path); |
617 | 122 | } else { |
618 | 122 | find_result = doc->getValue()->findValue(const_path); |
619 | 122 | } |
620 | 0 | obj_val = find_result.value; |
621 | 195 | } else { |
622 | 54 | obj_val = doc->getValue(); |
623 | 54 | } |
624 | | |
625 | 249 | if (!obj_val || !obj_val->isObject()) { |
626 | | // if jsonb data is not object we should return null |
627 | 182 | res_null_map[i] = 1; |
628 | 182 | dst_arr.insert_default(); |
629 | 182 | continue; |
630 | 182 | } |
631 | 67 | const auto* obj = obj_val->unpack<ObjectVal>(); |
632 | 76 | for (const auto& it : *obj) { |
633 | 76 | dst_nested_column.insert_data(it.getKeyStr(), it.klen()); |
634 | 76 | } |
635 | 67 | dst_arr.get_offsets().push_back(dst_nested_column.size()); |
636 | 67 | } //for |
637 | 113 | return Status::OK(); |
638 | 105 | } _ZN5doris17FunctionJsonbKeys15inner_loop_implILb0ELb0ELb0EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_ Line | Count | Source | 557 | 27 | const NullMap* path_null_map) { | 558 | | // if path is const, we just need to parse it once | 559 | 27 | JsonbPath const_path; | 560 | | if constexpr (JSONB_PATH_PARAM && JSON_PATH_CONST) { | 561 | | StringRef r_raw_ref = jsonb_path_column->get_data_at(0); | 562 | | if (!const_path.seek(r_raw_ref.data, r_raw_ref.size)) { | 563 | | return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}", | 564 | | r_raw_ref.to_string()); | 565 | | } | 566 | | | 567 | | if (const_path.is_wildcard() || const_path.is_supper_wildcard()) { | 568 | | return Status::InvalidJsonPath( | 569 | | "In this situation, path expressions may not contain the * and ** tokens " | 570 | | "or an array range."); | 571 | | } | 572 | | } | 573 | | | 574 | 85 | for (size_t i = 0; i < input_rows_count; ++i) { | 575 | 58 | auto index = index_check_const(i, JSONB_DATA_CONST); | 576 | | // if jsonb data is null or path column is null , we should return null | 577 | 58 | if (jsonb_data_nullmap && (*jsonb_data_nullmap)[index]) { | 578 | 4 | res_null_map[i] = 1; | 579 | 4 | dst_arr.insert_default(); | 580 | 4 | continue; | 581 | 4 | } | 582 | | if constexpr (JSONB_PATH_PARAM && !JSON_PATH_CONST) { | 583 | | if (path_null_map && (*path_null_map)[i]) { | 584 | | res_null_map[i] = 1; | 585 | | dst_arr.insert_default(); | 586 | | continue; | 587 | | } | 588 | | } | 589 | | | 590 | 54 | auto json_data = col_from_string.get_data_at(index); | 591 | 54 | const JsonbDocument* doc = nullptr; | 592 | 54 | auto st = JsonbDocument::checkAndCreateDocument(json_data.data, json_data.size, &doc); | 593 | 54 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { | 594 | 0 | dst_arr.clear(); | 595 | 0 | return 
Status::InvalidArgument("jsonb data is invalid"); | 596 | 0 | } | 597 | 54 | const JsonbValue* obj_val; | 598 | 54 | JsonbFindResult find_result; | 599 | | if constexpr (JSONB_PATH_PARAM) { | 600 | | if constexpr (!JSON_PATH_CONST) { | 601 | | auto data = jsonb_path_column->get_data_at(i); | 602 | | JsonbPath path; | 603 | | if (!path.seek(data.data, data.size)) { | 604 | | return Status::InvalidArgument( | 605 | | "Json path error: Invalid Json Path for value: {} at row: {}", | 606 | | std::string_view(data.data, data.size), i); | 607 | | } | 608 | | | 609 | | if (path.is_wildcard() || path.is_supper_wildcard()) { | 610 | | return Status::InvalidJsonPath( | 611 | | "In this situation, path expressions may not contain the * and ** " | 612 | | "tokens " | 613 | | "or an array range. at row: {}", | 614 | | i); | 615 | | } | 616 | | find_result = doc->getValue()->findValue(path); | 617 | | } else { | 618 | | find_result = doc->getValue()->findValue(const_path); | 619 | | } | 620 | | obj_val = find_result.value; | 621 | 54 | } else { | 622 | 54 | obj_val = doc->getValue(); | 623 | 54 | } | 624 | | | 625 | 54 | if (!obj_val || !obj_val->isObject()) { | 626 | | // if jsonb data is not object we should return null | 627 | 36 | res_null_map[i] = 1; | 628 | 36 | dst_arr.insert_default(); | 629 | 36 | continue; | 630 | 36 | } | 631 | 18 | const auto* obj = obj_val->unpack<ObjectVal>(); | 632 | 36 | for (const auto& it : *obj) { | 633 | 36 | dst_nested_column.insert_data(it.getKeyStr(), it.klen()); | 634 | 36 | } | 635 | 18 | dst_arr.get_offsets().push_back(dst_nested_column.size()); | 636 | 18 | } //for | 637 | 27 | return Status::OK(); | 638 | 27 | } |
Unexecuted instantiation: _ZN5doris17FunctionJsonbKeys15inner_loop_implILb0ELb0ELb1EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_ _ZN5doris17FunctionJsonbKeys15inner_loop_implILb0ELb1ELb0EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_ Line | Count | Source | 557 | 28 | const NullMap* path_null_map) { | 558 | | // if path is const, we just need to parse it once | 559 | 28 | JsonbPath const_path; | 560 | | if constexpr (JSONB_PATH_PARAM && JSON_PATH_CONST) { | 561 | | StringRef r_raw_ref = jsonb_path_column->get_data_at(0); | 562 | | if (!const_path.seek(r_raw_ref.data, r_raw_ref.size)) { | 563 | | return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}", | 564 | | r_raw_ref.to_string()); | 565 | | } | 566 | | | 567 | | if (const_path.is_wildcard() || const_path.is_supper_wildcard()) { | 568 | | return Status::InvalidJsonPath( | 569 | | "In this situation, path expressions may not contain the * and ** tokens " | 570 | | "or an array range."); | 571 | | } | 572 | | } | 573 | | | 574 | 87 | for (size_t i = 0; i < input_rows_count; ++i) { | 575 | 55 | auto index = index_check_const(i, JSONB_DATA_CONST); | 576 | | // if jsonb data is null or path column is null , we should return null | 577 | 55 | if (jsonb_data_nullmap && (*jsonb_data_nullmap)[index]) { | 578 | 6 | res_null_map[i] = 1; | 579 | 6 | dst_arr.insert_default(); | 580 | 6 | continue; | 581 | 6 | } | 582 | 49 | if constexpr (JSONB_PATH_PARAM && !JSON_PATH_CONST) { | 583 | 49 | if (path_null_map && (*path_null_map)[i]) { | 584 | 4 | res_null_map[i] = 1; | 585 | 4 | dst_arr.insert_default(); | 586 | 4 | continue; | 587 | 4 | } | 588 | 49 | } | 589 | | | 590 | 45 | auto json_data = col_from_string.get_data_at(index); | 591 | 49 
| const JsonbDocument* doc = nullptr; | 592 | 49 | auto st = JsonbDocument::checkAndCreateDocument(json_data.data, json_data.size, &doc); | 593 | 49 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { | 594 | 0 | dst_arr.clear(); | 595 | 0 | return Status::InvalidArgument("jsonb data is invalid"); | 596 | 0 | } | 597 | 49 | const JsonbValue* obj_val; | 598 | 49 | JsonbFindResult find_result; | 599 | 49 | if constexpr (JSONB_PATH_PARAM) { | 600 | 49 | if constexpr (!JSON_PATH_CONST) { | 601 | 49 | auto data = jsonb_path_column->get_data_at(i); | 602 | 49 | JsonbPath path; | 603 | 49 | if (!path.seek(data.data, data.size)) { | 604 | 5 | return Status::InvalidArgument( | 605 | 5 | "Json path error: Invalid Json Path for value: {} at row: {}", | 606 | 5 | std::string_view(data.data, data.size), i); | 607 | 5 | } | 608 | | | 609 | 44 | if (path.is_wildcard() || path.is_supper_wildcard()) { | 610 | 4 | return Status::InvalidJsonPath( | 611 | 4 | "In this situation, path expressions may not contain the * and ** " | 612 | 4 | "tokens " | 613 | 4 | "or an array range. at row: {}", | 614 | 4 | i); | 615 | 4 | } | 616 | 40 | find_result = doc->getValue()->findValue(path); | 617 | | } else { | 618 | | find_result = doc->getValue()->findValue(const_path); | 619 | | } | 620 | 0 | obj_val = find_result.value; | 621 | | } else { | 622 | | obj_val = doc->getValue(); | 623 | | } | 624 | | | 625 | 49 | if (!obj_val || !obj_val->isObject()) { | 626 | | // if jsonb data is not object we should return null | 627 | 25 | res_null_map[i] = 1; | 628 | 25 | dst_arr.insert_default(); | 629 | 25 | continue; | 630 | 25 | } | 631 | 24 | const auto* obj = obj_val->unpack<ObjectVal>(); | 632 | 24 | for (const auto& it : *obj) { | 633 | 15 | dst_nested_column.insert_data(it.getKeyStr(), it.klen()); | 634 | 15 | } | 635 | 24 | dst_arr.get_offsets().push_back(dst_nested_column.size()); | 636 | 24 | } //for | 637 | 32 | return Status::OK(); | 638 | 28 | } |
_ZN5doris17FunctionJsonbKeys15inner_loop_implILb0ELb1ELb1EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_ Line | Count | Source | 557 | 48 | const NullMap* path_null_map) { | 558 | | // if path is const, we just need to parse it once | 559 | 48 | JsonbPath const_path; | 560 | 48 | if constexpr (JSONB_PATH_PARAM && JSON_PATH_CONST) { | 561 | 48 | StringRef r_raw_ref = jsonb_path_column->get_data_at(0); | 562 | 48 | if (!const_path.seek(r_raw_ref.data, r_raw_ref.size)) { | 563 | 1 | return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}", | 564 | 1 | r_raw_ref.to_string()); | 565 | 1 | } | 566 | | | 567 | 47 | if (const_path.is_wildcard() || const_path.is_supper_wildcard()) { | 568 | 2 | return Status::InvalidJsonPath( | 569 | 2 | "In this situation, path expressions may not contain the * and ** tokens " | 570 | 2 | "or an array range."); | 571 | 2 | } | 572 | 47 | } | 573 | | | 574 | 183 | for (size_t i = 0; i < input_rows_count; ++i) { | 575 | 135 | auto index = index_check_const(i, JSONB_DATA_CONST); | 576 | | // if jsonb data is null or path column is null , we should return null | 577 | 135 | if (jsonb_data_nullmap && (*jsonb_data_nullmap)[index]) { | 578 | 13 | res_null_map[i] = 1; | 579 | 13 | dst_arr.insert_default(); | 580 | 13 | continue; | 581 | 13 | } | 582 | | if constexpr (JSONB_PATH_PARAM && !JSON_PATH_CONST) { | 583 | | if (path_null_map && (*path_null_map)[i]) { | 584 | | res_null_map[i] = 1; | 585 | | dst_arr.insert_default(); | 586 | | continue; | 587 | | } | 588 | | } | 589 | | | 590 | 122 | auto json_data = col_from_string.get_data_at(index); | 591 | 122 | const JsonbDocument* doc = nullptr; | 592 | 122 | auto st = JsonbDocument::checkAndCreateDocument(json_data.data, json_data.size, &doc); | 593 | 122 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { | 594 | 0 | dst_arr.clear(); | 
595 | 0 | return Status::InvalidArgument("jsonb data is invalid"); | 596 | 0 | } | 597 | 122 | const JsonbValue* obj_val; | 598 | 122 | JsonbFindResult find_result; | 599 | 122 | if constexpr (JSONB_PATH_PARAM) { | 600 | | if constexpr (!JSON_PATH_CONST) { | 601 | | auto data = jsonb_path_column->get_data_at(i); | 602 | | JsonbPath path; | 603 | | if (!path.seek(data.data, data.size)) { | 604 | | return Status::InvalidArgument( | 605 | | "Json path error: Invalid Json Path for value: {} at row: {}", | 606 | | std::string_view(data.data, data.size), i); | 607 | | } | 608 | | | 609 | | if (path.is_wildcard() || path.is_supper_wildcard()) { | 610 | | return Status::InvalidJsonPath( | 611 | | "In this situation, path expressions may not contain the * and ** " | 612 | | "tokens " | 613 | | "or an array range. at row: {}", | 614 | | i); | 615 | | } | 616 | | find_result = doc->getValue()->findValue(path); | 617 | 122 | } else { | 618 | 122 | find_result = doc->getValue()->findValue(const_path); | 619 | 122 | } | 620 | 122 | obj_val = find_result.value; | 621 | | } else { | 622 | | obj_val = doc->getValue(); | 623 | | } | 624 | | | 625 | 122 | if (!obj_val || !obj_val->isObject()) { | 626 | | // if jsonb data is not object we should return null | 627 | 113 | res_null_map[i] = 1; | 628 | 113 | dst_arr.insert_default(); | 629 | 113 | continue; | 630 | 113 | } | 631 | 9 | const auto* obj = obj_val->unpack<ObjectVal>(); | 632 | 9 | for (const auto& it : *obj) { | 633 | 9 | dst_nested_column.insert_data(it.getKeyStr(), it.klen()); | 634 | 9 | } | 635 | 9 | dst_arr.get_offsets().push_back(dst_nested_column.size()); | 636 | 9 | } //for | 637 | 48 | return Status::OK(); | 638 | 48 | } |
Unexecuted instantiation: _ZN5doris17FunctionJsonbKeys15inner_loop_implILb1ELb0ELb0EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_ Unexecuted instantiation: _ZN5doris17FunctionJsonbKeys15inner_loop_implILb1ELb0ELb1EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_ _ZN5doris17FunctionJsonbKeys15inner_loop_implILb1ELb1ELb0EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_ Line | Count | Source | 557 | 2 | const NullMap* path_null_map) { | 558 | | // if path is const, we just need to parse it once | 559 | 2 | JsonbPath const_path; | 560 | | if constexpr (JSONB_PATH_PARAM && JSON_PATH_CONST) { | 561 | | StringRef r_raw_ref = jsonb_path_column->get_data_at(0); | 562 | | if (!const_path.seek(r_raw_ref.data, r_raw_ref.size)) { | 563 | | return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}", | 564 | | r_raw_ref.to_string()); | 565 | | } | 566 | | | 567 | | if (const_path.is_wildcard() || const_path.is_supper_wildcard()) { | 568 | | return Status::InvalidJsonPath( | 569 | | "In this situation, path expressions may not contain the * and ** tokens " | 570 | | "or an array range."); | 571 | | } | 572 | | } | 573 | | | 574 | 30 | for (size_t i = 0; i < input_rows_count; ++i) { | 575 | 24 | auto index = index_check_const(i, JSONB_DATA_CONST); | 576 | | // if jsonb data is null or path column is null , we should return null | 577 | 24 | if (jsonb_data_nullmap && (*jsonb_data_nullmap)[index]) { | 578 | 0 | res_null_map[i] = 1; | 579 | 0 | dst_arr.insert_default(); | 580 | 0 | continue; | 581 | 0 | } | 582 | 24 | if constexpr (JSONB_PATH_PARAM && !JSON_PATH_CONST) { | 583 
| 24 | if (path_null_map && (*path_null_map)[i]) { | 584 | 4 | res_null_map[i] = 1; | 585 | 4 | dst_arr.insert_default(); | 586 | 4 | continue; | 587 | 4 | } | 588 | 24 | } | 589 | | | 590 | 20 | auto json_data = col_from_string.get_data_at(index); | 591 | 24 | const JsonbDocument* doc = nullptr; | 592 | 24 | auto st = JsonbDocument::checkAndCreateDocument(json_data.data, json_data.size, &doc); | 593 | 24 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { | 594 | 0 | dst_arr.clear(); | 595 | 0 | return Status::InvalidArgument("jsonb data is invalid"); | 596 | 0 | } | 597 | 24 | const JsonbValue* obj_val; | 598 | 24 | JsonbFindResult find_result; | 599 | 24 | if constexpr (JSONB_PATH_PARAM) { | 600 | 24 | if constexpr (!JSON_PATH_CONST) { | 601 | 24 | auto data = jsonb_path_column->get_data_at(i); | 602 | 24 | JsonbPath path; | 603 | 24 | if (!path.seek(data.data, data.size)) { | 604 | 0 | return Status::InvalidArgument( | 605 | 0 | "Json path error: Invalid Json Path for value: {} at row: {}", | 606 | 0 | std::string_view(data.data, data.size), i); | 607 | 0 | } | 608 | | | 609 | 24 | if (path.is_wildcard() || path.is_supper_wildcard()) { | 610 | 0 | return Status::InvalidJsonPath( | 611 | 0 | "In this situation, path expressions may not contain the * and ** " | 612 | 0 | "tokens " | 613 | 0 | "or an array range. 
at row: {}", | 614 | 0 | i); | 615 | 0 | } | 616 | 24 | find_result = doc->getValue()->findValue(path); | 617 | | } else { | 618 | | find_result = doc->getValue()->findValue(const_path); | 619 | | } | 620 | 0 | obj_val = find_result.value; | 621 | | } else { | 622 | | obj_val = doc->getValue(); | 623 | | } | 624 | | | 625 | 24 | if (!obj_val || !obj_val->isObject()) { | 626 | | // if jsonb data is not object we should return null | 627 | 8 | res_null_map[i] = 1; | 628 | 8 | dst_arr.insert_default(); | 629 | 8 | continue; | 630 | 8 | } | 631 | 16 | const auto* obj = obj_val->unpack<ObjectVal>(); | 632 | 16 | for (const auto& it : *obj) { | 633 | 16 | dst_nested_column.insert_data(it.getKeyStr(), it.klen()); | 634 | 16 | } | 635 | 16 | dst_arr.get_offsets().push_back(dst_nested_column.size()); | 636 | 16 | } //for | 637 | 6 | return Status::OK(); | 638 | 2 | } |
Unexecuted instantiation: _ZN5doris17FunctionJsonbKeys15inner_loop_implILb1ELb1ELb1EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_ |
639 | | }; |
640 | | |
641 | | class FunctionJsonbExtractPath : public IFunction { |
642 | | public: |
643 | | static constexpr auto name = "json_exists_path"; |
644 | | static constexpr auto alias = "jsonb_exists_path"; |
645 | | using ColumnType = ColumnUInt8; |
646 | | using Container = typename ColumnType::Container; |
647 | 183 | static FunctionPtr create() { return std::make_shared<FunctionJsonbExtractPath>(); } |
648 | 1 | String get_name() const override { return name; } |
649 | 174 | size_t get_number_of_arguments() const override { return 2; } |
650 | 174 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
651 | | // it only needs to indicate existence and does not need to return nullable values. |
652 | 174 | const auto nullable = std::ranges::any_of( |
653 | 196 | arguments, [](const DataTypePtr& type) { return type->is_nullable(); }); |
654 | 174 | if (nullable) { |
655 | 153 | return make_nullable(std::make_shared<DataTypeUInt8>()); |
656 | 153 | } else { |
657 | 21 | return std::make_shared<DataTypeUInt8>(); |
658 | 21 | } |
659 | 174 | } |
660 | | |
661 | 1.53k | bool use_default_implementation_for_nulls() const override { return false; } |
662 | | |
663 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
664 | 1.36k | uint32_t result, size_t input_rows_count) const override { |
665 | | // prepare jsonb data column |
666 | 1.36k | auto&& [jsonb_data_column, jsonb_data_const] = |
667 | 1.36k | unpack_if_const(block.get_by_position(arguments[0]).column); |
668 | | |
669 | 1.36k | const NullMap* data_null_map = nullptr; |
670 | 1.36k | const ColumnString* data_col = nullptr; |
671 | 1.36k | if (const auto* nullable = check_and_get_column<ColumnNullable>(jsonb_data_column.get())) { |
672 | 1.17k | data_col = assert_cast<const ColumnString*>(nullable->get_nested_column_ptr().get()); |
673 | 1.17k | data_null_map = &nullable->get_null_map_data(); |
674 | 1.17k | } else { |
675 | 184 | data_col = assert_cast<const ColumnString*>(jsonb_data_column.get()); |
676 | 184 | } |
677 | | |
678 | 1.36k | const auto& ldata = data_col->get_chars(); |
679 | 1.36k | const auto& loffsets = data_col->get_offsets(); |
680 | | |
681 | | // prepare parse path column prepare |
682 | 1.36k | auto&& [path_column, path_const] = |
683 | 1.36k | unpack_if_const(block.get_by_position(arguments[1]).column); |
684 | 1.36k | const ColumnString* path_col = nullptr; |
685 | 1.36k | const NullMap* path_null_map = nullptr; |
686 | 1.36k | if (const auto* nullable = check_and_get_column<ColumnNullable>(path_column.get())) { |
687 | 7 | path_col = assert_cast<const ColumnString*>(nullable->get_nested_column_ptr().get()); |
688 | 7 | path_null_map = &nullable->get_null_map_data(); |
689 | 1.35k | } else { |
690 | 1.35k | path_col = assert_cast<const ColumnString*>(path_column.get()); |
691 | 1.35k | } |
692 | | |
693 | 1.36k | DORIS_CHECK(!(jsonb_data_const && path_const)) |
694 | 0 | << "jsonb_data_const and path_const should not be both const"; |
695 | | |
696 | 1.36k | auto create_all_null_result = [&]() { |
697 | 3 | auto res = ColumnType::create(); |
698 | 3 | res->insert_default(); |
699 | 3 | auto nullable_column = |
700 | 3 | ColumnNullable::create(std::move(res), ColumnUInt8::create(1, 1)); |
701 | 3 | auto const_column = ColumnConst::create(std::move(nullable_column), input_rows_count); |
702 | 3 | block.get_by_position(result).column = std::move(const_column); |
703 | 3 | return Status::OK(); |
704 | 3 | }; |
705 | | |
706 | 1.36k | ColumnUInt8::MutablePtr result_null_map_column; |
707 | 1.36k | NullMap* result_null_map = nullptr; |
708 | 1.36k | if (data_null_map || path_null_map) { |
709 | 1.17k | result_null_map_column = ColumnUInt8::create(input_rows_count, 0); |
710 | 1.17k | result_null_map = &result_null_map_column->get_data(); |
711 | | |
712 | 1.17k | if (data_null_map) { |
713 | 1.17k | VectorizedUtils::update_null_map(*result_null_map, *data_null_map, |
714 | 1.17k | jsonb_data_const); |
715 | 1.17k | } |
716 | | |
717 | 1.17k | if (path_null_map) { |
718 | 7 | VectorizedUtils::update_null_map(*result_null_map, *path_null_map, path_const); |
719 | 7 | } |
720 | | |
721 | 1.17k | if (!simd::contain_zero(result_null_map->data(), input_rows_count)) { |
722 | 3 | return create_all_null_result(); |
723 | 3 | } |
724 | 1.17k | } |
725 | | |
726 | 1.35k | auto res = ColumnType::create(); |
727 | | |
728 | 1.35k | bool is_invalid_json_path = false; |
729 | | |
730 | 1.35k | const auto& rdata = path_col->get_chars(); |
731 | 1.35k | const auto& roffsets = path_col->get_offsets(); |
732 | 1.35k | if (jsonb_data_const) { |
733 | 2 | if (data_null_map && (*data_null_map)[0]) { |
734 | 0 | return create_all_null_result(); |
735 | 0 | } |
736 | 2 | scalar_vector(context, data_col->get_data_at(0), rdata, roffsets, res->get_data(), |
737 | 2 | result_null_map, is_invalid_json_path); |
738 | 1.35k | } else if (path_const) { |
739 | 1.32k | if (path_null_map && (*path_null_map)[0]) { |
740 | 0 | return create_all_null_result(); |
741 | 0 | } |
742 | 1.32k | vector_scalar(context, ldata, loffsets, path_col->get_data_at(0), res->get_data(), |
743 | 1.32k | result_null_map, is_invalid_json_path); |
744 | 1.32k | } else { |
745 | 35 | vector_vector(context, ldata, loffsets, rdata, roffsets, res->get_data(), |
746 | 35 | result_null_map, is_invalid_json_path); |
747 | 35 | } |
748 | 1.35k | if (is_invalid_json_path) { |
749 | 7 | return Status::InvalidArgument( |
750 | 7 | "Json path error: Invalid Json Path for value: {}", |
751 | 7 | std::string_view(reinterpret_cast<const char*>(rdata.data()), rdata.size())); |
752 | 7 | } |
753 | | |
754 | 1.35k | if (result_null_map) { |
755 | 1.17k | auto nullabel_col = |
756 | 1.17k | ColumnNullable::create(std::move(res), std::move(result_null_map_column)); |
757 | 1.17k | block.get_by_position(result).column = std::move(nullabel_col); |
758 | 1.17k | } else { |
759 | 181 | block.get_by_position(result).column = std::move(res); |
760 | 181 | } |
761 | 1.35k | return Status::OK(); |
762 | 1.35k | } |
763 | | |
764 | | private: |
765 | | static ALWAYS_INLINE void inner_loop_impl(size_t i, Container& res, const char* l_raw_str, |
766 | 3.01k | size_t l_str_size, JsonbPath& path) { |
767 | | // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory |
768 | 3.01k | const JsonbDocument* doc = nullptr; |
769 | 3.01k | auto st = JsonbDocument::checkAndCreateDocument(l_raw_str, l_str_size, &doc); |
770 | 3.01k | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { |
771 | 0 | return; |
772 | 0 | } |
773 | | |
774 | | // value is NOT necessary to be deleted since JsonbValue will not allocate memory |
775 | 3.01k | auto result = doc->getValue()->findValue(path); |
776 | | |
777 | 3.01k | if (result.value) { |
778 | 445 | res[i] = 1; |
779 | 445 | } |
780 | 3.01k | } |
781 | | static void vector_vector(FunctionContext* context, const ColumnString::Chars& ldata, |
782 | | const ColumnString::Offsets& loffsets, |
783 | | const ColumnString::Chars& rdata, |
784 | | const ColumnString::Offsets& roffsets, Container& res, |
785 | 35 | const NullMap* result_null_map, bool& is_invalid_json_path) { |
786 | 35 | const size_t size = loffsets.size(); |
787 | 35 | res.resize_fill(size, 0); |
788 | | |
789 | 80 | for (size_t i = 0; i < size; i++) { |
790 | 50 | if (result_null_map && (*result_null_map)[i]) { |
791 | 8 | continue; |
792 | 8 | } |
793 | | |
794 | 42 | const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]); |
795 | 42 | int l_str_size = loffsets[i] - loffsets[i - 1]; |
796 | | |
797 | 42 | const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]); |
798 | 42 | int r_str_size = roffsets[i] - roffsets[i - 1]; |
799 | | |
800 | 42 | JsonbPath path; |
801 | 42 | if (!path.seek(r_raw_str, r_str_size)) { |
802 | 5 | is_invalid_json_path = true; |
803 | 5 | return; |
804 | 5 | } |
805 | | |
806 | 37 | inner_loop_impl(i, res, l_raw_str, l_str_size, path); |
807 | 37 | } |
808 | 35 | } |
809 | | static void scalar_vector(FunctionContext* context, const StringRef& ldata, |
810 | | const ColumnString::Chars& rdata, |
811 | | const ColumnString::Offsets& roffsets, Container& res, |
812 | 2 | const NullMap* result_null_map, bool& is_invalid_json_path) { |
813 | 2 | const size_t size = roffsets.size(); |
814 | 2 | res.resize_fill(size, 0); |
815 | | |
816 | 14 | for (size_t i = 0; i < size; i++) { |
817 | 13 | if (result_null_map && (*result_null_map)[i]) { |
818 | 4 | continue; |
819 | 4 | } |
820 | 9 | const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]); |
821 | 9 | int r_str_size = roffsets[i] - roffsets[i - 1]; |
822 | | |
823 | 9 | JsonbPath path; |
824 | 9 | if (!path.seek(r_raw_str, r_str_size)) { |
825 | 1 | is_invalid_json_path = true; |
826 | 1 | return; |
827 | 1 | } |
828 | | |
829 | 8 | inner_loop_impl(i, res, ldata.data, ldata.size, path); |
830 | 8 | } |
831 | 2 | } |
832 | | static void vector_scalar(FunctionContext* context, const ColumnString::Chars& ldata, |
833 | | const ColumnString::Offsets& loffsets, const StringRef& rdata, |
834 | | Container& res, const NullMap* result_null_map, |
835 | 1.32k | bool& is_invalid_json_path) { |
836 | 1.32k | const size_t size = loffsets.size(); |
837 | 1.32k | res.resize_fill(size, 0); |
838 | | |
839 | 1.32k | JsonbPath path; |
840 | 1.32k | if (!path.seek(rdata.data, rdata.size)) { |
841 | 1 | is_invalid_json_path = true; |
842 | 1 | return; |
843 | 1 | } |
844 | | |
845 | 4.51k | for (size_t i = 0; i < size; i++) { |
846 | 3.19k | if (result_null_map && (*result_null_map)[i]) { |
847 | 232 | continue; |
848 | 232 | } |
849 | 2.96k | const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]); |
850 | 2.96k | int l_str_size = loffsets[i] - loffsets[i - 1]; |
851 | | |
852 | 2.96k | inner_loop_impl(i, res, l_raw_str, l_str_size, path); |
853 | 2.96k | } |
854 | 1.32k | } |
855 | | }; |
856 | | |
857 | | template <typename ValueType> |
858 | | struct JsonbExtractStringImpl { |
859 | | using ReturnType = typename ValueType::ReturnType; |
860 | | using ColumnType = typename ValueType::ColumnType; |
861 | | |
862 | | private: |
863 | | static ALWAYS_INLINE void inner_loop_impl(JsonbWriter* writer, size_t i, |
864 | | ColumnString::Chars& res_data, |
865 | | ColumnString::Offsets& res_offsets, NullMap& null_map, |
866 | | std::unique_ptr<JsonbToJson>& formater, |
867 | 139k | const char* l_raw, size_t l_size, JsonbPath& path) { |
868 | | // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory |
869 | 139k | const JsonbDocument* doc = nullptr; |
870 | 139k | auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc); |
871 | 139k | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { |
872 | 0 | StringOP::push_null_string(i, res_data, res_offsets, null_map); |
873 | 0 | return; |
874 | 0 | } |
875 | | |
876 | | // value is NOT necessary to be deleted since JsonbValue will not allocate memory |
877 | 139k | auto find_result = doc->getValue()->findValue(path); |
878 | | |
879 | 139k | if (UNLIKELY(!find_result.value)) { |
880 | 20.9k | StringOP::push_null_string(i, res_data, res_offsets, null_map); |
881 | 20.9k | return; |
882 | 20.9k | } |
883 | | |
884 | 118k | if constexpr (ValueType::only_get_type) { |
885 | 429 | StringOP::push_value_string(std::string_view(find_result.value->typeName()), i, |
886 | 429 | res_data, res_offsets); |
887 | 429 | return; |
888 | 117k | } else { |
889 | 117k | static_assert(std::is_same_v<DataTypeJsonb, ReturnType>); |
890 | 117k | if constexpr (ValueType::no_quotes) { |
891 | 5 | if (find_result.value->isString()) { |
892 | 4 | const auto* str_value = find_result.value->unpack<JsonbStringVal>(); |
893 | 4 | const auto* blob = str_value->getBlob(); |
894 | 4 | if (str_value->length() > 1 && blob[0] == '"' && |
895 | 4 | blob[str_value->length() - 1] == '"') { |
896 | 0 | writer->writeStartString(); |
897 | 0 | writer->writeString(blob + 1, str_value->length() - 2); |
898 | 0 | writer->writeEndString(); |
899 | 0 | StringOP::push_value_string( |
900 | 0 | std::string_view(writer->getOutput()->getBuffer(), |
901 | 0 | writer->getOutput()->getSize()), |
902 | 0 | i, res_data, res_offsets); |
903 | 0 | return; |
904 | 0 | } |
905 | 4 | } |
906 | 5 | } |
907 | 5 | writer->writeValueSimple(find_result.value); |
908 | 117k | StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(), |
909 | 117k | writer->getOutput()->getSize()), |
910 | 117k | i, res_data, res_offsets); |
911 | 117k | } |
912 | 118k | } _ZN5doris22JsonbExtractStringImplINS_13JsonbTypeTypeEE15inner_loop_implEPNS_12JsonbWriterTINS_14JsonbOutStreamEEEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEESC_RSt10unique_ptrINS_11JsonbToJsonESt14default_deleteISG_EEPKcmRNS_9JsonbPathE Line | Count | Source | 867 | 2.98k | const char* l_raw, size_t l_size, JsonbPath& path) { | 868 | | // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory | 869 | 2.98k | const JsonbDocument* doc = nullptr; | 870 | 2.98k | auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc); | 871 | 2.98k | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { | 872 | 0 | StringOP::push_null_string(i, res_data, res_offsets, null_map); | 873 | 0 | return; | 874 | 0 | } | 875 | | | 876 | | // value is NOT necessary to be deleted since JsonbValue will not allocate memory | 877 | 2.98k | auto find_result = doc->getValue()->findValue(path); | 878 | | | 879 | 2.98k | if (UNLIKELY(!find_result.value)) { | 880 | 2.55k | StringOP::push_null_string(i, res_data, res_offsets, null_map); | 881 | 2.55k | return; | 882 | 2.55k | } | 883 | | | 884 | 429 | if constexpr (ValueType::only_get_type) { | 885 | 429 | StringOP::push_value_string(std::string_view(find_result.value->typeName()), i, | 886 | 429 | res_data, res_offsets); | 887 | 429 | return; | 888 | | } else { | 889 | | static_assert(std::is_same_v<DataTypeJsonb, ReturnType>); | 890 | | if constexpr (ValueType::no_quotes) { | 891 | | if (find_result.value->isString()) { | 892 | | const auto* str_value = find_result.value->unpack<JsonbStringVal>(); | 893 | | const auto* blob = str_value->getBlob(); | 894 | | if (str_value->length() > 1 && blob[0] == '"' && | 895 | | blob[str_value->length() - 1] == '"') { | 896 | | writer->writeStartString(); | 897 | | writer->writeString(blob + 1, str_value->length() - 2); | 898 | | writer->writeEndString(); | 899 | | 
StringOP::push_value_string( | 900 | | std::string_view(writer->getOutput()->getBuffer(), | 901 | | writer->getOutput()->getSize()), | 902 | | i, res_data, res_offsets); | 903 | | return; | 904 | | } | 905 | | } | 906 | | } | 907 | | writer->writeValueSimple(find_result.value); | 908 | | StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(), | 909 | | writer->getOutput()->getSize()), | 910 | | i, res_data, res_offsets); | 911 | | } | 912 | 429 | } |
_ZN5doris22JsonbExtractStringImplINS_13JsonbTypeJsonEE15inner_loop_implEPNS_12JsonbWriterTINS_14JsonbOutStreamEEEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEESC_RSt10unique_ptrINS_11JsonbToJsonESt14default_deleteISG_EEPKcmRNS_9JsonbPathE Line | Count | Source | 867 | 136k | const char* l_raw, size_t l_size, JsonbPath& path) { | 868 | | // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory | 869 | 136k | const JsonbDocument* doc = nullptr; | 870 | 136k | auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc); | 871 | 136k | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { | 872 | 0 | StringOP::push_null_string(i, res_data, res_offsets, null_map); | 873 | 0 | return; | 874 | 0 | } | 875 | | | 876 | | // value is NOT necessary to be deleted since JsonbValue will not allocate memory | 877 | 136k | auto find_result = doc->getValue()->findValue(path); | 878 | | | 879 | 136k | if (UNLIKELY(!find_result.value)) { | 880 | 18.3k | StringOP::push_null_string(i, res_data, res_offsets, null_map); | 881 | 18.3k | return; | 882 | 18.3k | } | 883 | | | 884 | | if constexpr (ValueType::only_get_type) { | 885 | | StringOP::push_value_string(std::string_view(find_result.value->typeName()), i, | 886 | | res_data, res_offsets); | 887 | | return; | 888 | 117k | } else { | 889 | 117k | static_assert(std::is_same_v<DataTypeJsonb, ReturnType>); | 890 | | if constexpr (ValueType::no_quotes) { | 891 | | if (find_result.value->isString()) { | 892 | | const auto* str_value = find_result.value->unpack<JsonbStringVal>(); | 893 | | const auto* blob = str_value->getBlob(); | 894 | | if (str_value->length() > 1 && blob[0] == '"' && | 895 | | blob[str_value->length() - 1] == '"') { | 896 | | writer->writeStartString(); | 897 | | writer->writeString(blob + 1, str_value->length() - 2); | 898 | | writer->writeEndString(); | 899 | | StringOP::push_value_string( | 900 | | 
std::string_view(writer->getOutput()->getBuffer(), | 901 | | writer->getOutput()->getSize()), | 902 | | i, res_data, res_offsets); | 903 | | return; | 904 | | } | 905 | | } | 906 | | } | 907 | 117k | writer->writeValueSimple(find_result.value); | 908 | 117k | StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(), | 909 | 117k | writer->getOutput()->getSize()), | 910 | 117k | i, res_data, res_offsets); | 911 | 117k | } | 912 | 117k | } |
_ZN5doris22JsonbExtractStringImplINS_21JsonbTypeJsonNoQuotesEE15inner_loop_implEPNS_12JsonbWriterTINS_14JsonbOutStreamEEEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEESC_RSt10unique_ptrINS_11JsonbToJsonESt14default_deleteISG_EEPKcmRNS_9JsonbPathE Line | Count | Source | 867 | 5 | const char* l_raw, size_t l_size, JsonbPath& path) { | 868 | | // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory | 869 | 5 | const JsonbDocument* doc = nullptr; | 870 | 5 | auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc); | 871 | 5 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { | 872 | 0 | StringOP::push_null_string(i, res_data, res_offsets, null_map); | 873 | 0 | return; | 874 | 0 | } | 875 | | | 876 | | // value is NOT necessary to be deleted since JsonbValue will not allocate memory | 877 | 5 | auto find_result = doc->getValue()->findValue(path); | 878 | | | 879 | 5 | if (UNLIKELY(!find_result.value)) { | 880 | 0 | StringOP::push_null_string(i, res_data, res_offsets, null_map); | 881 | 0 | return; | 882 | 0 | } | 883 | | | 884 | | if constexpr (ValueType::only_get_type) { | 885 | | StringOP::push_value_string(std::string_view(find_result.value->typeName()), i, | 886 | | res_data, res_offsets); | 887 | | return; | 888 | 5 | } else { | 889 | 5 | static_assert(std::is_same_v<DataTypeJsonb, ReturnType>); | 890 | 5 | if constexpr (ValueType::no_quotes) { | 891 | 5 | if (find_result.value->isString()) { | 892 | 4 | const auto* str_value = find_result.value->unpack<JsonbStringVal>(); | 893 | 4 | const auto* blob = str_value->getBlob(); | 894 | 4 | if (str_value->length() > 1 && blob[0] == '"' && | 895 | 4 | blob[str_value->length() - 1] == '"') { | 896 | 0 | writer->writeStartString(); | 897 | 0 | writer->writeString(blob + 1, str_value->length() - 2); | 898 | 0 | writer->writeEndString(); | 899 | 0 | StringOP::push_value_string( | 900 | 0 | 
std::string_view(writer->getOutput()->getBuffer(), | 901 | 0 | writer->getOutput()->getSize()), | 902 | 0 | i, res_data, res_offsets); | 903 | 0 | return; | 904 | 0 | } | 905 | 4 | } | 906 | 5 | } | 907 | 5 | writer->writeValueSimple(find_result.value); | 908 | 5 | StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(), | 909 | 5 | writer->getOutput()->getSize()), | 910 | 5 | i, res_data, res_offsets); | 911 | 5 | } | 912 | 5 | } |
|
913 | | |
914 | | public: |
915 | | // for jsonb_extract_string |
916 | | static Status vector_vector_v2( |
917 | | FunctionContext* context, const ColumnString::Chars& ldata, |
918 | | const ColumnString::Offsets& loffsets, const NullMap* l_null_map, |
919 | | const bool& json_data_const, |
920 | | const std::vector<const ColumnString*>& rdata_columns, // here we can support more paths |
921 | | const std::vector<const NullMap*>& r_null_maps, const std::vector<bool>& path_const, |
922 | 11.3k | ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets, NullMap& null_map) { |
923 | 11.3k | const size_t input_rows_count = null_map.size(); |
924 | 11.3k | res_offsets.resize(input_rows_count); |
925 | | |
926 | 11.3k | auto writer = std::make_unique<JsonbWriter>(); |
927 | 11.3k | std::unique_ptr<JsonbToJson> formater; |
928 | | |
929 | | // reusable json path list, especially for const path |
930 | 11.3k | std::vector<JsonbPath> json_path_list; |
931 | 11.3k | json_path_list.resize(rdata_columns.size()); |
932 | | |
933 | | // lambda function to parse json path for row i and path pi |
934 | 11.7k | auto parse_json_path = [&](size_t i, size_t pi) -> Status { |
935 | 11.7k | const auto index = index_check_const(i, path_const[pi]); |
936 | | |
937 | 11.7k | const ColumnString* path_col = rdata_columns[pi]; |
938 | 11.7k | const ColumnString::Chars& rdata = path_col->get_chars(); |
939 | 11.7k | const ColumnString::Offsets& roffsets = path_col->get_offsets(); |
940 | 11.7k | size_t r_off = roffsets[index - 1]; |
941 | 11.7k | size_t r_size = roffsets[index] - r_off; |
942 | 11.7k | const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]); |
943 | | |
944 | 11.7k | JsonbPath path; |
945 | 11.7k | if (!path.seek(r_raw, r_size)) { |
946 | 7 | return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}", |
947 | 7 | std::string_view(r_raw, r_size)); |
948 | 7 | } |
949 | | |
950 | 11.7k | json_path_list[pi] = std::move(path); |
951 | | |
952 | 11.7k | return Status::OK(); |
953 | 11.7k | }; _ZZN5doris22JsonbExtractStringImplINS_13JsonbTypeTypeEE16vector_vector_v2EPNS_15FunctionContextERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS5_IjLm4096ES8_Lm16ELm15EEEPSA_RKbRKSt6vectorIPKNS_9ColumnStrIjEESaISM_EERKSI_ISF_SaISF_EERKSI_IbSaIbEERS9_RSC_SZ_ENKUlmmE_clEmm Line | Count | Source | 934 | 1.34k | auto parse_json_path = [&](size_t i, size_t pi) -> Status { | 935 | 1.34k | const auto index = index_check_const(i, path_const[pi]); | 936 | | | 937 | 1.34k | const ColumnString* path_col = rdata_columns[pi]; | 938 | 1.34k | const ColumnString::Chars& rdata = path_col->get_chars(); | 939 | 1.34k | const ColumnString::Offsets& roffsets = path_col->get_offsets(); | 940 | 1.34k | size_t r_off = roffsets[index - 1]; | 941 | 1.34k | size_t r_size = roffsets[index] - r_off; | 942 | 1.34k | const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]); | 943 | | | 944 | 1.34k | JsonbPath path; | 945 | 1.34k | if (!path.seek(r_raw, r_size)) { | 946 | 1 | return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}", | 947 | 1 | std::string_view(r_raw, r_size)); | 948 | 1 | } | 949 | | | 950 | 1.33k | json_path_list[pi] = std::move(path); | 951 | | | 952 | 1.33k | return Status::OK(); | 953 | 1.34k | }; |
_ZZN5doris22JsonbExtractStringImplINS_13JsonbTypeJsonEE16vector_vector_v2EPNS_15FunctionContextERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS5_IjLm4096ES8_Lm16ELm15EEEPSA_RKbRKSt6vectorIPKNS_9ColumnStrIjEESaISM_EERKSI_ISF_SaISF_EERKSI_IbSaIbEERS9_RSC_SZ_ENKUlmmE_clEmm Line | Count | Source | 934 | 10.3k | auto parse_json_path = [&](size_t i, size_t pi) -> Status { | 935 | 10.3k | const auto index = index_check_const(i, path_const[pi]); | 936 | | | 937 | 10.3k | const ColumnString* path_col = rdata_columns[pi]; | 938 | 10.3k | const ColumnString::Chars& rdata = path_col->get_chars(); | 939 | 10.3k | const ColumnString::Offsets& roffsets = path_col->get_offsets(); | 940 | 10.3k | size_t r_off = roffsets[index - 1]; | 941 | 10.3k | size_t r_size = roffsets[index] - r_off; | 942 | 10.3k | const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]); | 943 | | | 944 | 10.3k | JsonbPath path; | 945 | 10.3k | if (!path.seek(r_raw, r_size)) { | 946 | 6 | return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}", | 947 | 6 | std::string_view(r_raw, r_size)); | 948 | 6 | } | 949 | | | 950 | 10.3k | json_path_list[pi] = std::move(path); | 951 | | | 952 | 10.3k | return Status::OK(); | 953 | 10.3k | }; |
_ZZN5doris22JsonbExtractStringImplINS_21JsonbTypeJsonNoQuotesEE16vector_vector_v2EPNS_15FunctionContextERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS5_IjLm4096ES8_Lm16ELm15EEEPSA_RKbRKSt6vectorIPKNS_9ColumnStrIjEESaISM_EERKSI_ISF_SaISF_EERKSI_IbSaIbEERS9_RSC_SZ_ENKUlmmE_clEmm Line | Count | Source | 934 | 11 | auto parse_json_path = [&](size_t i, size_t pi) -> Status { | 935 | 11 | const auto index = index_check_const(i, path_const[pi]); | 936 | | | 937 | 11 | const ColumnString* path_col = rdata_columns[pi]; | 938 | 11 | const ColumnString::Chars& rdata = path_col->get_chars(); | 939 | 11 | const ColumnString::Offsets& roffsets = path_col->get_offsets(); | 940 | 11 | size_t r_off = roffsets[index - 1]; | 941 | 11 | size_t r_size = roffsets[index] - r_off; | 942 | 11 | const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]); | 943 | | | 944 | 11 | JsonbPath path; | 945 | 11 | if (!path.seek(r_raw, r_size)) { | 946 | 0 | return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}", | 947 | 0 | std::string_view(r_raw, r_size)); | 948 | 0 | } | 949 | | | 950 | 11 | json_path_list[pi] = std::move(path); | 951 | | | 952 | 11 | return Status::OK(); | 953 | 11 | }; |
|
954 | | |
955 | 23.0k | for (size_t pi = 0; pi < rdata_columns.size(); pi++) { |
956 | 11.6k | if (path_const[pi]) { |
957 | 11.3k | if (r_null_maps[pi] && (*r_null_maps[pi])[0]) { |
958 | 41 | continue; |
959 | 41 | } |
960 | 11.3k | RETURN_IF_ERROR(parse_json_path(0, pi)); |
961 | 11.3k | } |
962 | 11.6k | } |
963 | | |
964 | 11.3k | res_data.reserve(ldata.size()); |
965 | 152k | for (size_t i = 0; i < input_rows_count; ++i) { |
966 | 141k | if (null_map[i]) { |
967 | 0 | continue; |
968 | 0 | } |
969 | | |
970 | 141k | const auto data_index = index_check_const(i, json_data_const); |
971 | 141k | if (l_null_map && (*l_null_map)[data_index]) { |
972 | 1.91k | StringOP::push_null_string(i, res_data, res_offsets, null_map); |
973 | 1.91k | continue; |
974 | 1.91k | } |
975 | | |
976 | 139k | size_t l_off = loffsets[data_index - 1]; |
977 | 139k | size_t l_size = loffsets[data_index] - l_off; |
978 | 139k | const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]); |
979 | 139k | if (rdata_columns.size() == 1) { // just return origin value |
980 | 138k | const auto path_index = index_check_const(i, path_const[0]); |
981 | 138k | if (r_null_maps[0] && (*r_null_maps[0])[path_index]) { |
982 | 33 | StringOP::push_null_string(i, res_data, res_offsets, null_map); |
983 | 33 | continue; |
984 | 33 | } |
985 | | |
986 | 138k | if (!path_const[0]) { |
987 | 328 | RETURN_IF_ERROR(parse_json_path(i, 0)); |
988 | 328 | } |
989 | | |
990 | 138k | writer->reset(); |
991 | 138k | inner_loop_impl(writer.get(), i, res_data, res_offsets, null_map, formater, l_raw, |
992 | 138k | l_size, json_path_list[0]); |
993 | 138k | } else { // will make array string to user |
994 | 654 | writer->reset(); |
995 | 654 | bool has_value = false; |
996 | | |
997 | | // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory |
998 | 654 | const JsonbDocument* doc = nullptr; |
999 | 654 | auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc); |
1000 | | |
1001 | 1.78k | for (size_t pi = 0; pi < rdata_columns.size(); ++pi) { |
1002 | 1.23k | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { |
1003 | 0 | continue; |
1004 | 0 | } |
1005 | | |
1006 | 1.23k | const auto path_index = index_check_const(i, path_const[pi]); |
1007 | 1.23k | if (r_null_maps[pi] && (*r_null_maps[pi])[path_index]) { |
1008 | 99 | StringOP::push_null_string(i, res_data, res_offsets, null_map); |
1009 | 99 | break; |
1010 | 99 | } |
1011 | | |
1012 | 1.13k | if (!path_const[pi]) { |
1013 | 70 | RETURN_IF_ERROR(parse_json_path(i, pi)); |
1014 | 70 | } |
1015 | | |
1016 | 1.13k | auto find_result = doc->getValue()->findValue(json_path_list[pi]); |
1017 | | |
1018 | 1.13k | if (find_result.value) { |
1019 | 282 | if (!has_value) { |
1020 | 153 | has_value = true; |
1021 | 153 | writer->writeStartArray(); |
1022 | 153 | } |
1023 | 282 | if (find_result.value->isArray() && find_result.is_wildcard) { |
1024 | | // To avoid getting results of nested array like [[1, 2, 3], [4, 5, 6]], |
1025 | | // if value is array, we should write all items in array, instead of write the array itself. |
1026 | | // finaly we will get results like [1, 2, 3, 4, 5, 6] |
1027 | 54 | for (const auto& item : *find_result.value->unpack<ArrayVal>()) { |
1028 | 54 | writer->writeValue(&item); |
1029 | 54 | } |
1030 | 259 | } else { |
1031 | 259 | writer->writeValue(find_result.value); |
1032 | 259 | } |
1033 | 282 | } |
1034 | 1.13k | } |
1035 | 654 | if (has_value) { |
1036 | 153 | writer->writeEndArray(); |
1037 | 153 | StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(), |
1038 | 153 | writer->getOutput()->getSize()), |
1039 | 153 | i, res_data, res_offsets); |
1040 | 501 | } else { |
1041 | 501 | StringOP::push_null_string(i, res_data, res_offsets, null_map); |
1042 | 501 | } |
1043 | 654 | } |
1044 | 139k | } //for |
1045 | 11.3k | return Status::OK(); |
1046 | 11.3k | } _ZN5doris22JsonbExtractStringImplINS_13JsonbTypeTypeEE16vector_vector_v2EPNS_15FunctionContextERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS5_IjLm4096ES8_Lm16ELm15EEEPSA_RKbRKSt6vectorIPKNS_9ColumnStrIjEESaISM_EERKSI_ISF_SaISF_EERKSI_IbSaIbEERS9_RSC_SZ_ Line | Count | Source | 922 | 1.32k | ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets, NullMap& null_map) { | 923 | 1.32k | const size_t input_rows_count = null_map.size(); | 924 | 1.32k | res_offsets.resize(input_rows_count); | 925 | | | 926 | 1.32k | auto writer = std::make_unique<JsonbWriter>(); | 927 | 1.32k | std::unique_ptr<JsonbToJson> formater; | 928 | | | 929 | | // reuseable json path list, espacially for const path | 930 | 1.32k | std::vector<JsonbPath> json_path_list; | 931 | 1.32k | json_path_list.resize(rdata_columns.size()); | 932 | | | 933 | | // lambda function to parse json path for row i and path pi | 934 | 1.32k | auto parse_json_path = [&](size_t i, size_t pi) -> Status { | 935 | 1.32k | const auto index = index_check_const(i, path_const[pi]); | 936 | | | 937 | 1.32k | const ColumnString* path_col = rdata_columns[pi]; | 938 | 1.32k | const ColumnString::Chars& rdata = path_col->get_chars(); | 939 | 1.32k | const ColumnString::Offsets& roffsets = path_col->get_offsets(); | 940 | 1.32k | size_t r_off = roffsets[index - 1]; | 941 | 1.32k | size_t r_size = roffsets[index] - r_off; | 942 | 1.32k | const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]); | 943 | | | 944 | 1.32k | JsonbPath path; | 945 | 1.32k | if (!path.seek(r_raw, r_size)) { | 946 | 1.32k | return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}", | 947 | 1.32k | std::string_view(r_raw, r_size)); | 948 | 1.32k | } | 949 | | | 950 | 1.32k | json_path_list[pi] = std::move(path); | 951 | | | 952 | 1.32k | return Status::OK(); | 953 | 1.32k | }; | 954 | | | 955 | 2.65k | for (size_t pi = 0; pi < 
rdata_columns.size(); pi++) { | 956 | 1.32k | if (path_const[pi]) { | 957 | 1.32k | if (r_null_maps[pi] && (*r_null_maps[pi])[0]) { | 958 | 1 | continue; | 959 | 1 | } | 960 | 1.32k | RETURN_IF_ERROR(parse_json_path(0, pi)); | 961 | 1.32k | } | 962 | 1.32k | } | 963 | | | 964 | 1.32k | res_data.reserve(ldata.size()); | 965 | 4.57k | for (size_t i = 0; i < input_rows_count; ++i) { | 966 | 3.24k | if (null_map[i]) { | 967 | 0 | continue; | 968 | 0 | } | 969 | | | 970 | 3.24k | const auto data_index = index_check_const(i, json_data_const); | 971 | 3.24k | if (l_null_map && (*l_null_map)[data_index]) { | 972 | 248 | StringOP::push_null_string(i, res_data, res_offsets, null_map); | 973 | 248 | continue; | 974 | 248 | } | 975 | | | 976 | 3.00k | size_t l_off = loffsets[data_index - 1]; | 977 | 3.00k | size_t l_size = loffsets[data_index] - l_off; | 978 | 3.00k | const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]); | 979 | 3.00k | if (rdata_columns.size() == 1) { // just return origin value | 980 | 3.00k | const auto path_index = index_check_const(i, path_const[0]); | 981 | 3.00k | if (r_null_maps[0] && (*r_null_maps[0])[path_index]) { | 982 | 16 | StringOP::push_null_string(i, res_data, res_offsets, null_map); | 983 | 16 | continue; | 984 | 16 | } | 985 | | | 986 | 2.98k | if (!path_const[0]) { | 987 | 18 | RETURN_IF_ERROR(parse_json_path(i, 0)); | 988 | 18 | } | 989 | | | 990 | 2.98k | writer->reset(); | 991 | 2.98k | inner_loop_impl(writer.get(), i, res_data, res_offsets, null_map, formater, l_raw, | 992 | 2.98k | l_size, json_path_list[0]); | 993 | 2.98k | } else { // will make array string to user | 994 | 0 | writer->reset(); | 995 | 0 | bool has_value = false; | 996 | | | 997 | | // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory | 998 | 0 | const JsonbDocument* doc = nullptr; | 999 | 0 | auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc); | 1000 | |
| 1001 | 0 | for (size_t pi = 0; pi < rdata_columns.size(); ++pi) { | 1002 | 0 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { | 1003 | 0 | continue; | 1004 | 0 | } | 1005 | | | 1006 | 0 | const auto path_index = index_check_const(i, path_const[pi]); | 1007 | 0 | if (r_null_maps[pi] && (*r_null_maps[pi])[path_index]) { | 1008 | 0 | StringOP::push_null_string(i, res_data, res_offsets, null_map); | 1009 | 0 | break; | 1010 | 0 | } | 1011 | | | 1012 | 0 | if (!path_const[pi]) { | 1013 | 0 | RETURN_IF_ERROR(parse_json_path(i, pi)); | 1014 | 0 | } | 1015 | | | 1016 | 0 | auto find_result = doc->getValue()->findValue(json_path_list[pi]); | 1017 | |
| 1018 | 0 | if (find_result.value) { | 1019 | 0 | if (!has_value) { | 1020 | 0 | has_value = true; | 1021 | 0 | writer->writeStartArray(); | 1022 | 0 | } | 1023 | 0 | if (find_result.value->isArray() && find_result.is_wildcard) { | 1024 | | // To avoid getting results of nested array like [[1, 2, 3], [4, 5, 6]], | 1025 | | // if value is array, we should write all items in array, instead of write the array itself. | 1026 | | // finaly we will get results like [1, 2, 3, 4, 5, 6] | 1027 | 0 | for (const auto& item : *find_result.value->unpack<ArrayVal>()) { | 1028 | 0 | writer->writeValue(&item); | 1029 | 0 | } | 1030 | 0 | } else { | 1031 | 0 | writer->writeValue(find_result.value); | 1032 | 0 | } | 1033 | 0 | } | 1034 | 0 | } | 1035 | 0 | if (has_value) { | 1036 | 0 | writer->writeEndArray(); | 1037 | 0 | StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(), | 1038 | 0 | writer->getOutput()->getSize()), | 1039 | 0 | i, res_data, res_offsets); | 1040 | 0 | } else { | 1041 | 0 | StringOP::push_null_string(i, res_data, res_offsets, null_map); | 1042 | 0 | } | 1043 | 0 | } | 1044 | 3.00k | } //for | 1045 | 1.32k | return Status::OK(); | 1046 | 1.32k | } |
_ZN5doris22JsonbExtractStringImplINS_13JsonbTypeJsonEE16vector_vector_v2EPNS_15FunctionContextERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS5_IjLm4096ES8_Lm16ELm15EEEPSA_RKbRKSt6vectorIPKNS_9ColumnStrIjEESaISM_EERKSI_ISF_SaISF_EERKSI_IbSaIbEERS9_RSC_SZ_ Line | Count | Source | 922 | 10.0k | ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets, NullMap& null_map) { | 923 | 10.0k | const size_t input_rows_count = null_map.size(); | 924 | 10.0k | res_offsets.resize(input_rows_count); | 925 | | | 926 | 10.0k | auto writer = std::make_unique<JsonbWriter>(); | 927 | 10.0k | std::unique_ptr<JsonbToJson> formater; | 928 | | | 929 | | // reuseable json path list, espacially for const path | 930 | 10.0k | std::vector<JsonbPath> json_path_list; | 931 | 10.0k | json_path_list.resize(rdata_columns.size()); | 932 | | | 933 | | // lambda function to parse json path for row i and path pi | 934 | 10.0k | auto parse_json_path = [&](size_t i, size_t pi) -> Status { | 935 | 10.0k | const auto index = index_check_const(i, path_const[pi]); | 936 | | | 937 | 10.0k | const ColumnString* path_col = rdata_columns[pi]; | 938 | 10.0k | const ColumnString::Chars& rdata = path_col->get_chars(); | 939 | 10.0k | const ColumnString::Offsets& roffsets = path_col->get_offsets(); | 940 | 10.0k | size_t r_off = roffsets[index - 1]; | 941 | 10.0k | size_t r_size = roffsets[index] - r_off; | 942 | 10.0k | const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]); | 943 | | | 944 | 10.0k | JsonbPath path; | 945 | 10.0k | if (!path.seek(r_raw, r_size)) { | 946 | 10.0k | return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}", | 947 | 10.0k | std::string_view(r_raw, r_size)); | 948 | 10.0k | } | 949 | | | 950 | 10.0k | json_path_list[pi] = std::move(path); | 951 | | | 952 | 10.0k | return Status::OK(); | 953 | 10.0k | }; | 954 | | | 955 | 20.3k | for (size_t pi = 0; pi < rdata_columns.size(); pi++) { | 956 | 
10.3k | if (path_const[pi]) { | 957 | 10.0k | if (r_null_maps[pi] && (*r_null_maps[pi])[0]) { | 958 | 40 | continue; | 959 | 40 | } | 960 | 10.0k | RETURN_IF_ERROR(parse_json_path(0, pi)); | 961 | 10.0k | } | 962 | 10.3k | } | 963 | | | 964 | 10.0k | res_data.reserve(ldata.size()); | 965 | 147k | for (size_t i = 0; i < input_rows_count; ++i) { | 966 | 137k | if (null_map[i]) { | 967 | 0 | continue; | 968 | 0 | } | 969 | | | 970 | 137k | const auto data_index = index_check_const(i, json_data_const); | 971 | 137k | if (l_null_map && (*l_null_map)[data_index]) { | 972 | 1.66k | StringOP::push_null_string(i, res_data, res_offsets, null_map); | 973 | 1.66k | continue; | 974 | 1.66k | } | 975 | | | 976 | 136k | size_t l_off = loffsets[data_index - 1]; | 977 | 136k | size_t l_size = loffsets[data_index] - l_off; | 978 | 136k | const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]); | 979 | 136k | if (rdata_columns.size() == 1) { // just return origin value | 980 | 135k | const auto path_index = index_check_const(i, path_const[0]); | 981 | 135k | if (r_null_maps[0] && (*r_null_maps[0])[path_index]) { | 982 | 17 | StringOP::push_null_string(i, res_data, res_offsets, null_map); | 983 | 17 | continue; | 984 | 17 | } | 985 | | | 986 | 135k | if (!path_const[0]) { | 987 | 305 | RETURN_IF_ERROR(parse_json_path(i, 0)); | 988 | 305 | } | 989 | | | 990 | 135k | writer->reset(); | 991 | 135k | inner_loop_impl(writer.get(), i, res_data, res_offsets, null_map, formater, l_raw, | 992 | 135k | l_size, json_path_list[0]); | 993 | 135k | } else { // will make array string to user | 994 | 651 | writer->reset(); | 995 | 651 | bool has_value = false; | 996 | | | 997 | | // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory | 998 | 651 | const JsonbDocument* doc = nullptr; | 999 | 651 | auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc); | 1000 | | | 1001 | 1.77k | for (size_t pi = 0; pi < rdata_columns.size(); ++pi) { | 1002 | 1.22k | 
if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { | 1003 | 0 | continue; | 1004 | 0 | } | 1005 | | | 1006 | 1.22k | const auto path_index = index_check_const(i, path_const[pi]); | 1007 | 1.22k | if (r_null_maps[pi] && (*r_null_maps[pi])[path_index]) { | 1008 | 98 | StringOP::push_null_string(i, res_data, res_offsets, null_map); | 1009 | 98 | break; | 1010 | 98 | } | 1011 | | | 1012 | 1.12k | if (!path_const[pi]) { | 1013 | 64 | RETURN_IF_ERROR(parse_json_path(i, pi)); | 1014 | 64 | } | 1015 | | | 1016 | 1.12k | auto find_result = doc->getValue()->findValue(json_path_list[pi]); | 1017 | | | 1018 | 1.12k | if (find_result.value) { | 1019 | 276 | if (!has_value) { | 1020 | 150 | has_value = true; | 1021 | 150 | writer->writeStartArray(); | 1022 | 150 | } | 1023 | 276 | if (find_result.value->isArray() && find_result.is_wildcard) { | 1024 | | // To avoid getting results of nested array like [[1, 2, 3], [4, 5, 6]], | 1025 | | // if value is array, we should write all items in array, instead of write the array itself. | 1026 | | // finaly we will get results like [1, 2, 3, 4, 5, 6] | 1027 | 54 | for (const auto& item : *find_result.value->unpack<ArrayVal>()) { | 1028 | 54 | writer->writeValue(&item); | 1029 | 54 | } | 1030 | 253 | } else { | 1031 | 253 | writer->writeValue(find_result.value); | 1032 | 253 | } | 1033 | 276 | } | 1034 | 1.12k | } | 1035 | 651 | if (has_value) { | 1036 | 150 | writer->writeEndArray(); | 1037 | 150 | StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(), | 1038 | 150 | writer->getOutput()->getSize()), | 1039 | 150 | i, res_data, res_offsets); | 1040 | 501 | } else { | 1041 | 501 | StringOP::push_null_string(i, res_data, res_offsets, null_map); | 1042 | 501 | } | 1043 | 651 | } | 1044 | 136k | } //for | 1045 | 9.99k | return Status::OK(); | 1046 | 10.0k | } |
_ZN5doris22JsonbExtractStringImplINS_21JsonbTypeJsonNoQuotesEE16vector_vector_v2EPNS_15FunctionContextERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS5_IjLm4096ES8_Lm16ELm15EEEPSA_RKbRKSt6vectorIPKNS_9ColumnStrIjEESaISM_EERKSI_ISF_SaISF_EERKSI_IbSaIbEERS9_RSC_SZ_ Line | Count | Source | 922 | 9 | ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets, NullMap& null_map) { | 923 | 9 | const size_t input_rows_count = null_map.size(); | 924 | 9 | res_offsets.resize(input_rows_count); | 925 | | | 926 | 9 | auto writer = std::make_unique<JsonbWriter>(); | 927 | 9 | std::unique_ptr<JsonbToJson> formater; | 928 | | | 929 | | // reuseable json path list, espacially for const path | 930 | 9 | std::vector<JsonbPath> json_path_list; | 931 | 9 | json_path_list.resize(rdata_columns.size()); | 932 | | | 933 | | // lambda function to parse json path for row i and path pi | 934 | 9 | auto parse_json_path = [&](size_t i, size_t pi) -> Status { | 935 | 9 | const auto index = index_check_const(i, path_const[pi]); | 936 | | | 937 | 9 | const ColumnString* path_col = rdata_columns[pi]; | 938 | 9 | const ColumnString::Chars& rdata = path_col->get_chars(); | 939 | 9 | const ColumnString::Offsets& roffsets = path_col->get_offsets(); | 940 | 9 | size_t r_off = roffsets[index - 1]; | 941 | 9 | size_t r_size = roffsets[index] - r_off; | 942 | 9 | const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]); | 943 | | | 944 | 9 | JsonbPath path; | 945 | 9 | if (!path.seek(r_raw, r_size)) { | 946 | 9 | return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}", | 947 | 9 | std::string_view(r_raw, r_size)); | 948 | 9 | } | 949 | | | 950 | 9 | json_path_list[pi] = std::move(path); | 951 | | | 952 | 9 | return Status::OK(); | 953 | 9 | }; | 954 | | | 955 | 22 | for (size_t pi = 0; pi < rdata_columns.size(); pi++) { | 956 | 13 | if (path_const[pi]) { | 957 | 0 | if (r_null_maps[pi] && (*r_null_maps[pi])[0]) { 
| 958 | 0 | continue; | 959 | 0 | } | 960 | 0 | RETURN_IF_ERROR(parse_json_path(0, pi)); | 961 | 0 | } | 962 | 13 | } | 963 | | | 964 | 9 | res_data.reserve(ldata.size()); | 965 | 18 | for (size_t i = 0; i < input_rows_count; ++i) { | 966 | 9 | if (null_map[i]) { | 967 | 0 | continue; | 968 | 0 | } | 969 | | | 970 | 9 | const auto data_index = index_check_const(i, json_data_const); | 971 | 9 | if (l_null_map && (*l_null_map)[data_index]) { | 972 | 1 | StringOP::push_null_string(i, res_data, res_offsets, null_map); | 973 | 1 | continue; | 974 | 1 | } | 975 | | | 976 | 8 | size_t l_off = loffsets[data_index - 1]; | 977 | 8 | size_t l_size = loffsets[data_index] - l_off; | 978 | 8 | const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]); | 979 | 8 | if (rdata_columns.size() == 1) { // just return origin value | 980 | 5 | const auto path_index = index_check_const(i, path_const[0]); | 981 | 5 | if (r_null_maps[0] && (*r_null_maps[0])[path_index]) { | 982 | 0 | StringOP::push_null_string(i, res_data, res_offsets, null_map); | 983 | 0 | continue; | 984 | 0 | } | 985 | | | 986 | 5 | if (!path_const[0]) { | 987 | 5 | RETURN_IF_ERROR(parse_json_path(i, 0)); | 988 | 5 | } | 989 | | | 990 | 5 | writer->reset(); | 991 | 5 | inner_loop_impl(writer.get(), i, res_data, res_offsets, null_map, formater, l_raw, | 992 | 5 | l_size, json_path_list[0]); | 993 | 5 | } else { // will make array string to user | 994 | 3 | writer->reset(); | 995 | 3 | bool has_value = false; | 996 | | | 997 | | // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory | 998 | 3 | const JsonbDocument* doc = nullptr; | 999 | 3 | auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc); | 1000 | | | 1001 | 9 | for (size_t pi = 0; pi < rdata_columns.size(); ++pi) { | 1002 | 7 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { | 1003 | 0 | continue; | 1004 | 0 | } | 1005 | | | 1006 | 7 | const auto path_index = index_check_const(i, path_const[pi]); | 1007 | 
7 | if (r_null_maps[pi] && (*r_null_maps[pi])[path_index]) { | 1008 | 1 | StringOP::push_null_string(i, res_data, res_offsets, null_map); | 1009 | 1 | break; | 1010 | 1 | } | 1011 | | | 1012 | 6 | if (!path_const[pi]) { | 1013 | 6 | RETURN_IF_ERROR(parse_json_path(i, pi)); | 1014 | 6 | } | 1015 | | | 1016 | 6 | auto find_result = doc->getValue()->findValue(json_path_list[pi]); | 1017 | | | 1018 | 6 | if (find_result.value) { | 1019 | 6 | if (!has_value) { | 1020 | 3 | has_value = true; | 1021 | 3 | writer->writeStartArray(); | 1022 | 3 | } | 1023 | 6 | if (find_result.value->isArray() && find_result.is_wildcard) { | 1024 | | // To avoid getting results of nested array like [[1, 2, 3], [4, 5, 6]], | 1025 | | // if value is array, we should write all items in array, instead of write the array itself. | 1026 | | // finaly we will get results like [1, 2, 3, 4, 5, 6] | 1027 | 0 | for (const auto& item : *find_result.value->unpack<ArrayVal>()) { | 1028 | 0 | writer->writeValue(&item); | 1029 | 0 | } | 1030 | 6 | } else { | 1031 | 6 | writer->writeValue(find_result.value); | 1032 | 6 | } | 1033 | 6 | } | 1034 | 6 | } | 1035 | 3 | if (has_value) { | 1036 | 3 | writer->writeEndArray(); | 1037 | 3 | StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(), | 1038 | 3 | writer->getOutput()->getSize()), | 1039 | 3 | i, res_data, res_offsets); | 1040 | 3 | } else { | 1041 | 0 | StringOP::push_null_string(i, res_data, res_offsets, null_map); | 1042 | 0 | } | 1043 | 3 | } | 1044 | 8 | } //for | 1045 | 9 | return Status::OK(); | 1046 | 9 | } |
|
1047 | | |
1048 | | static Status vector_vector(FunctionContext* context, const ColumnString::Chars& ldata, |
1049 | | const ColumnString::Offsets& loffsets, const NullMap* l_null_map, |
1050 | | const ColumnString::Chars& rdata, |
1051 | | const ColumnString::Offsets& roffsets, const NullMap* r_null_map, |
1052 | | ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets, |
1053 | | NullMap& null_map) { |
1054 | | size_t input_rows_count = loffsets.size(); |
1055 | | res_offsets.resize(input_rows_count); |
1056 | | |
1057 | | std::unique_ptr<JsonbToJson> formater; |
1058 | | |
1059 | | JsonbWriter writer; |
1060 | | for (size_t i = 0; i < input_rows_count; ++i) { |
1061 | | if (l_null_map && (*l_null_map)[i]) { |
1062 | | StringOP::push_null_string(i, res_data, res_offsets, null_map); |
1063 | | continue; |
1064 | | } |
1065 | | |
1066 | | if (r_null_map && (*r_null_map)[i]) { |
1067 | | StringOP::push_null_string(i, res_data, res_offsets, null_map); |
1068 | | continue; |
1069 | | } |
1070 | | |
1071 | | int l_size = loffsets[i] - loffsets[i - 1]; |
1072 | | const char* l_raw = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]); |
1073 | | |
1074 | | int r_size = roffsets[i] - roffsets[i - 1]; |
1075 | | const char* r_raw = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]); |
1076 | | |
1077 | | JsonbPath path; |
1078 | | if (!path.seek(r_raw, r_size)) { |
1079 | | return Status::InvalidArgument( |
1080 | | "Json path error: Invalid Json Path for value: {} at row: {}", |
1081 | | std::string_view(r_raw, r_size), i); |
1082 | | } |
1083 | | |
1084 | | writer.reset(); |
1085 | | inner_loop_impl(&writer, i, res_data, res_offsets, null_map, formater, l_raw, l_size, |
1086 | | path); |
1087 | | } //for |
1088 | | return Status::OK(); |
1089 | | } //function |
1090 | | |
1091 | | static Status vector_scalar(FunctionContext* context, const ColumnString::Chars& ldata, |
1092 | | const ColumnString::Offsets& loffsets, const NullMap* l_null_map, |
1093 | | const StringRef& rdata, ColumnString::Chars& res_data, |
1094 | | ColumnString::Offsets& res_offsets, NullMap& null_map) { |
1095 | | size_t input_rows_count = loffsets.size(); |
1096 | | res_offsets.resize(input_rows_count); |
1097 | | |
1098 | | std::unique_ptr<JsonbToJson> formater; |
1099 | | |
1100 | | JsonbPath path; |
1101 | | if (!path.seek(rdata.data, rdata.size)) { |
1102 | | return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}", |
1103 | | std::string_view(rdata.data, rdata.size)); |
1104 | | } |
1105 | | |
1106 | | JsonbWriter writer; |
1107 | | for (size_t i = 0; i < input_rows_count; ++i) { |
1108 | | if (l_null_map && (*l_null_map)[i]) { |
1109 | | StringOP::push_null_string(i, res_data, res_offsets, null_map); |
1110 | | continue; |
1111 | | } |
1112 | | |
1113 | | int l_size = loffsets[i] - loffsets[i - 1]; |
1114 | | const char* l_raw = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]); |
1115 | | |
1116 | | writer.reset(); |
1117 | | inner_loop_impl(&writer, i, res_data, res_offsets, null_map, formater, l_raw, l_size, |
1118 | | path); |
1119 | | } //for |
1120 | | return Status::OK(); |
1121 | | } //function |
1122 | | |
1123 | | static Status scalar_vector(FunctionContext* context, const StringRef& ldata, |
1124 | | const ColumnString::Chars& rdata, |
1125 | | const ColumnString::Offsets& roffsets, const NullMap* r_null_map, |
1126 | | ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets, |
1127 | | NullMap& null_map) { |
1128 | | size_t input_rows_count = roffsets.size(); |
1129 | | res_offsets.resize(input_rows_count); |
1130 | | |
1131 | | std::unique_ptr<JsonbToJson> formater; |
1132 | | |
1133 | | JsonbWriter writer; |
1134 | | |
1135 | | for (size_t i = 0; i < input_rows_count; ++i) { |
1136 | | if (r_null_map && (*r_null_map)[i]) { |
1137 | | StringOP::push_null_string(i, res_data, res_offsets, null_map); |
1138 | | continue; |
1139 | | } |
1140 | | |
1141 | | int r_size = roffsets[i] - roffsets[i - 1]; |
1142 | | const char* r_raw = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]); |
1143 | | |
1144 | | JsonbPath path; |
1145 | | if (!path.seek(r_raw, r_size)) { |
1146 | | return Status::InvalidArgument( |
1147 | | "Json path error: Invalid Json Path for value: {} at row: {}", |
1148 | | std::string_view(r_raw, r_size), i); |
1149 | | } |
1150 | | |
1151 | | writer.reset(); |
1152 | | inner_loop_impl(&writer, i, res_data, res_offsets, null_map, formater, ldata.data, |
1153 | | ldata.size, path); |
1154 | | } //for |
1155 | | return Status::OK(); |
1156 | | } //function |
1157 | | }; |
1158 | | |
1159 | | struct JsonbExtractIsnull { |
1160 | | static constexpr auto name = "json_extract_isnull"; |
1161 | | static constexpr auto alias = "jsonb_extract_isnull"; |
1162 | | |
1163 | | using ReturnType = DataTypeUInt8; |
1164 | | using ColumnType = ColumnUInt8; |
1165 | | using Container = typename ColumnType::Container; |
1166 | | |
1167 | | private: |
1168 | | static ALWAYS_INLINE void inner_loop_impl(size_t i, Container& res, NullMap& null_map, |
1169 | | const char* l_raw_str, size_t l_str_size, |
1170 | 2.97k | JsonbPath& path) { |
1171 | 2.97k | if (null_map[i]) { |
1172 | 0 | res[i] = 0; |
1173 | 0 | return; |
1174 | 0 | } |
1175 | | |
1176 | | // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory |
1177 | 2.97k | const JsonbDocument* doc = nullptr; |
1178 | 2.97k | auto st = JsonbDocument::checkAndCreateDocument(l_raw_str, l_str_size, &doc); |
1179 | 2.97k | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { |
1180 | 0 | null_map[i] = 1; |
1181 | 0 | res[i] = 0; |
1182 | 0 | return; |
1183 | 0 | } |
1184 | | |
1185 | | // value is NOT necessary to be deleted since JsonbValue will not allocate memory |
1186 | 2.97k | auto find_result = doc->getValue()->findValue(path); |
1187 | 2.97k | const auto* value = find_result.value; |
1188 | | |
1189 | 2.97k | if (UNLIKELY(!value)) { |
1190 | 2.55k | null_map[i] = 1; |
1191 | 2.55k | res[i] = 0; |
1192 | 2.55k | return; |
1193 | 2.55k | } |
1194 | | |
1195 | 420 | res[i] = value->isNull(); |
1196 | 420 | } |
1197 | | |
1198 | | public: |
1199 | | // for jsonb_extract_int/int64/double |
1200 | | static Status vector_vector(FunctionContext* context, const ColumnString::Chars& ldata, |
1201 | | const ColumnString::Offsets& loffsets, const NullMap* l_null_map, |
1202 | | const ColumnString::Chars& rdata, |
1203 | | const ColumnString::Offsets& roffsets, const NullMap* r_null_map, |
1204 | 4 | Container& res, NullMap& null_map) { |
1205 | 4 | size_t size = loffsets.size(); |
1206 | 4 | res.resize(size); |
1207 | | |
1208 | 19 | for (size_t i = 0; i < loffsets.size(); i++) { |
1209 | 15 | if ((l_null_map && (*l_null_map)[i]) || (r_null_map && (*r_null_map)[i])) { |
1210 | 8 | res[i] = 0; |
1211 | 8 | null_map[i] = 1; |
1212 | 8 | continue; |
1213 | 8 | } |
1214 | | |
1215 | 7 | const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]); |
1216 | 7 | int l_str_size = loffsets[i] - loffsets[i - 1]; |
1217 | | |
1218 | 7 | const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]); |
1219 | 7 | int r_str_size = roffsets[i] - roffsets[i - 1]; |
1220 | | |
1221 | 7 | JsonbPath path; |
1222 | 7 | if (!path.seek(r_raw_str, r_str_size)) { |
1223 | 0 | return Status::InvalidArgument( |
1224 | 0 | "Json path error: Invalid Json Path for value: {} at row: {}", |
1225 | 0 | std::string_view(r_raw_str, r_str_size), i); |
1226 | 0 | } |
1227 | | |
1228 | 7 | inner_loop_impl(i, res, null_map, l_raw_str, l_str_size, path); |
1229 | 7 | } //for |
1230 | 4 | return Status::OK(); |
1231 | 4 | } //function |
1232 | | |
1233 | | static Status scalar_vector(FunctionContext* context, const StringRef& ldata, |
1234 | | const ColumnString::Chars& rdata, |
1235 | | const ColumnString::Offsets& roffsets, const NullMap* r_null_map, |
1236 | 1 | Container& res, NullMap& null_map) { |
1237 | 1 | size_t size = roffsets.size(); |
1238 | 1 | res.resize(size); |
1239 | | |
1240 | 13 | for (size_t i = 0; i < size; i++) { |
1241 | 12 | if (r_null_map && (*r_null_map)[i]) { |
1242 | 4 | res[i] = 0; |
1243 | 4 | null_map[i] = 1; |
1244 | 4 | continue; |
1245 | 4 | } |
1246 | | |
1247 | 8 | const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]); |
1248 | 8 | int r_str_size = roffsets[i] - roffsets[i - 1]; |
1249 | | |
1250 | 8 | JsonbPath path; |
1251 | 8 | if (!path.seek(r_raw_str, r_str_size)) { |
1252 | 0 | return Status::InvalidArgument( |
1253 | 0 | "Json path error: Invalid Json Path for value: {} at row: {}", |
1254 | 0 | std::string_view(r_raw_str, r_str_size), i); |
1255 | 0 | } |
1256 | | |
1257 | 8 | inner_loop_impl(i, res, null_map, ldata.data, ldata.size, path); |
1258 | 8 | } //for |
1259 | 1 | return Status::OK(); |
1260 | 1 | } //function |
1261 | | |
1262 | | static Status vector_scalar(FunctionContext* context, const ColumnString::Chars& ldata, |
1263 | | const ColumnString::Offsets& loffsets, const NullMap* l_null_map, |
1264 | 1.32k | const StringRef& rdata, Container& res, NullMap& null_map) { |
1265 | 1.32k | size_t size = loffsets.size(); |
1266 | 1.32k | res.resize(size); |
1267 | | |
1268 | 1.32k | JsonbPath path; |
1269 | 1.32k | if (!path.seek(rdata.data, rdata.size)) { |
1270 | 0 | return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}", |
1271 | 0 | std::string_view(rdata.data, rdata.size)); |
1272 | 0 | } |
1273 | | |
1274 | 4.50k | for (size_t i = 0; i < loffsets.size(); i++) { |
1275 | 3.18k | if (l_null_map && (*l_null_map)[i]) { |
1276 | 228 | res[i] = 0; |
1277 | 228 | null_map[i] = 1; |
1278 | 228 | continue; |
1279 | 228 | } |
1280 | | |
1281 | 2.95k | const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]); |
1282 | 2.95k | int l_str_size = loffsets[i] - loffsets[i - 1]; |
1283 | | |
1284 | 2.95k | inner_loop_impl(i, res, null_map, l_raw_str, l_str_size, path); |
1285 | 2.95k | } //for |
1286 | 1.32k | return Status::OK(); |
1287 | 1.32k | } //function |
1288 | | }; |
1289 | | |
1290 | | struct JsonbTypeJson { |
1291 | | using T = std::string; |
1292 | | using ReturnType = DataTypeJsonb; |
1293 | | using ColumnType = ColumnString; |
1294 | | static const bool only_get_type = false; |
1295 | | static const bool no_quotes = false; |
1296 | | }; |
1297 | | |
1298 | | struct JsonbTypeJsonNoQuotes { |
1299 | | using T = std::string; |
1300 | | using ReturnType = DataTypeJsonb; |
1301 | | using ColumnType = ColumnString; |
1302 | | static const bool only_get_type = false; |
1303 | | static const bool no_quotes = true; |
1304 | | }; |
1305 | | |
1306 | | struct JsonbTypeType { |
1307 | | using T = std::string; |
1308 | | using ReturnType = DataTypeString; |
1309 | | using ColumnType = ColumnString; |
1310 | | static const bool only_get_type = true; |
1311 | | static const bool no_quotes = false; |
1312 | | }; |
1313 | | |
1314 | | struct JsonbExtractJsonb : public JsonbExtractStringImpl<JsonbTypeJson> { |
1315 | | static constexpr auto name = "jsonb_extract"; |
1316 | | static constexpr auto alias = "json_extract"; |
1317 | | }; |
1318 | | |
1319 | | struct JsonbExtractJsonbNoQuotes : public JsonbExtractStringImpl<JsonbTypeJsonNoQuotes> { |
1320 | | static constexpr auto name = "jsonb_extract_no_quotes"; |
1321 | | static constexpr auto alias = "json_extract_no_quotes"; |
1322 | | }; |
1323 | | |
1324 | | struct JsonbTypeImpl : public JsonbExtractStringImpl<JsonbTypeType> { |
1325 | | static constexpr auto name = "json_type"; |
1326 | | static constexpr auto alias = "jsonb_type"; |
1327 | | }; |
1328 | | |
1329 | | using FunctionJsonbExists = FunctionJsonbExtractPath; |
1330 | | using FunctionJsonbType = FunctionJsonbExtract<JsonbTypeImpl>; |
1331 | | |
1332 | | using FunctionJsonbExtractIsnull = FunctionJsonbExtract<JsonbExtractIsnull>; |
1333 | | using FunctionJsonbExtractJsonb = FunctionJsonbExtract<JsonbExtractJsonb>; |
1334 | | using FunctionJsonbExtractJsonbNoQuotes = FunctionJsonbExtract<JsonbExtractJsonbNoQuotes>; |
1335 | | |
1336 | | template <typename Impl> |
1337 | | class FunctionJsonbLength : public IFunction { |
1338 | | public: |
1339 | | static constexpr auto name = "json_length"; |
1340 | 1 | String get_name() const override { return name; } |
1341 | 794 | static FunctionPtr create() { return std::make_shared<FunctionJsonbLength<Impl>>(); } |
1342 | | |
1343 | 785 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
1344 | 785 | return make_nullable(std::make_shared<DataTypeInt32>()); |
1345 | 785 | } |
1346 | 793 | DataTypes get_variadic_argument_types_impl() const override { |
1347 | 793 | return Impl::get_variadic_argument_types(); |
1348 | 793 | } |
1349 | 785 | size_t get_number_of_arguments() const override { |
1350 | 785 | return get_variadic_argument_types_impl().size(); |
1351 | 785 | } |
1352 | | |
1353 | 1.57k | bool use_default_implementation_for_nulls() const override { return false; } |
1354 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
1355 | 791 | uint32_t result, size_t input_rows_count) const override { |
1356 | 791 | return Impl::execute_impl(context, block, arguments, result, input_rows_count); |
1357 | 791 | } |
1358 | | }; |
1359 | | |
1360 | | struct JsonbLengthUtil { |
1361 | | static Status jsonb_length_execute(FunctionContext* context, Block& block, |
1362 | | const ColumnNumbers& arguments, uint32_t result, |
1363 | 791 | size_t input_rows_count) { |
1364 | 791 | DORIS_CHECK_GE(arguments.size(), 2); |
1365 | 791 | ColumnPtr jsonb_data_column; |
1366 | 791 | bool jsonb_data_const = false; |
1367 | | // prepare jsonb data column |
1368 | 791 | std::tie(jsonb_data_column, jsonb_data_const) = |
1369 | 791 | unpack_if_const(block.get_by_position(arguments[0]).column); |
1370 | 791 | ColumnPtr path_column; |
1371 | 791 | bool is_const = false; |
1372 | 791 | std::tie(path_column, is_const) = |
1373 | 791 | unpack_if_const(block.get_by_position(arguments[1]).column); |
1374 | | |
1375 | 791 | auto null_map = ColumnUInt8::create(input_rows_count, 0); |
1376 | 791 | auto return_type = block.get_data_type(result); |
1377 | 791 | MutableColumnPtr res = return_type->create_column(); |
1378 | | |
1379 | 791 | JsonbPath path; |
1380 | 791 | if (is_const) { |
1381 | 763 | if (path_column->is_null_at(0)) { |
1382 | 2 | for (size_t i = 0; i < input_rows_count; ++i) { |
1383 | 1 | null_map->get_data()[i] = 1; |
1384 | 1 | res->insert_data(nullptr, 0); |
1385 | 1 | } |
1386 | | |
1387 | 1 | block.replace_by_position( |
1388 | 1 | result, ColumnNullable::create(std::move(res), std::move(null_map))); |
1389 | 1 | return Status::OK(); |
1390 | 1 | } |
1391 | | |
1392 | 762 | auto path_value = path_column->get_data_at(0); |
1393 | 762 | if (!path.seek(path_value.data, path_value.size)) { |
1394 | 0 | return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}", |
1395 | 0 | std::string_view(path_value.data, path_value.size)); |
1396 | 0 | } |
1397 | 762 | } |
1398 | | |
1399 | 1.66k | for (size_t i = 0; i < input_rows_count; ++i) { |
1400 | 877 | if (jsonb_data_column->is_null_at(i) || path_column->is_null_at(i) || |
1401 | 877 | (jsonb_data_column->get_data_at(i).size == 0)) { |
1402 | 18 | null_map->get_data()[i] = 1; |
1403 | 18 | res->insert_data(nullptr, 0); |
1404 | 18 | continue; |
1405 | 18 | } |
1406 | 859 | if (!is_const) { |
1407 | 25 | auto path_value = path_column->get_data_at(i); |
1408 | 25 | path.clean(); |
1409 | 25 | if (!path.seek(path_value.data, path_value.size)) { |
1410 | 0 | return Status::InvalidArgument( |
1411 | 0 | "Json path error: Invalid Json Path for value: {}", |
1412 | 0 | std::string_view(path_value.data, path_value.size)); |
1413 | 0 | } |
1414 | 25 | } |
1415 | 859 | auto jsonb_value = jsonb_data_column->get_data_at(i); |
1416 | | // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory |
1417 | 859 | const JsonbDocument* doc = nullptr; |
1418 | 859 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(jsonb_value.data, |
1419 | 859 | jsonb_value.size, &doc)); |
1420 | 859 | auto find_result = doc->getValue()->findValue(path); |
1421 | 859 | const auto* value = find_result.value; |
1422 | 859 | if (UNLIKELY(!value)) { |
1423 | 74 | null_map->get_data()[i] = 1; |
1424 | 74 | res->insert_data(nullptr, 0); |
1425 | 74 | continue; |
1426 | 74 | } |
1427 | 785 | auto length = value->numElements(); |
1428 | 785 | res->insert_data(const_cast<const char*>((char*)&length), 0); |
1429 | 785 | } |
1430 | 790 | block.replace_by_position(result, |
1431 | 790 | ColumnNullable::create(std::move(res), std::move(null_map))); |
1432 | 790 | return Status::OK(); |
1433 | 790 | } |
1434 | | }; |
1435 | | |
1436 | | struct JsonbLengthAndPathImpl { |
1437 | 793 | static DataTypes get_variadic_argument_types() { |
1438 | 793 | return {std::make_shared<DataTypeJsonb>(), std::make_shared<DataTypeString>()}; |
1439 | 793 | } |
1440 | | |
1441 | | static Status execute_impl(FunctionContext* context, Block& block, |
1442 | | const ColumnNumbers& arguments, uint32_t result, |
1443 | 791 | size_t input_rows_count) { |
1444 | 791 | return JsonbLengthUtil::jsonb_length_execute(context, block, arguments, result, |
1445 | 791 | input_rows_count); |
1446 | 791 | } |
1447 | | }; |
1448 | | |
1449 | | template <typename Impl> |
1450 | | class FunctionJsonbContains : public IFunction { |
1451 | | public: |
1452 | | static constexpr auto name = "json_contains"; |
1453 | 1 | String get_name() const override { return name; } |
1454 | 65 | static FunctionPtr create() { return std::make_shared<FunctionJsonbContains<Impl>>(); } |
1455 | | |
1456 | 56 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
1457 | 56 | return make_nullable(std::make_shared<DataTypeUInt8>()); |
1458 | 56 | } |
1459 | 64 | DataTypes get_variadic_argument_types_impl() const override { |
1460 | 64 | return Impl::get_variadic_argument_types(); |
1461 | 64 | } |
1462 | 56 | size_t get_number_of_arguments() const override { |
1463 | 56 | return get_variadic_argument_types_impl().size(); |
1464 | 56 | } |
1465 | | |
1466 | 187 | bool use_default_implementation_for_nulls() const override { return false; } |
1467 | | |
1468 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
1469 | 131 | uint32_t result, size_t input_rows_count) const override { |
1470 | 131 | return Impl::execute_impl(context, block, arguments, result, input_rows_count); |
1471 | 131 | } |
1472 | | }; |
1473 | | |
1474 | | struct JsonbContainsUtil { |
1475 | | static Status jsonb_contains_execute(FunctionContext* context, Block& block, |
1476 | | const ColumnNumbers& arguments, uint32_t result, |
1477 | 131 | size_t input_rows_count) { |
1478 | 131 | DORIS_CHECK_GE(arguments.size(), 3); |
1479 | | |
1480 | 131 | auto jsonb_data1_column = block.get_by_position(arguments[0]).column; |
1481 | 131 | auto jsonb_data2_column = block.get_by_position(arguments[1]).column; |
1482 | | |
1483 | 131 | ColumnPtr path_column; |
1484 | 131 | bool is_const = false; |
1485 | 131 | std::tie(path_column, is_const) = |
1486 | 131 | unpack_if_const(block.get_by_position(arguments[2]).column); |
1487 | | |
1488 | 131 | auto null_map = ColumnUInt8::create(input_rows_count, 0); |
1489 | 131 | auto return_type = block.get_data_type(result); |
1490 | 131 | MutableColumnPtr res = return_type->create_column(); |
1491 | | |
1492 | 131 | JsonbPath path; |
1493 | 131 | if (is_const) { |
1494 | 84 | if (path_column->is_null_at(0)) { |
1495 | 2 | for (size_t i = 0; i < input_rows_count; ++i) { |
1496 | 1 | null_map->get_data()[i] = 1; |
1497 | 1 | res->insert_data(nullptr, 0); |
1498 | 1 | } |
1499 | | |
1500 | 1 | block.replace_by_position( |
1501 | 1 | result, ColumnNullable::create(std::move(res), std::move(null_map))); |
1502 | 1 | return Status::OK(); |
1503 | 1 | } |
1504 | | |
1505 | 83 | auto path_value = path_column->get_data_at(0); |
1506 | 83 | if (!path.seek(path_value.data, path_value.size)) { |
1507 | 1 | return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}", |
1508 | 1 | std::string_view(path_value.data, path_value.size)); |
1509 | 1 | } |
1510 | 83 | } |
1511 | | |
1512 | 384 | for (size_t i = 0; i < input_rows_count; ++i) { |
1513 | 256 | if (jsonb_data1_column->is_null_at(i) || jsonb_data2_column->is_null_at(i) || |
1514 | 256 | path_column->is_null_at(i)) { |
1515 | 26 | null_map->get_data()[i] = 1; |
1516 | 26 | res->insert_data(nullptr, 0); |
1517 | 26 | continue; |
1518 | 26 | } |
1519 | | |
1520 | 230 | if (!is_const) { |
1521 | 50 | auto path_value = path_column->get_data_at(i); |
1522 | 50 | path.clean(); |
1523 | 50 | if (!path.seek(path_value.data, path_value.size)) { |
1524 | 1 | return Status::InvalidArgument( |
1525 | 1 | "Json path error: Invalid Json Path for value: {}", |
1526 | 1 | std::string_view(path_value.data, path_value.size)); |
1527 | 1 | } |
1528 | 50 | } |
1529 | | |
1530 | 229 | auto jsonb_value1 = jsonb_data1_column->get_data_at(i); |
1531 | 229 | auto jsonb_value2 = jsonb_data2_column->get_data_at(i); |
1532 | | |
1533 | 229 | if (jsonb_value1.size == 0 || jsonb_value2.size == 0) { |
1534 | 1 | null_map->get_data()[i] = 1; |
1535 | 1 | res->insert_data(nullptr, 0); |
1536 | 1 | continue; |
1537 | 1 | } |
1538 | | // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory |
1539 | 228 | const JsonbDocument* doc1 = nullptr; |
1540 | 228 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(jsonb_value1.data, |
1541 | 228 | jsonb_value1.size, &doc1)); |
1542 | 228 | const JsonbDocument* doc2 = nullptr; |
1543 | 228 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(jsonb_value2.data, |
1544 | 228 | jsonb_value2.size, &doc2)); |
1545 | | |
1546 | 228 | auto find_result = doc1->getValue()->findValue(path); |
1547 | 228 | const auto* value1 = find_result.value; |
1548 | 228 | const JsonbValue* value2 = doc2->getValue(); |
1549 | 228 | if (!value1 || !value2) { |
1550 | 45 | null_map->get_data()[i] = 1; |
1551 | 45 | res->insert_data(nullptr, 0); |
1552 | 45 | continue; |
1553 | 45 | } |
1554 | 183 | auto contains_value = value1->contains(value2); |
1555 | 183 | res->insert_data(const_cast<const char*>((char*)&contains_value), 0); |
1556 | 183 | } |
1557 | | |
1558 | 128 | block.replace_by_position(result, |
1559 | 128 | ColumnNullable::create(std::move(res), std::move(null_map))); |
1560 | 128 | return Status::OK(); |
1561 | 129 | } |
1562 | | }; |
1563 | | |
1564 | | template <bool ignore_null> |
1565 | | class FunctionJsonbArray : public IFunction { |
1566 | | public: |
1567 | | static constexpr auto name = "json_array"; |
1568 | | static constexpr auto alias = "jsonb_array"; |
1569 | | |
1570 | 51 | static FunctionPtr create() { return std::make_shared<FunctionJsonbArray>(); }_ZN5doris18FunctionJsonbArrayILb0EE6createEv Line | Count | Source | 1570 | 40 | static FunctionPtr create() { return std::make_shared<FunctionJsonbArray>(); } |
_ZN5doris18FunctionJsonbArrayILb1EE6createEv Line | Count | Source | 1570 | 11 | static FunctionPtr create() { return std::make_shared<FunctionJsonbArray>(); } |
|
1571 | | |
1572 | 0 | String get_name() const override { return name; }Unexecuted instantiation: _ZNK5doris18FunctionJsonbArrayILb0EE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris18FunctionJsonbArrayILb1EE8get_nameB5cxx11Ev |
1573 | | |
1574 | 0 | size_t get_number_of_arguments() const override { return 0; }Unexecuted instantiation: _ZNK5doris18FunctionJsonbArrayILb0EE23get_number_of_argumentsEv Unexecuted instantiation: _ZNK5doris18FunctionJsonbArrayILb1EE23get_number_of_argumentsEv |
1575 | 35 | bool is_variadic() const override { return true; }_ZNK5doris18FunctionJsonbArrayILb0EE11is_variadicEv Line | Count | Source | 1575 | 32 | bool is_variadic() const override { return true; } |
_ZNK5doris18FunctionJsonbArrayILb1EE11is_variadicEv Line | Count | Source | 1575 | 3 | bool is_variadic() const override { return true; } |
|
1576 | | |
1577 | 64 | bool use_default_implementation_for_nulls() const override { return false; }_ZNK5doris18FunctionJsonbArrayILb0EE36use_default_implementation_for_nullsEv Line | Count | Source | 1577 | 60 | bool use_default_implementation_for_nulls() const override { return false; } |
_ZNK5doris18FunctionJsonbArrayILb1EE36use_default_implementation_for_nullsEv Line | Count | Source | 1577 | 4 | bool use_default_implementation_for_nulls() const override { return false; } |
|
1578 | | |
1579 | 33 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
1580 | 33 | return std::make_shared<DataTypeJsonb>(); |
1581 | 33 | } _ZNK5doris18FunctionJsonbArrayILb0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS6_EE Line | Count | Source | 1579 | 31 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 1580 | 31 | return std::make_shared<DataTypeJsonb>(); | 1581 | 31 | } |
_ZNK5doris18FunctionJsonbArrayILb1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS6_EE Line | Count | Source | 1579 | 2 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 1580 | 2 | return std::make_shared<DataTypeJsonb>(); | 1581 | 2 | } |
|
1582 | | |
1583 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
1584 | 33 | uint32_t result, size_t input_rows_count) const override { |
1585 | 33 | auto return_data_type = std::make_shared<DataTypeJsonb>(); |
1586 | 33 | auto column = return_data_type->create_column(); |
1587 | 33 | column->reserve(input_rows_count); |
1588 | | |
1589 | 33 | JsonbWriter writer; |
1590 | 100 | for (size_t i = 0; i < input_rows_count; ++i) { |
1591 | 63 | writer.writeStartArray(); |
1592 | 182 | for (auto argument : arguments) { |
1593 | 182 | auto&& [arg_column, is_const] = |
1594 | 182 | unpack_if_const(block.get_by_position(argument).column); |
1595 | 182 | if (const auto* nullable_column = |
1596 | 182 | check_and_get_column<ColumnNullable>(arg_column.get())) { |
1597 | 83 | const auto& null_map = nullable_column->get_null_map_data(); |
1598 | 83 | const auto& nested_column = nullable_column->get_nested_column(); |
1599 | 83 | const auto& jsonb_column = |
1600 | 83 | assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>( |
1601 | 83 | nested_column); |
1602 | | |
1603 | 83 | auto index = index_check_const(i, is_const); |
1604 | 83 | if (null_map[index]) { |
1605 | 30 | if constexpr (ignore_null) { |
1606 | 4 | continue; |
1607 | 26 | } else { |
1608 | 26 | writer.writeNull(); |
1609 | 26 | } |
1610 | 53 | } else { |
1611 | 53 | auto jsonb_binary = jsonb_column.get_data_at(index); |
1612 | 53 | const JsonbDocument* doc = nullptr; |
1613 | 53 | auto st = JsonbDocument::checkAndCreateDocument(jsonb_binary.data, |
1614 | 53 | jsonb_binary.size, &doc); |
1615 | 53 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { |
1616 | 0 | if constexpr (ignore_null) { |
1617 | 0 | continue; |
1618 | 0 | } else { |
1619 | 0 | writer.writeNull(); |
1620 | 0 | } |
1621 | 53 | } else { |
1622 | 53 | writer.writeValue(doc->getValue()); |
1623 | 53 | } |
1624 | 53 | } |
1625 | 99 | } else { |
1626 | 99 | const auto& jsonb_column = |
1627 | 99 | assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>( |
1628 | 99 | *arg_column); |
1629 | | |
1630 | 99 | auto index = index_check_const(i, is_const); |
1631 | 99 | auto jsonb_binary = jsonb_column.get_data_at(index); |
1632 | 99 | const JsonbDocument* doc = nullptr; |
1633 | 99 | auto st = JsonbDocument::checkAndCreateDocument(jsonb_binary.data, |
1634 | 99 | jsonb_binary.size, &doc); |
1635 | 99 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { |
1636 | 0 | if constexpr (ignore_null) { |
1637 | 0 | continue; |
1638 | 0 | } else { |
1639 | 0 | writer.writeNull(); |
1640 | 0 | } |
1641 | 99 | } else { |
1642 | 99 | writer.writeValue(doc->getValue()); |
1643 | 99 | } |
1644 | 99 | } |
1645 | 182 | } |
1646 | 19 | writer.writeEndArray(); |
1647 | 19 | column->insert_data(writer.getOutput()->getBuffer(), writer.getOutput()->getSize()); |
1648 | 19 | writer.reset(); |
1649 | 19 | } |
1650 | | |
1651 | 6 | block.get_by_position(result).column = std::move(column); |
1652 | 6 | return Status::OK(); |
1653 | 33 | } _ZNK5doris18FunctionJsonbArrayILb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 1584 | 31 | uint32_t result, size_t input_rows_count) const override { | 1585 | 31 | auto return_data_type = std::make_shared<DataTypeJsonb>(); | 1586 | 31 | auto column = return_data_type->create_column(); | 1587 | 31 | column->reserve(input_rows_count); | 1588 | | | 1589 | 31 | JsonbWriter writer; | 1590 | 79 | for (size_t i = 0; i < input_rows_count; ++i) { | 1591 | 48 | writer.writeStartArray(); | 1592 | 152 | for (auto argument : arguments) { | 1593 | 152 | auto&& [arg_column, is_const] = | 1594 | 152 | unpack_if_const(block.get_by_position(argument).column); | 1595 | 152 | if (const auto* nullable_column = | 1596 | 152 | check_and_get_column<ColumnNullable>(arg_column.get())) { | 1597 | 58 | const auto& null_map = nullable_column->get_null_map_data(); | 1598 | 58 | const auto& nested_column = nullable_column->get_nested_column(); | 1599 | 58 | const auto& jsonb_column = | 1600 | 58 | assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>( | 1601 | 58 | nested_column); | 1602 | | | 1603 | 58 | auto index = index_check_const(i, is_const); | 1604 | 58 | if (null_map[index]) { | 1605 | | if constexpr (ignore_null) { | 1606 | | continue; | 1607 | 26 | } else { | 1608 | 26 | writer.writeNull(); | 1609 | 26 | } | 1610 | 32 | } else { | 1611 | 32 | auto jsonb_binary = jsonb_column.get_data_at(index); | 1612 | 32 | const JsonbDocument* doc = nullptr; | 1613 | 32 | auto st = JsonbDocument::checkAndCreateDocument(jsonb_binary.data, | 1614 | 32 | jsonb_binary.size, &doc); | 1615 | 32 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { | 1616 | | if constexpr (ignore_null) { | 1617 | | continue; | 1618 | 0 | } else { | 1619 | 0 | writer.writeNull(); | 1620 | 0 | } | 1621 | 32 | } else { | 1622 | 32 | writer.writeValue(doc->getValue()); | 1623 | 32 | } | 1624 | 32 | } | 1625 | 94 | } else { | 1626 | 94 | const auto& 
jsonb_column = | 1627 | 94 | assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>( | 1628 | 94 | *arg_column); | 1629 | | | 1630 | 94 | auto index = index_check_const(i, is_const); | 1631 | 94 | auto jsonb_binary = jsonb_column.get_data_at(index); | 1632 | 94 | const JsonbDocument* doc = nullptr; | 1633 | 94 | auto st = JsonbDocument::checkAndCreateDocument(jsonb_binary.data, | 1634 | 94 | jsonb_binary.size, &doc); | 1635 | 94 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { | 1636 | | if constexpr (ignore_null) { | 1637 | | continue; | 1638 | 0 | } else { | 1639 | 0 | writer.writeNull(); | 1640 | 0 | } | 1641 | 94 | } else { | 1642 | 94 | writer.writeValue(doc->getValue()); | 1643 | 94 | } | 1644 | 94 | } | 1645 | 152 | } | 1646 | 48 | writer.writeEndArray(); | 1647 | 48 | column->insert_data(writer.getOutput()->getBuffer(), writer.getOutput()->getSize()); | 1648 | 48 | writer.reset(); | 1649 | 48 | } | 1650 | | | 1651 | 31 | block.get_by_position(result).column = std::move(column); | 1652 | 31 | return Status::OK(); | 1653 | 31 | } |
_ZNK5doris18FunctionJsonbArrayILb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 1584 | 2 | uint32_t result, size_t input_rows_count) const override { | 1585 | 2 | auto return_data_type = std::make_shared<DataTypeJsonb>(); | 1586 | 2 | auto column = return_data_type->create_column(); | 1587 | 2 | column->reserve(input_rows_count); | 1588 | | | 1589 | 2 | JsonbWriter writer; | 1590 | 21 | for (size_t i = 0; i < input_rows_count; ++i) { | 1591 | 15 | writer.writeStartArray(); | 1592 | 30 | for (auto argument : arguments) { | 1593 | 30 | auto&& [arg_column, is_const] = | 1594 | 30 | unpack_if_const(block.get_by_position(argument).column); | 1595 | 30 | if (const auto* nullable_column = | 1596 | 30 | check_and_get_column<ColumnNullable>(arg_column.get())) { | 1597 | 25 | const auto& null_map = nullable_column->get_null_map_data(); | 1598 | 25 | const auto& nested_column = nullable_column->get_nested_column(); | 1599 | 25 | const auto& jsonb_column = | 1600 | 25 | assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>( | 1601 | 25 | nested_column); | 1602 | | | 1603 | 25 | auto index = index_check_const(i, is_const); | 1604 | 25 | if (null_map[index]) { | 1605 | 4 | if constexpr (ignore_null) { | 1606 | 4 | continue; | 1607 | | } else { | 1608 | | writer.writeNull(); | 1609 | | } | 1610 | 21 | } else { | 1611 | 21 | auto jsonb_binary = jsonb_column.get_data_at(index); | 1612 | 21 | const JsonbDocument* doc = nullptr; | 1613 | 21 | auto st = JsonbDocument::checkAndCreateDocument(jsonb_binary.data, | 1614 | 21 | jsonb_binary.size, &doc); | 1615 | 21 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { | 1616 | 0 | if constexpr (ignore_null) { | 1617 | 0 | continue; | 1618 | | } else { | 1619 | | writer.writeNull(); | 1620 | | } | 1621 | 21 | } else { | 1622 | 21 | writer.writeValue(doc->getValue()); | 1623 | 21 | } | 1624 | 21 | } | 1625 | 25 | } else { | 1626 | 5 | const auto& jsonb_column = | 1627 | 5 | 
assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>( | 1628 | 5 | *arg_column); | 1629 | | | 1630 | 5 | auto index = index_check_const(i, is_const); | 1631 | 5 | auto jsonb_binary = jsonb_column.get_data_at(index); | 1632 | 5 | const JsonbDocument* doc = nullptr; | 1633 | 5 | auto st = JsonbDocument::checkAndCreateDocument(jsonb_binary.data, | 1634 | 5 | jsonb_binary.size, &doc); | 1635 | 5 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { | 1636 | 0 | if constexpr (ignore_null) { | 1637 | 0 | continue; | 1638 | | } else { | 1639 | | writer.writeNull(); | 1640 | | } | 1641 | 5 | } else { | 1642 | 5 | writer.writeValue(doc->getValue()); | 1643 | 5 | } | 1644 | 5 | } | 1645 | 30 | } | 1646 | 19 | writer.writeEndArray(); | 1647 | 19 | column->insert_data(writer.getOutput()->getBuffer(), writer.getOutput()->getSize()); | 1648 | 19 | writer.reset(); | 1649 | 19 | } | 1650 | | | 1651 | 6 | block.get_by_position(result).column = std::move(column); | 1652 | 6 | return Status::OK(); | 1653 | 2 | } |
|
1654 | | }; |
1655 | | |
1656 | | class FunctionJsonbObject : public IFunction { |
1657 | | public: |
1658 | | static constexpr auto name = "json_object"; |
1659 | | static constexpr auto alias = "jsonb_object"; |
1660 | | |
1661 | 42 | static FunctionPtr create() { return std::make_shared<FunctionJsonbObject>(); } |
1662 | | |
1663 | 0 | String get_name() const override { return name; } |
1664 | | |
1665 | 0 | size_t get_number_of_arguments() const override { return 0; } |
1666 | 34 | bool is_variadic() const override { return true; } |
1667 | | |
1668 | 72 | bool use_default_implementation_for_nulls() const override { return false; } |
1669 | | |
1670 | 33 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
1671 | 33 | return std::make_shared<DataTypeJsonb>(); |
1672 | 33 | } |
1673 | | |
1674 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
1675 | 43 | uint32_t result, size_t input_rows_count) const override { |
1676 | 43 | if (arguments.size() % 2 != 0) { |
1677 | 0 | return Status::InvalidArgument( |
1678 | 0 | "JSON object must have an even number of arguments, but got: {}", |
1679 | 0 | arguments.size()); |
1680 | 0 | } |
1681 | | |
1682 | 43 | auto return_data_type = std::make_shared<DataTypeJsonb>(); |
1683 | | |
1684 | 43 | auto write_key = [](JsonbWriter& writer, const ColumnString& key_col, const bool is_const, |
1685 | 194 | const NullMap* null_map, const size_t arg_index, const size_t row_idx) { |
1686 | 194 | auto index = index_check_const(row_idx, is_const); |
1687 | 194 | if (null_map && (*null_map)[index]) { |
1688 | 1 | return Status::InvalidArgument( |
1689 | 1 | "JSON documents may not contain NULL member name(argument " |
1690 | 1 | "index: " |
1691 | 1 | "{}, row index: {})", |
1692 | 1 | row_idx, arg_index); |
1693 | 1 | } |
1694 | | |
1695 | 193 | auto key_string = key_col.get_data_at(index); |
1696 | 193 | if (key_string.size > 255) { |
1697 | 0 | return Status::InvalidArgument( |
1698 | 0 | "JSON object keys(argument index: {}) must be less than 256 " |
1699 | 0 | "bytes, but got size: {}", |
1700 | 0 | arg_index, key_string.size); |
1701 | 0 | } |
1702 | 193 | writer.writeKey(key_string.data, static_cast<uint8_t>(key_string.size)); |
1703 | 193 | return Status::OK(); |
1704 | 193 | }; |
1705 | | |
1706 | 43 | auto write_value = [](JsonbWriter& writer, const ColumnString& value_col, |
1707 | 43 | const bool is_const, const NullMap* null_map, const size_t arg_index, |
1708 | 193 | const size_t row_idx) { |
1709 | 193 | auto index = index_check_const(row_idx, is_const); |
1710 | 193 | if (null_map && (*null_map)[index]) { |
1711 | 46 | writer.writeNull(); |
1712 | 46 | return Status::OK(); |
1713 | 46 | } |
1714 | | |
1715 | 147 | auto value_string = value_col.get_data_at(index); |
1716 | 147 | const JsonbDocument* doc = nullptr; |
1717 | 147 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(value_string.data, |
1718 | 147 | value_string.size, &doc)); |
1719 | 147 | writer.writeValue(doc->getValue()); |
1720 | 147 | return Status::OK(); |
1721 | 147 | }; |
1722 | | |
1723 | 137 | for (size_t arg_idx = 0; arg_idx != arguments.size(); arg_idx += 2) { |
1724 | 94 | auto key_argument = arguments[arg_idx]; |
1725 | 94 | auto value_argument = arguments[arg_idx + 1]; |
1726 | | |
1727 | 94 | auto& key_data_type = block.get_by_position(key_argument).type; |
1728 | 94 | auto& value_data_type = block.get_by_position(value_argument).type; |
1729 | 94 | if (!is_string_type(key_data_type->get_primitive_type())) { |
1730 | 0 | return Status::InvalidArgument( |
1731 | 0 | "JSON object key(argument index: {}) must be String, but got type: " |
1732 | 0 | "{}(primitive type: {})", |
1733 | 0 | arg_idx, key_data_type->get_name(), |
1734 | 0 | static_cast<int>(key_data_type->get_primitive_type())); |
1735 | 0 | } |
1736 | | |
1737 | 94 | if (value_data_type->get_primitive_type() != PrimitiveType::TYPE_JSONB) { |
1738 | 0 | return Status::InvalidArgument( |
1739 | 0 | "JSON object value(argument index: {}) must be JSON, but got type: {}", |
1740 | 0 | arg_idx, value_data_type->get_name()); |
1741 | 0 | } |
1742 | 94 | } |
1743 | | |
1744 | 43 | auto column = return_data_type->create_column(); |
1745 | 43 | column->reserve(input_rows_count); |
1746 | | |
1747 | 43 | JsonbWriter writer; |
1748 | 107 | for (size_t i = 0; i != input_rows_count; ++i) { |
1749 | 65 | writer.writeStartObject(); |
1750 | 258 | for (size_t arg_idx = 0; arg_idx != arguments.size(); arg_idx += 2) { |
1751 | 194 | auto key_argument = arguments[arg_idx]; |
1752 | 194 | auto value_argument = arguments[arg_idx + 1]; |
1753 | 194 | auto&& [key_column, key_const] = |
1754 | 194 | unpack_if_const(block.get_by_position(key_argument).column); |
1755 | 194 | auto&& [value_column, value_const] = |
1756 | 194 | unpack_if_const(block.get_by_position(value_argument).column); |
1757 | | |
1758 | 194 | if (const auto* nullable_column = |
1759 | 194 | check_and_get_column<ColumnNullable>(key_column.get())) { |
1760 | 3 | const auto& null_map = nullable_column->get_null_map_data(); |
1761 | 3 | const auto& nested_column = nullable_column->get_nested_column(); |
1762 | 3 | const auto& key_arg_column = |
1763 | 3 | assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>( |
1764 | 3 | nested_column); |
1765 | | |
1766 | 3 | RETURN_IF_ERROR( |
1767 | 3 | write_key(writer, key_arg_column, key_const, &null_map, arg_idx, i)); |
1768 | 191 | } else { |
1769 | 191 | const auto& key_arg_column = |
1770 | 191 | assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>( |
1771 | 191 | *key_column); |
1772 | 191 | RETURN_IF_ERROR( |
1773 | 191 | write_key(writer, key_arg_column, key_const, nullptr, arg_idx, i)); |
1774 | 191 | } |
1775 | | |
1776 | 193 | if (const auto* nullable_column = |
1777 | 193 | check_and_get_column<ColumnNullable>(value_column.get())) { |
1778 | 93 | const auto& null_map = nullable_column->get_null_map_data(); |
1779 | 93 | const auto& nested_column = nullable_column->get_nested_column(); |
1780 | 93 | const auto& value_arg_column = |
1781 | 93 | assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>( |
1782 | 93 | nested_column); |
1783 | | |
1784 | 93 | RETURN_IF_ERROR(write_value(writer, value_arg_column, value_const, &null_map, |
1785 | 93 | arg_idx + 1, i)); |
1786 | 100 | } else { |
1787 | 100 | const auto& value_arg_column = |
1788 | 100 | assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>( |
1789 | 100 | *value_column); |
1790 | 100 | RETURN_IF_ERROR(write_value(writer, value_arg_column, value_const, nullptr, |
1791 | 100 | arg_idx + 1, i)); |
1792 | 100 | } |
1793 | 193 | } |
1794 | | |
1795 | 64 | writer.writeEndObject(); |
1796 | 64 | column->insert_data(writer.getOutput()->getBuffer(), writer.getOutput()->getSize()); |
1797 | 64 | writer.reset(); |
1798 | 64 | } |
1799 | | |
1800 | 42 | block.get_by_position(result).column = std::move(column); |
1801 | 42 | return Status::OK(); |
1802 | 43 | } |
1803 | | }; |
1804 | | |
1805 | | enum class JsonbModifyType { Insert, Set, Replace }; |
1806 | | |
1807 | | template <JsonbModifyType modify_type> |
1808 | | struct JsonbModifyName { |
1809 | | static constexpr auto name = "jsonb_modify"; |
1810 | | static constexpr auto alias = "json_modify"; |
1811 | | }; |
1812 | | |
1813 | | template <> |
1814 | | struct JsonbModifyName<JsonbModifyType::Insert> { |
1815 | | static constexpr auto name = "jsonb_insert"; |
1816 | | static constexpr auto alias = "json_insert"; |
1817 | | }; |
1818 | | template <> |
1819 | | struct JsonbModifyName<JsonbModifyType::Set> { |
1820 | | static constexpr auto name = "jsonb_set"; |
1821 | | static constexpr auto alias = "json_set"; |
1822 | | }; |
1823 | | template <> |
1824 | | struct JsonbModifyName<JsonbModifyType::Replace> { |
1825 | | static constexpr auto name = "jsonb_replace"; |
1826 | | static constexpr auto alias = "json_replace"; |
1827 | | }; |
1828 | | |
1829 | | template <JsonbModifyType modify_type> |
1830 | | class FunctionJsonbModify : public IFunction { |
1831 | | public: |
1832 | | static constexpr auto name = JsonbModifyName<modify_type>::name; |
1833 | | static constexpr auto alias = JsonbModifyName<modify_type>::alias; |
1834 | | |
1835 | 112 | static FunctionPtr create() { return std::make_shared<FunctionJsonbModify<modify_type>>(); }_ZN5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE6createEv Line | Count | Source | 1835 | 38 | static FunctionPtr create() { return std::make_shared<FunctionJsonbModify<modify_type>>(); } |
_ZN5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE6createEv Line | Count | Source | 1835 | 37 | static FunctionPtr create() { return std::make_shared<FunctionJsonbModify<modify_type>>(); } |
_ZN5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE6createEv Line | Count | Source | 1835 | 37 | static FunctionPtr create() { return std::make_shared<FunctionJsonbModify<modify_type>>(); } |
|
1836 | | |
1837 | 0 | String get_name() const override { return name; }Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE8get_nameB5cxx11Ev |
1838 | | |
1839 | 0 | size_t get_number_of_arguments() const override { return 0; }Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE23get_number_of_argumentsEv Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE23get_number_of_argumentsEv Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE23get_number_of_argumentsEv |
1840 | 88 | bool is_variadic() const override { return true; }_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE11is_variadicEv Line | Count | Source | 1840 | 30 | bool is_variadic() const override { return true; } |
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE11is_variadicEv Line | Count | Source | 1840 | 29 | bool is_variadic() const override { return true; } |
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE11is_variadicEv Line | Count | Source | 1840 | 29 | bool is_variadic() const override { return true; } |
|
1841 | | |
1842 | 170 | bool use_default_implementation_for_nulls() const override { return false; }_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE36use_default_implementation_for_nullsEv Line | Count | Source | 1842 | 58 | bool use_default_implementation_for_nulls() const override { return false; } |
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE36use_default_implementation_for_nullsEv Line | Count | Source | 1842 | 56 | bool use_default_implementation_for_nulls() const override { return false; } |
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE36use_default_implementation_for_nullsEv Line | Count | Source | 1842 | 56 | bool use_default_implementation_for_nulls() const override { return false; } |
|
1843 | | |
1844 | 85 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
1845 | 85 | return make_nullable(std::make_shared<DataTypeJsonb>()); |
1846 | 85 | } _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 1844 | 29 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 1845 | 29 | return make_nullable(std::make_shared<DataTypeJsonb>()); | 1846 | 29 | } |
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 1844 | 28 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 1845 | 28 | return make_nullable(std::make_shared<DataTypeJsonb>()); | 1846 | 28 | } |
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 1844 | 28 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 1845 | 28 | return make_nullable(std::make_shared<DataTypeJsonb>()); | 1846 | 28 | } |
|
1847 | | |
1848 | | Status create_all_null_result(const DataTypePtr& return_data_type, Block& block, |
1849 | 0 | uint32_t result, size_t input_rows_count) const { |
1850 | 0 | auto result_column = return_data_type->create_column(); |
1851 | 0 | result_column->insert_default(); |
1852 | 0 | auto const_column = ColumnConst::create(std::move(result_column), input_rows_count); |
1853 | 0 | block.get_by_position(result).column = std::move(const_column); |
1854 | 0 | return Status::OK(); |
1855 | 0 | } Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE22create_all_null_resultERKSt10shared_ptrIKNS_9IDataTypeEERNS_5BlockEjm Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE22create_all_null_resultERKSt10shared_ptrIKNS_9IDataTypeEERNS_5BlockEjm Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE22create_all_null_resultERKSt10shared_ptrIKNS_9IDataTypeEERNS_5BlockEjm |
1856 | | |
1857 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
1858 | 85 | uint32_t result, size_t input_rows_count) const override { |
1859 | 85 | if (arguments.size() % 2 != 1 || arguments.size() < 3) { |
1860 | 0 | return Status::InvalidArgument( |
1861 | 0 | "Function {} must have an odd number of arguments and more than 2 arguments, " |
1862 | 0 | "but got: {}", |
1863 | 0 | name, arguments.size()); |
1864 | 0 | } |
1865 | | |
1866 | 85 | const size_t keys_count = (arguments.size() - 1) / 2; |
1867 | | |
1868 | 85 | auto return_data_type = make_nullable(std::make_shared<DataTypeJsonb>()); |
1869 | | |
1870 | 85 | auto result_column = return_data_type->create_column(); |
1871 | 85 | auto& result_nullable_col = assert_cast<ColumnNullable&>(*result_column); |
1872 | 85 | auto& null_map = result_nullable_col.get_null_map_data(); |
1873 | 85 | auto& res_string_column = |
1874 | 85 | assert_cast<ColumnString&>(result_nullable_col.get_nested_column()); |
1875 | 85 | auto& res_chars = res_string_column.get_chars(); |
1876 | 85 | auto& res_offsets = res_string_column.get_offsets(); |
1877 | | |
1878 | 85 | null_map.resize_fill(input_rows_count, 0); |
1879 | 85 | res_offsets.resize(input_rows_count); |
1880 | 85 | auto&& [json_data_arg_column, json_data_const] = |
1881 | 85 | unpack_if_const(block.get_by_position(arguments[0]).column); |
1882 | | |
1883 | 85 | if (json_data_const) { |
1884 | 11 | if (json_data_arg_column->is_null_at(0)) { |
1885 | 0 | return create_all_null_result(return_data_type, block, result, input_rows_count); |
1886 | 0 | } |
1887 | 11 | } |
1888 | | |
1889 | 85 | std::vector<const ColumnString*> json_path_columns(keys_count); |
1890 | 85 | std::vector<bool> json_path_constant(keys_count); |
1891 | 85 | std::vector<const NullMap*> json_path_null_maps(keys_count, nullptr); |
1892 | | |
1893 | 85 | std::vector<const ColumnString*> json_value_columns(keys_count); |
1894 | 85 | std::vector<bool> json_value_constant(keys_count); |
1895 | 85 | std::vector<const NullMap*> json_value_null_maps(keys_count, nullptr); |
1896 | | |
1897 | 85 | const NullMap* json_data_null_map = nullptr; |
1898 | 85 | const ColumnString* json_data_column; |
1899 | 85 | if (const auto* nullable_column = |
1900 | 85 | check_and_get_column<ColumnNullable>(json_data_arg_column.get())) { |
1901 | 85 | json_data_null_map = &nullable_column->get_null_map_data(); |
1902 | 85 | const auto& nested_column = nullable_column->get_nested_column(); |
1903 | 85 | json_data_column = assert_cast<const ColumnString*>(&nested_column); |
1904 | 85 | } else { |
1905 | 0 | json_data_column = assert_cast<const ColumnString*>(json_data_arg_column.get()); |
1906 | 0 | } |
1907 | | |
1908 | 195 | for (size_t i = 1; i < arguments.size(); i += 2) { |
1909 | 110 | auto&& [path_column, path_const] = |
1910 | 110 | unpack_if_const(block.get_by_position(arguments[i]).column); |
1911 | 110 | auto&& [value_column, value_const] = |
1912 | 110 | unpack_if_const(block.get_by_position(arguments[i + 1]).column); |
1913 | | |
1914 | 110 | if (path_const) { |
1915 | 27 | if (path_column->is_null_at(0)) { |
1916 | 0 | return create_all_null_result(return_data_type, block, result, |
1917 | 0 | input_rows_count); |
1918 | 0 | } |
1919 | 27 | } |
1920 | | |
1921 | 110 | json_path_constant[i / 2] = path_const; |
1922 | 110 | if (const auto* nullable_column = |
1923 | 110 | check_and_get_column<ColumnNullable>(path_column.get())) { |
1924 | 8 | json_path_null_maps[i / 2] = &nullable_column->get_null_map_data(); |
1925 | 8 | const auto& nested_column = nullable_column->get_nested_column(); |
1926 | 8 | json_path_columns[i / 2] = assert_cast<const ColumnString*>(&nested_column); |
1927 | 102 | } else { |
1928 | 102 | json_path_columns[i / 2] = assert_cast<const ColumnString*>(path_column.get()); |
1929 | 102 | } |
1930 | | |
1931 | 110 | json_value_constant[i / 2] = value_const; |
1932 | 110 | if (const auto* nullable_column = |
1933 | 110 | check_and_get_column<ColumnNullable>(value_column.get())) { |
1934 | 53 | json_value_null_maps[i / 2] = &nullable_column->get_null_map_data(); |
1935 | 53 | const auto& nested_column = nullable_column->get_nested_column(); |
1936 | 53 | json_value_columns[i / 2] = assert_cast<const ColumnString*>(&nested_column); |
1937 | 57 | } else { |
1938 | 57 | json_value_columns[i / 2] = assert_cast<const ColumnString*>(value_column.get()); |
1939 | 57 | } |
1940 | 110 | } |
1941 | | |
1942 | 85 | DorisVector<const JsonbDocument*> json_documents(input_rows_count); |
1943 | 85 | if (json_data_const) { |
1944 | 11 | auto json_data_string = json_data_column->get_data_at(0); |
1945 | 11 | const JsonbDocument* doc = nullptr; |
1946 | 11 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(json_data_string.data, |
1947 | 11 | json_data_string.size, &doc)); |
1948 | 11 | if (!doc || !doc->getValue()) [[unlikely]] { |
1949 | 0 | return create_all_null_result(return_data_type, block, result, input_rows_count); |
1950 | 0 | } |
1951 | 62 | for (size_t i = 0; i != input_rows_count; ++i) { |
1952 | 51 | json_documents[i] = doc; |
1953 | 51 | } |
1954 | 74 | } else { |
1955 | 152 | for (size_t i = 0; i != input_rows_count; ++i) { |
1956 | 78 | if (json_data_null_map && (*json_data_null_map)[i]) { |
1957 | 0 | null_map[i] = 1; |
1958 | 0 | json_documents[i] = nullptr; |
1959 | 0 | continue; |
1960 | 0 | } |
1961 | | |
1962 | 78 | auto json_data_string = json_data_column->get_data_at(i); |
1963 | 78 | const JsonbDocument* doc = nullptr; |
1964 | 78 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(json_data_string.data, |
1965 | 78 | json_data_string.size, &doc)); |
1966 | 78 | if (!doc || !doc->getValue()) [[unlikely]] { |
1967 | 0 | null_map[i] = 1; |
1968 | 0 | continue; |
1969 | 0 | } |
1970 | 78 | json_documents[i] = doc; |
1971 | 78 | } |
1972 | 74 | } |
1973 | | |
1974 | 85 | DorisVector<DorisVector<JsonbPath>> json_paths(keys_count); |
1975 | 85 | DorisVector<DorisVector<const JsonbValue*>> json_values(keys_count); |
1976 | | |
1977 | 85 | RETURN_IF_ERROR(parse_paths_and_values(json_paths, json_values, arguments, input_rows_count, |
1978 | 85 | json_path_columns, json_path_constant, |
1979 | 85 | json_path_null_maps, json_value_columns, |
1980 | 85 | json_value_constant, json_value_null_maps)); |
1981 | | |
1982 | 77 | JsonbWriter writer; |
1983 | 77 | struct DocumentBuffer { |
1984 | 77 | DorisUniqueBufferPtr<char> ptr; |
1985 | 77 | size_t size = 0; |
1986 | 77 | size_t capacity = 0; |
1987 | 77 | }; |
1988 | | |
1989 | 77 | DocumentBuffer tmp_buffer; |
1990 | | |
1991 | 218 | for (size_t row_idx = 0; row_idx != input_rows_count; ++row_idx) { |
1992 | 341 | for (size_t i = 1; i < arguments.size(); i += 2) { |
1993 | 200 | const size_t index = i / 2; |
1994 | 200 | auto& json_path = json_paths[index]; |
1995 | 200 | auto& json_value = json_values[index]; |
1996 | | |
1997 | 200 | const auto path_index = index_check_const(row_idx, json_path_constant[index]); |
1998 | 200 | const auto value_index = index_check_const(row_idx, json_value_constant[index]); |
1999 | | |
2000 | 200 | if (null_map[row_idx]) { |
2001 | 0 | continue; |
2002 | 0 | } |
2003 | | |
2004 | 200 | if (json_documents[row_idx] == nullptr) { |
2005 | 0 | null_map[row_idx] = 1; |
2006 | 0 | continue; |
2007 | 0 | } |
2008 | | |
2009 | 200 | if (json_path_null_maps[index] && (*json_path_null_maps[index])[path_index]) { |
2010 | 4 | null_map[row_idx] = 1; |
2011 | 4 | continue; |
2012 | 4 | } |
2013 | | |
2014 | 196 | auto find_result = |
2015 | 196 | json_documents[row_idx]->getValue()->findValue(json_path[path_index]); |
2016 | | |
2017 | 196 | if (find_result.is_wildcard) { |
2018 | 0 | return Status::InvalidArgument( |
2019 | 0 | " In this situation, path expressions may not contain the * and ** " |
2020 | 0 | "tokens or an array range, argument index: {}, row index: {}", |
2021 | 0 | i, row_idx); |
2022 | 0 | } |
2023 | | |
2024 | 196 | if constexpr (modify_type == JsonbModifyType::Insert) { |
2025 | 62 | if (find_result.value) { |
2026 | 18 | continue; |
2027 | 18 | } |
2028 | 67 | } else if constexpr (modify_type == JsonbModifyType::Replace) { |
2029 | 67 | if (!find_result.value) { |
2030 | 11 | continue; |
2031 | 11 | } |
2032 | 67 | } |
2033 | | |
2034 | 100 | std::vector<const JsonbValue*> parents; |
2035 | | |
2036 | 196 | bool replace = false; |
2037 | 196 | parents.emplace_back(json_documents[row_idx]->getValue()); |
2038 | 196 | const auto legs_count = json_path[path_index].get_leg_vector_size(); |
2039 | 196 | if (find_result.value) { |
2040 | | // find target path, replace it with the new value. |
2041 | 100 | replace = true; |
2042 | 100 | if (!build_parents_by_path(json_documents[row_idx]->getValue(), |
2043 | 100 | json_path[path_index], parents)) { |
2044 | 0 | continue; |
2045 | 0 | } |
2046 | 100 | } else { |
2047 | | // does not find target path, insert the new value. |
2048 | 96 | JsonbPath new_path; |
2049 | 96 | DCHECK_GT(legs_count, 0); |
2050 | 156 | for (size_t j = 0; j + 1 < legs_count; ++j) { |
2051 | 60 | auto* current_leg = json_path[path_index].get_leg_from_leg_vector(j); |
2052 | 60 | std::unique_ptr<leg_info> leg = std::make_unique<leg_info>( |
2053 | 60 | current_leg->leg_ptr, current_leg->leg_len, |
2054 | 60 | current_leg->array_index, current_leg->type); |
2055 | 60 | new_path.add_leg_to_leg_vector(std::move(leg)); |
2056 | 60 | } |
2057 | | |
2058 | 96 | if (!build_parents_by_path(json_documents[row_idx]->getValue(), new_path, |
2059 | 96 | parents)) { |
2060 | 12 | continue; |
2061 | 12 | } |
2062 | 96 | } |
2063 | | |
2064 | 184 | leg_info* last_leg = |
2065 | 184 | legs_count > 0 |
2066 | 184 | ? json_path[path_index].get_leg_from_leg_vector(legs_count - 1) |
2067 | 184 | : nullptr; |
2068 | 184 | RETURN_IF_ERROR(write_json_value(json_documents[row_idx]->getValue(), parents, 0, |
2069 | 184 | json_value[value_index], replace, last_leg, |
2070 | 184 | writer)); |
2071 | | |
2072 | 184 | auto* writer_output = writer.getOutput(); |
2073 | 184 | if (writer_output->getSize() > tmp_buffer.capacity) { |
2074 | 67 | tmp_buffer.capacity = |
2075 | 67 | ((size_t(writer_output->getSize()) + 1024 - 1) / 1024) * 1024; |
2076 | 67 | tmp_buffer.ptr = make_unique_buffer<char>(tmp_buffer.capacity); |
2077 | 67 | DCHECK_LE(writer_output->getSize(), tmp_buffer.capacity); |
2078 | 67 | } |
2079 | | |
2080 | 184 | memcpy(tmp_buffer.ptr.get(), writer_output->getBuffer(), writer_output->getSize()); |
2081 | 184 | tmp_buffer.size = writer_output->getSize(); |
2082 | | |
2083 | 184 | writer.reset(); |
2084 | | |
2085 | 184 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument( |
2086 | 184 | tmp_buffer.ptr.get(), tmp_buffer.size, &json_documents[row_idx])); |
2087 | 184 | } |
2088 | | |
2089 | 141 | if (!null_map[row_idx]) { |
2090 | 108 | const auto* jsonb_document = json_documents[row_idx]; |
2091 | 108 | const auto size = jsonb_document->numPackedBytes(); |
2092 | 108 | res_chars.insert(reinterpret_cast<const char*>(jsonb_document), |
2093 | 108 | reinterpret_cast<const char*>(jsonb_document) + size); |
2094 | 108 | } |
2095 | | |
2096 | 141 | res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size()); |
2097 | | |
2098 | 141 | if (!null_map[row_idx]) { |
2099 | 108 | auto* ptr = res_chars.data() + res_offsets[row_idx - 1]; |
2100 | 108 | auto size = res_offsets[row_idx] - res_offsets[row_idx - 1]; |
2101 | 108 | const JsonbDocument* doc = nullptr; |
2102 | 108 | THROW_IF_ERROR(JsonbDocument::checkAndCreateDocument( |
2103 | 108 | reinterpret_cast<const char*>(ptr), size, &doc)); |
2104 | 108 | } |
2105 | 141 | } |
2106 | | |
2107 | 106 | block.get_by_position(result).column = std::move(result_column); |
2108 | 106 | return Status::OK(); |
2109 | 77 | } _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 1858 | 29 | uint32_t result, size_t input_rows_count) const override { | 1859 | 29 | if (arguments.size() % 2 != 1 || arguments.size() < 3) { | 1860 | 0 | return Status::InvalidArgument( | 1861 | 0 | "Function {} must have an odd number of arguments and more than 2 arguments, " | 1862 | 0 | "but got: {}", | 1863 | 0 | name, arguments.size()); | 1864 | 0 | } | 1865 | | | 1866 | 29 | const size_t keys_count = (arguments.size() - 1) / 2; | 1867 | | | 1868 | 29 | auto return_data_type = make_nullable(std::make_shared<DataTypeJsonb>()); | 1869 | | | 1870 | 29 | auto result_column = return_data_type->create_column(); | 1871 | 29 | auto& result_nullable_col = assert_cast<ColumnNullable&>(*result_column); | 1872 | 29 | auto& null_map = result_nullable_col.get_null_map_data(); | 1873 | 29 | auto& res_string_column = | 1874 | 29 | assert_cast<ColumnString&>(result_nullable_col.get_nested_column()); | 1875 | 29 | auto& res_chars = res_string_column.get_chars(); | 1876 | 29 | auto& res_offsets = res_string_column.get_offsets(); | 1877 | | | 1878 | 29 | null_map.resize_fill(input_rows_count, 0); | 1879 | 29 | res_offsets.resize(input_rows_count); | 1880 | 29 | auto&& [json_data_arg_column, json_data_const] = | 1881 | 29 | unpack_if_const(block.get_by_position(arguments[0]).column); | 1882 | | | 1883 | 29 | if (json_data_const) { | 1884 | 5 | if (json_data_arg_column->is_null_at(0)) { | 1885 | 0 | return create_all_null_result(return_data_type, block, result, input_rows_count); | 1886 | 0 | } | 1887 | 5 | } | 1888 | | | 1889 | 29 | std::vector<const ColumnString*> json_path_columns(keys_count); | 1890 | 29 | std::vector<bool> json_path_constant(keys_count); | 1891 | 29 | std::vector<const NullMap*> json_path_null_maps(keys_count, nullptr); | 1892 | | | 1893 | 29 | std::vector<const ColumnString*> 
json_value_columns(keys_count); | 1894 | 29 | std::vector<bool> json_value_constant(keys_count); | 1895 | 29 | std::vector<const NullMap*> json_value_null_maps(keys_count, nullptr); | 1896 | | | 1897 | 29 | const NullMap* json_data_null_map = nullptr; | 1898 | 29 | const ColumnString* json_data_column; | 1899 | 29 | if (const auto* nullable_column = | 1900 | 29 | check_and_get_column<ColumnNullable>(json_data_arg_column.get())) { | 1901 | 29 | json_data_null_map = &nullable_column->get_null_map_data(); | 1902 | 29 | const auto& nested_column = nullable_column->get_nested_column(); | 1903 | 29 | json_data_column = assert_cast<const ColumnString*>(&nested_column); | 1904 | 29 | } else { | 1905 | 0 | json_data_column = assert_cast<const ColumnString*>(json_data_arg_column.get()); | 1906 | 0 | } | 1907 | | | 1908 | 65 | for (size_t i = 1; i < arguments.size(); i += 2) { | 1909 | 36 | auto&& [path_column, path_const] = | 1910 | 36 | unpack_if_const(block.get_by_position(arguments[i]).column); | 1911 | 36 | auto&& [value_column, value_const] = | 1912 | 36 | unpack_if_const(block.get_by_position(arguments[i + 1]).column); | 1913 | | | 1914 | 36 | if (path_const) { | 1915 | 7 | if (path_column->is_null_at(0)) { | 1916 | 0 | return create_all_null_result(return_data_type, block, result, | 1917 | 0 | input_rows_count); | 1918 | 0 | } | 1919 | 7 | } | 1920 | | | 1921 | 36 | json_path_constant[i / 2] = path_const; | 1922 | 36 | if (const auto* nullable_column = | 1923 | 36 | check_and_get_column<ColumnNullable>(path_column.get())) { | 1924 | 5 | json_path_null_maps[i / 2] = &nullable_column->get_null_map_data(); | 1925 | 5 | const auto& nested_column = nullable_column->get_nested_column(); | 1926 | 5 | json_path_columns[i / 2] = assert_cast<const ColumnString*>(&nested_column); | 1927 | 31 | } else { | 1928 | 31 | json_path_columns[i / 2] = assert_cast<const ColumnString*>(path_column.get()); | 1929 | 31 | } | 1930 | | | 1931 | 36 | json_value_constant[i / 2] = value_const; | 
1932 | 36 | if (const auto* nullable_column = | 1933 | 36 | check_and_get_column<ColumnNullable>(value_column.get())) { | 1934 | 17 | json_value_null_maps[i / 2] = &nullable_column->get_null_map_data(); | 1935 | 17 | const auto& nested_column = nullable_column->get_nested_column(); | 1936 | 17 | json_value_columns[i / 2] = assert_cast<const ColumnString*>(&nested_column); | 1937 | 19 | } else { | 1938 | 19 | json_value_columns[i / 2] = assert_cast<const ColumnString*>(value_column.get()); | 1939 | 19 | } | 1940 | 36 | } | 1941 | | | 1942 | 29 | DorisVector<const JsonbDocument*> json_documents(input_rows_count); | 1943 | 29 | if (json_data_const) { | 1944 | 5 | auto json_data_string = json_data_column->get_data_at(0); | 1945 | 5 | const JsonbDocument* doc = nullptr; | 1946 | 5 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(json_data_string.data, | 1947 | 5 | json_data_string.size, &doc)); | 1948 | 5 | if (!doc || !doc->getValue()) [[unlikely]] { | 1949 | 0 | return create_all_null_result(return_data_type, block, result, input_rows_count); | 1950 | 0 | } | 1951 | 30 | for (size_t i = 0; i != input_rows_count; ++i) { | 1952 | 25 | json_documents[i] = doc; | 1953 | 25 | } | 1954 | 24 | } else { | 1955 | 50 | for (size_t i = 0; i != input_rows_count; ++i) { | 1956 | 26 | if (json_data_null_map && (*json_data_null_map)[i]) { | 1957 | 0 | null_map[i] = 1; | 1958 | 0 | json_documents[i] = nullptr; | 1959 | 0 | continue; | 1960 | 0 | } | 1961 | | | 1962 | 26 | auto json_data_string = json_data_column->get_data_at(i); | 1963 | 26 | const JsonbDocument* doc = nullptr; | 1964 | 26 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(json_data_string.data, | 1965 | 26 | json_data_string.size, &doc)); | 1966 | 26 | if (!doc || !doc->getValue()) [[unlikely]] { | 1967 | 0 | null_map[i] = 1; | 1968 | 0 | continue; | 1969 | 0 | } | 1970 | 26 | json_documents[i] = doc; | 1971 | 26 | } | 1972 | 24 | } | 1973 | | | 1974 | 29 | DorisVector<DorisVector<JsonbPath>> 
json_paths(keys_count); | 1975 | 29 | DorisVector<DorisVector<const JsonbValue*>> json_values(keys_count); | 1976 | | | 1977 | 29 | RETURN_IF_ERROR(parse_paths_and_values(json_paths, json_values, arguments, input_rows_count, | 1978 | 29 | json_path_columns, json_path_constant, | 1979 | 29 | json_path_null_maps, json_value_columns, | 1980 | 29 | json_value_constant, json_value_null_maps)); | 1981 | | | 1982 | 25 | JsonbWriter writer; | 1983 | 25 | struct DocumentBuffer { | 1984 | 25 | DorisUniqueBufferPtr<char> ptr; | 1985 | 25 | size_t size = 0; | 1986 | 25 | size_t capacity = 0; | 1987 | 25 | }; | 1988 | | | 1989 | 25 | DocumentBuffer tmp_buffer; | 1990 | | | 1991 | 81 | for (size_t row_idx = 0; row_idx != input_rows_count; ++row_idx) { | 1992 | 120 | for (size_t i = 1; i < arguments.size(); i += 2) { | 1993 | 64 | const size_t index = i / 2; | 1994 | 64 | auto& json_path = json_paths[index]; | 1995 | 64 | auto& json_value = json_values[index]; | 1996 | | | 1997 | 64 | const auto path_index = index_check_const(row_idx, json_path_constant[index]); | 1998 | 64 | const auto value_index = index_check_const(row_idx, json_value_constant[index]); | 1999 | | | 2000 | 64 | if (null_map[row_idx]) { | 2001 | 0 | continue; | 2002 | 0 | } | 2003 | | | 2004 | 64 | if (json_documents[row_idx] == nullptr) { | 2005 | 0 | null_map[row_idx] = 1; | 2006 | 0 | continue; | 2007 | 0 | } | 2008 | | | 2009 | 64 | if (json_path_null_maps[index] && (*json_path_null_maps[index])[path_index]) { | 2010 | 2 | null_map[row_idx] = 1; | 2011 | 2 | continue; | 2012 | 2 | } | 2013 | | | 2014 | 62 | auto find_result = | 2015 | 62 | json_documents[row_idx]->getValue()->findValue(json_path[path_index]); | 2016 | | | 2017 | 62 | if (find_result.is_wildcard) { | 2018 | 0 | return Status::InvalidArgument( | 2019 | 0 | " In this situation, path expressions may not contain the * and ** " | 2020 | 0 | "tokens or an array range, argument index: {}, row index: {}", | 2021 | 0 | i, row_idx); | 2022 | 0 | } | 
2023 | | | 2024 | 62 | if constexpr (modify_type == JsonbModifyType::Insert) { | 2025 | 62 | if (find_result.value) { | 2026 | 18 | continue; | 2027 | 18 | } | 2028 | | } else if constexpr (modify_type == JsonbModifyType::Replace) { | 2029 | | if (!find_result.value) { | 2030 | | continue; | 2031 | | } | 2032 | | } | 2033 | | | 2034 | 44 | std::vector<const JsonbValue*> parents; | 2035 | | | 2036 | 62 | bool replace = false; | 2037 | 62 | parents.emplace_back(json_documents[row_idx]->getValue()); | 2038 | 62 | const auto legs_count = json_path[path_index].get_leg_vector_size(); | 2039 | 62 | if (find_result.value) { | 2040 | | // find target path, replace it with the new value. | 2041 | 0 | replace = true; | 2042 | 0 | if (!build_parents_by_path(json_documents[row_idx]->getValue(), | 2043 | 0 | json_path[path_index], parents)) { | 2044 | 0 | continue; | 2045 | 0 | } | 2046 | 62 | } else { | 2047 | | // does not find target path, insert the new value. | 2048 | 62 | JsonbPath new_path; | 2049 | 62 | DCHECK_GT(legs_count, 0); | 2050 | 103 | for (size_t j = 0; j + 1 < legs_count; ++j) { | 2051 | 41 | auto* current_leg = json_path[path_index].get_leg_from_leg_vector(j); | 2052 | 41 | std::unique_ptr<leg_info> leg = std::make_unique<leg_info>( | 2053 | 41 | current_leg->leg_ptr, current_leg->leg_len, | 2054 | 41 | current_leg->array_index, current_leg->type); | 2055 | 41 | new_path.add_leg_to_leg_vector(std::move(leg)); | 2056 | 41 | } | 2057 | | | 2058 | 62 | if (!build_parents_by_path(json_documents[row_idx]->getValue(), new_path, | 2059 | 62 | parents)) { | 2060 | 1 | continue; | 2061 | 1 | } | 2062 | 62 | } | 2063 | | | 2064 | 61 | leg_info* last_leg = | 2065 | 61 | legs_count > 0 | 2066 | 61 | ? 
json_path[path_index].get_leg_from_leg_vector(legs_count - 1) | 2067 | 61 | : nullptr; | 2068 | 61 | RETURN_IF_ERROR(write_json_value(json_documents[row_idx]->getValue(), parents, 0, | 2069 | 61 | json_value[value_index], replace, last_leg, | 2070 | 61 | writer)); | 2071 | | | 2072 | 61 | auto* writer_output = writer.getOutput(); | 2073 | 61 | if (writer_output->getSize() > tmp_buffer.capacity) { | 2074 | 20 | tmp_buffer.capacity = | 2075 | 20 | ((size_t(writer_output->getSize()) + 1024 - 1) / 1024) * 1024; | 2076 | 20 | tmp_buffer.ptr = make_unique_buffer<char>(tmp_buffer.capacity); | 2077 | 20 | DCHECK_LE(writer_output->getSize(), tmp_buffer.capacity); | 2078 | 20 | } | 2079 | | | 2080 | 61 | memcpy(tmp_buffer.ptr.get(), writer_output->getBuffer(), writer_output->getSize()); | 2081 | 61 | tmp_buffer.size = writer_output->getSize(); | 2082 | | | 2083 | 61 | writer.reset(); | 2084 | | | 2085 | 61 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument( | 2086 | 61 | tmp_buffer.ptr.get(), tmp_buffer.size, &json_documents[row_idx])); | 2087 | 61 | } | 2088 | | | 2089 | 56 | if (!null_map[row_idx]) { | 2090 | 36 | const auto* jsonb_document = json_documents[row_idx]; | 2091 | 36 | const auto size = jsonb_document->numPackedBytes(); | 2092 | 36 | res_chars.insert(reinterpret_cast<const char*>(jsonb_document), | 2093 | 36 | reinterpret_cast<const char*>(jsonb_document) + size); | 2094 | 36 | } | 2095 | | | 2096 | 56 | res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size()); | 2097 | | | 2098 | 56 | if (!null_map[row_idx]) { | 2099 | 36 | auto* ptr = res_chars.data() + res_offsets[row_idx - 1]; | 2100 | 36 | auto size = res_offsets[row_idx] - res_offsets[row_idx - 1]; | 2101 | 36 | const JsonbDocument* doc = nullptr; | 2102 | 36 | THROW_IF_ERROR(JsonbDocument::checkAndCreateDocument( | 2103 | 36 | reinterpret_cast<const char*>(ptr), size, &doc)); | 2104 | 36 | } | 2105 | 56 | } | 2106 | | | 2107 | 43 | block.get_by_position(result).column = 
std::move(result_column); | 2108 | 43 | return Status::OK(); | 2109 | 25 | } |
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 1858 | 28 | uint32_t result, size_t input_rows_count) const override { | 1859 | 28 | if (arguments.size() % 2 != 1 || arguments.size() < 3) { | 1860 | 0 | return Status::InvalidArgument( | 1861 | 0 | "Function {} must have an odd number of arguments and more than 2 arguments, " | 1862 | 0 | "but got: {}", | 1863 | 0 | name, arguments.size()); | 1864 | 0 | } | 1865 | | | 1866 | 28 | const size_t keys_count = (arguments.size() - 1) / 2; | 1867 | | | 1868 | 28 | auto return_data_type = make_nullable(std::make_shared<DataTypeJsonb>()); | 1869 | | | 1870 | 28 | auto result_column = return_data_type->create_column(); | 1871 | 28 | auto& result_nullable_col = assert_cast<ColumnNullable&>(*result_column); | 1872 | 28 | auto& null_map = result_nullable_col.get_null_map_data(); | 1873 | 28 | auto& res_string_column = | 1874 | 28 | assert_cast<ColumnString&>(result_nullable_col.get_nested_column()); | 1875 | 28 | auto& res_chars = res_string_column.get_chars(); | 1876 | 28 | auto& res_offsets = res_string_column.get_offsets(); | 1877 | | | 1878 | 28 | null_map.resize_fill(input_rows_count, 0); | 1879 | 28 | res_offsets.resize(input_rows_count); | 1880 | 28 | auto&& [json_data_arg_column, json_data_const] = | 1881 | 28 | unpack_if_const(block.get_by_position(arguments[0]).column); | 1882 | | | 1883 | 28 | if (json_data_const) { | 1884 | 3 | if (json_data_arg_column->is_null_at(0)) { | 1885 | 0 | return create_all_null_result(return_data_type, block, result, input_rows_count); | 1886 | 0 | } | 1887 | 3 | } | 1888 | | | 1889 | 28 | std::vector<const ColumnString*> json_path_columns(keys_count); | 1890 | 28 | std::vector<bool> json_path_constant(keys_count); | 1891 | 28 | std::vector<const NullMap*> json_path_null_maps(keys_count, nullptr); | 1892 | | | 1893 | 28 | std::vector<const ColumnString*> json_value_columns(keys_count); | 
1894 | 28 | std::vector<bool> json_value_constant(keys_count); | 1895 | 28 | std::vector<const NullMap*> json_value_null_maps(keys_count, nullptr); | 1896 | | | 1897 | 28 | const NullMap* json_data_null_map = nullptr; | 1898 | 28 | const ColumnString* json_data_column; | 1899 | 28 | if (const auto* nullable_column = | 1900 | 28 | check_and_get_column<ColumnNullable>(json_data_arg_column.get())) { | 1901 | 28 | json_data_null_map = &nullable_column->get_null_map_data(); | 1902 | 28 | const auto& nested_column = nullable_column->get_nested_column(); | 1903 | 28 | json_data_column = assert_cast<const ColumnString*>(&nested_column); | 1904 | 28 | } else { | 1905 | 0 | json_data_column = assert_cast<const ColumnString*>(json_data_arg_column.get()); | 1906 | 0 | } | 1907 | | | 1908 | 64 | for (size_t i = 1; i < arguments.size(); i += 2) { | 1909 | 36 | auto&& [path_column, path_const] = | 1910 | 36 | unpack_if_const(block.get_by_position(arguments[i]).column); | 1911 | 36 | auto&& [value_column, value_const] = | 1912 | 36 | unpack_if_const(block.get_by_position(arguments[i + 1]).column); | 1913 | | | 1914 | 36 | if (path_const) { | 1915 | 9 | if (path_column->is_null_at(0)) { | 1916 | 0 | return create_all_null_result(return_data_type, block, result, | 1917 | 0 | input_rows_count); | 1918 | 0 | } | 1919 | 9 | } | 1920 | | | 1921 | 36 | json_path_constant[i / 2] = path_const; | 1922 | 36 | if (const auto* nullable_column = | 1923 | 36 | check_and_get_column<ColumnNullable>(path_column.get())) { | 1924 | 2 | json_path_null_maps[i / 2] = &nullable_column->get_null_map_data(); | 1925 | 2 | const auto& nested_column = nullable_column->get_nested_column(); | 1926 | 2 | json_path_columns[i / 2] = assert_cast<const ColumnString*>(&nested_column); | 1927 | 34 | } else { | 1928 | 34 | json_path_columns[i / 2] = assert_cast<const ColumnString*>(path_column.get()); | 1929 | 34 | } | 1930 | | | 1931 | 36 | json_value_constant[i / 2] = value_const; | 1932 | 36 | if (const auto* 
nullable_column = | 1933 | 36 | check_and_get_column<ColumnNullable>(value_column.get())) { | 1934 | 17 | json_value_null_maps[i / 2] = &nullable_column->get_null_map_data(); | 1935 | 17 | const auto& nested_column = nullable_column->get_nested_column(); | 1936 | 17 | json_value_columns[i / 2] = assert_cast<const ColumnString*>(&nested_column); | 1937 | 19 | } else { | 1938 | 19 | json_value_columns[i / 2] = assert_cast<const ColumnString*>(value_column.get()); | 1939 | 19 | } | 1940 | 36 | } | 1941 | | | 1942 | 28 | DorisVector<const JsonbDocument*> json_documents(input_rows_count); | 1943 | 28 | if (json_data_const) { | 1944 | 3 | auto json_data_string = json_data_column->get_data_at(0); | 1945 | 3 | const JsonbDocument* doc = nullptr; | 1946 | 3 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(json_data_string.data, | 1947 | 3 | json_data_string.size, &doc)); | 1948 | 3 | if (!doc || !doc->getValue()) [[unlikely]] { | 1949 | 0 | return create_all_null_result(return_data_type, block, result, input_rows_count); | 1950 | 0 | } | 1951 | 17 | for (size_t i = 0; i != input_rows_count; ++i) { | 1952 | 14 | json_documents[i] = doc; | 1953 | 14 | } | 1954 | 25 | } else { | 1955 | 52 | for (size_t i = 0; i != input_rows_count; ++i) { | 1956 | 27 | if (json_data_null_map && (*json_data_null_map)[i]) { | 1957 | 0 | null_map[i] = 1; | 1958 | 0 | json_documents[i] = nullptr; | 1959 | 0 | continue; | 1960 | 0 | } | 1961 | | | 1962 | 27 | auto json_data_string = json_data_column->get_data_at(i); | 1963 | 27 | const JsonbDocument* doc = nullptr; | 1964 | 27 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(json_data_string.data, | 1965 | 27 | json_data_string.size, &doc)); | 1966 | 27 | if (!doc || !doc->getValue()) [[unlikely]] { | 1967 | 0 | null_map[i] = 1; | 1968 | 0 | continue; | 1969 | 0 | } | 1970 | 27 | json_documents[i] = doc; | 1971 | 27 | } | 1972 | 25 | } | 1973 | | | 1974 | 28 | DorisVector<DorisVector<JsonbPath>> json_paths(keys_count); | 1975 | 28 | 
DorisVector<DorisVector<const JsonbValue*>> json_values(keys_count); | 1976 | | | 1977 | 28 | RETURN_IF_ERROR(parse_paths_and_values(json_paths, json_values, arguments, input_rows_count, | 1978 | 28 | json_path_columns, json_path_constant, | 1979 | 28 | json_path_null_maps, json_value_columns, | 1980 | 28 | json_value_constant, json_value_null_maps)); | 1981 | | | 1982 | 26 | JsonbWriter writer; | 1983 | 26 | struct DocumentBuffer { | 1984 | 26 | DorisUniqueBufferPtr<char> ptr; | 1985 | 26 | size_t size = 0; | 1986 | 26 | size_t capacity = 0; | 1987 | 26 | }; | 1988 | | | 1989 | 26 | DocumentBuffer tmp_buffer; | 1990 | | | 1991 | 65 | for (size_t row_idx = 0; row_idx != input_rows_count; ++row_idx) { | 1992 | 107 | for (size_t i = 1; i < arguments.size(); i += 2) { | 1993 | 68 | const size_t index = i / 2; | 1994 | 68 | auto& json_path = json_paths[index]; | 1995 | 68 | auto& json_value = json_values[index]; | 1996 | | | 1997 | 68 | const auto path_index = index_check_const(row_idx, json_path_constant[index]); | 1998 | 68 | const auto value_index = index_check_const(row_idx, json_value_constant[index]); | 1999 | | | 2000 | 68 | if (null_map[row_idx]) { | 2001 | 0 | continue; | 2002 | 0 | } | 2003 | | | 2004 | 68 | if (json_documents[row_idx] == nullptr) { | 2005 | 0 | null_map[row_idx] = 1; | 2006 | 0 | continue; | 2007 | 0 | } | 2008 | | | 2009 | 68 | if (json_path_null_maps[index] && (*json_path_null_maps[index])[path_index]) { | 2010 | 1 | null_map[row_idx] = 1; | 2011 | 1 | continue; | 2012 | 1 | } | 2013 | | | 2014 | 67 | auto find_result = | 2015 | 67 | json_documents[row_idx]->getValue()->findValue(json_path[path_index]); | 2016 | | | 2017 | 67 | if (find_result.is_wildcard) { | 2018 | 0 | return Status::InvalidArgument( | 2019 | 0 | " In this situation, path expressions may not contain the * and ** " | 2020 | 0 | "tokens or an array range, argument index: {}, row index: {}", | 2021 | 0 | i, row_idx); | 2022 | 0 | } | 2023 | | | 2024 | | if constexpr 
(modify_type == JsonbModifyType::Insert) { | 2025 | | if (find_result.value) { | 2026 | | continue; | 2027 | | } | 2028 | 67 | } else if constexpr (modify_type == JsonbModifyType::Replace) { | 2029 | 67 | if (!find_result.value) { | 2030 | 67 | continue; | 2031 | 67 | } | 2032 | 67 | } | 2033 | | | 2034 | 67 | std::vector<const JsonbValue*> parents; | 2035 | | | 2036 | 67 | bool replace = false; | 2037 | 67 | parents.emplace_back(json_documents[row_idx]->getValue()); | 2038 | 67 | const auto legs_count = json_path[path_index].get_leg_vector_size(); | 2039 | 67 | if (find_result.value) { | 2040 | | // find target path, replace it with the new value. | 2041 | 44 | replace = true; | 2042 | 44 | if (!build_parents_by_path(json_documents[row_idx]->getValue(), | 2043 | 44 | json_path[path_index], parents)) { | 2044 | 0 | continue; | 2045 | 0 | } | 2046 | 44 | } else { | 2047 | | // does not find target path, insert the new value. | 2048 | 23 | JsonbPath new_path; | 2049 | 23 | DCHECK_GT(legs_count, 0); | 2050 | 42 | for (size_t j = 0; j + 1 < legs_count; ++j) { | 2051 | 19 | auto* current_leg = json_path[path_index].get_leg_from_leg_vector(j); | 2052 | 19 | std::unique_ptr<leg_info> leg = std::make_unique<leg_info>( | 2053 | 19 | current_leg->leg_ptr, current_leg->leg_len, | 2054 | 19 | current_leg->array_index, current_leg->type); | 2055 | 19 | new_path.add_leg_to_leg_vector(std::move(leg)); | 2056 | 19 | } | 2057 | | | 2058 | 23 | if (!build_parents_by_path(json_documents[row_idx]->getValue(), new_path, | 2059 | 23 | parents)) { | 2060 | 11 | continue; | 2061 | 11 | } | 2062 | 23 | } | 2063 | | | 2064 | 56 | leg_info* last_leg = | 2065 | 56 | legs_count > 0 | 2066 | 56 | ? 
json_path[path_index].get_leg_from_leg_vector(legs_count - 1) | 2067 | 56 | : nullptr; | 2068 | 56 | RETURN_IF_ERROR(write_json_value(json_documents[row_idx]->getValue(), parents, 0, | 2069 | 56 | json_value[value_index], replace, last_leg, | 2070 | 56 | writer)); | 2071 | | | 2072 | 56 | auto* writer_output = writer.getOutput(); | 2073 | 56 | if (writer_output->getSize() > tmp_buffer.capacity) { | 2074 | 24 | tmp_buffer.capacity = | 2075 | 24 | ((size_t(writer_output->getSize()) + 1024 - 1) / 1024) * 1024; | 2076 | 24 | tmp_buffer.ptr = make_unique_buffer<char>(tmp_buffer.capacity); | 2077 | 24 | DCHECK_LE(writer_output->getSize(), tmp_buffer.capacity); | 2078 | 24 | } | 2079 | | | 2080 | 56 | memcpy(tmp_buffer.ptr.get(), writer_output->getBuffer(), writer_output->getSize()); | 2081 | 56 | tmp_buffer.size = writer_output->getSize(); | 2082 | | | 2083 | 56 | writer.reset(); | 2084 | | | 2085 | 56 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument( | 2086 | 56 | tmp_buffer.ptr.get(), tmp_buffer.size, &json_documents[row_idx])); | 2087 | 56 | } | 2088 | | | 2089 | 39 | if (!null_map[row_idx]) { | 2090 | 38 | const auto* jsonb_document = json_documents[row_idx]; | 2091 | 38 | const auto size = jsonb_document->numPackedBytes(); | 2092 | 38 | res_chars.insert(reinterpret_cast<const char*>(jsonb_document), | 2093 | 38 | reinterpret_cast<const char*>(jsonb_document) + size); | 2094 | 38 | } | 2095 | | | 2096 | 39 | res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size()); | 2097 | | | 2098 | 39 | if (!null_map[row_idx]) { | 2099 | 38 | auto* ptr = res_chars.data() + res_offsets[row_idx - 1]; | 2100 | 38 | auto size = res_offsets[row_idx] - res_offsets[row_idx - 1]; | 2101 | 38 | const JsonbDocument* doc = nullptr; | 2102 | 38 | THROW_IF_ERROR(JsonbDocument::checkAndCreateDocument( | 2103 | 38 | reinterpret_cast<const char*>(ptr), size, &doc)); | 2104 | 38 | } | 2105 | 39 | } | 2106 | | | 2107 | 26 | block.get_by_position(result).column = 
std::move(result_column); | 2108 | 26 | return Status::OK(); | 2109 | 26 | } |
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 1858 | 28 | uint32_t result, size_t input_rows_count) const override { | 1859 | 28 | if (arguments.size() % 2 != 1 || arguments.size() < 3) { | 1860 | 0 | return Status::InvalidArgument( | 1861 | 0 | "Function {} must have an odd number of arguments and more than 2 arguments, " | 1862 | 0 | "but got: {}", | 1863 | 0 | name, arguments.size()); | 1864 | 0 | } | 1865 | | | 1866 | 28 | const size_t keys_count = (arguments.size() - 1) / 2; | 1867 | | | 1868 | 28 | auto return_data_type = make_nullable(std::make_shared<DataTypeJsonb>()); | 1869 | | | 1870 | 28 | auto result_column = return_data_type->create_column(); | 1871 | 28 | auto& result_nullable_col = assert_cast<ColumnNullable&>(*result_column); | 1872 | 28 | auto& null_map = result_nullable_col.get_null_map_data(); | 1873 | 28 | auto& res_string_column = | 1874 | 28 | assert_cast<ColumnString&>(result_nullable_col.get_nested_column()); | 1875 | 28 | auto& res_chars = res_string_column.get_chars(); | 1876 | 28 | auto& res_offsets = res_string_column.get_offsets(); | 1877 | | | 1878 | 28 | null_map.resize_fill(input_rows_count, 0); | 1879 | 28 | res_offsets.resize(input_rows_count); | 1880 | 28 | auto&& [json_data_arg_column, json_data_const] = | 1881 | 28 | unpack_if_const(block.get_by_position(arguments[0]).column); | 1882 | | | 1883 | 28 | if (json_data_const) { | 1884 | 3 | if (json_data_arg_column->is_null_at(0)) { | 1885 | 0 | return create_all_null_result(return_data_type, block, result, input_rows_count); | 1886 | 0 | } | 1887 | 3 | } | 1888 | | | 1889 | 28 | std::vector<const ColumnString*> json_path_columns(keys_count); | 1890 | 28 | std::vector<bool> json_path_constant(keys_count); | 1891 | 28 | std::vector<const NullMap*> json_path_null_maps(keys_count, nullptr); | 1892 | | | 1893 | 28 | std::vector<const ColumnString*> json_value_columns(keys_count); | 
1894 | 28 | std::vector<bool> json_value_constant(keys_count); | 1895 | 28 | std::vector<const NullMap*> json_value_null_maps(keys_count, nullptr); | 1896 | | | 1897 | 28 | const NullMap* json_data_null_map = nullptr; | 1898 | 28 | const ColumnString* json_data_column; | 1899 | 28 | if (const auto* nullable_column = | 1900 | 28 | check_and_get_column<ColumnNullable>(json_data_arg_column.get())) { | 1901 | 28 | json_data_null_map = &nullable_column->get_null_map_data(); | 1902 | 28 | const auto& nested_column = nullable_column->get_nested_column(); | 1903 | 28 | json_data_column = assert_cast<const ColumnString*>(&nested_column); | 1904 | 28 | } else { | 1905 | 0 | json_data_column = assert_cast<const ColumnString*>(json_data_arg_column.get()); | 1906 | 0 | } | 1907 | | | 1908 | 66 | for (size_t i = 1; i < arguments.size(); i += 2) { | 1909 | 38 | auto&& [path_column, path_const] = | 1910 | 38 | unpack_if_const(block.get_by_position(arguments[i]).column); | 1911 | 38 | auto&& [value_column, value_const] = | 1912 | 38 | unpack_if_const(block.get_by_position(arguments[i + 1]).column); | 1913 | | | 1914 | 38 | if (path_const) { | 1915 | 11 | if (path_column->is_null_at(0)) { | 1916 | 0 | return create_all_null_result(return_data_type, block, result, | 1917 | 0 | input_rows_count); | 1918 | 0 | } | 1919 | 11 | } | 1920 | | | 1921 | 38 | json_path_constant[i / 2] = path_const; | 1922 | 38 | if (const auto* nullable_column = | 1923 | 38 | check_and_get_column<ColumnNullable>(path_column.get())) { | 1924 | 1 | json_path_null_maps[i / 2] = &nullable_column->get_null_map_data(); | 1925 | 1 | const auto& nested_column = nullable_column->get_nested_column(); | 1926 | 1 | json_path_columns[i / 2] = assert_cast<const ColumnString*>(&nested_column); | 1927 | 37 | } else { | 1928 | 37 | json_path_columns[i / 2] = assert_cast<const ColumnString*>(path_column.get()); | 1929 | 37 | } | 1930 | | | 1931 | 38 | json_value_constant[i / 2] = value_const; | 1932 | 38 | if (const auto* 
nullable_column = | 1933 | 38 | check_and_get_column<ColumnNullable>(value_column.get())) { | 1934 | 19 | json_value_null_maps[i / 2] = &nullable_column->get_null_map_data(); | 1935 | 19 | const auto& nested_column = nullable_column->get_nested_column(); | 1936 | 19 | json_value_columns[i / 2] = assert_cast<const ColumnString*>(&nested_column); | 1937 | 19 | } else { | 1938 | 19 | json_value_columns[i / 2] = assert_cast<const ColumnString*>(value_column.get()); | 1939 | 19 | } | 1940 | 38 | } | 1941 | | | 1942 | 28 | DorisVector<const JsonbDocument*> json_documents(input_rows_count); | 1943 | 28 | if (json_data_const) { | 1944 | 3 | auto json_data_string = json_data_column->get_data_at(0); | 1945 | 3 | const JsonbDocument* doc = nullptr; | 1946 | 3 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(json_data_string.data, | 1947 | 3 | json_data_string.size, &doc)); | 1948 | 3 | if (!doc || !doc->getValue()) [[unlikely]] { | 1949 | 0 | return create_all_null_result(return_data_type, block, result, input_rows_count); | 1950 | 0 | } | 1951 | 15 | for (size_t i = 0; i != input_rows_count; ++i) { | 1952 | 12 | json_documents[i] = doc; | 1953 | 12 | } | 1954 | 25 | } else { | 1955 | 50 | for (size_t i = 0; i != input_rows_count; ++i) { | 1956 | 25 | if (json_data_null_map && (*json_data_null_map)[i]) { | 1957 | 0 | null_map[i] = 1; | 1958 | 0 | json_documents[i] = nullptr; | 1959 | 0 | continue; | 1960 | 0 | } | 1961 | | | 1962 | 25 | auto json_data_string = json_data_column->get_data_at(i); | 1963 | 25 | const JsonbDocument* doc = nullptr; | 1964 | 25 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(json_data_string.data, | 1965 | 25 | json_data_string.size, &doc)); | 1966 | 25 | if (!doc || !doc->getValue()) [[unlikely]] { | 1967 | 0 | null_map[i] = 1; | 1968 | 0 | continue; | 1969 | 0 | } | 1970 | 25 | json_documents[i] = doc; | 1971 | 25 | } | 1972 | 25 | } | 1973 | | | 1974 | 28 | DorisVector<DorisVector<JsonbPath>> json_paths(keys_count); | 1975 | 28 | 
DorisVector<DorisVector<const JsonbValue*>> json_values(keys_count); | 1976 | | | 1977 | 28 | RETURN_IF_ERROR(parse_paths_and_values(json_paths, json_values, arguments, input_rows_count, | 1978 | 28 | json_path_columns, json_path_constant, | 1979 | 28 | json_path_null_maps, json_value_columns, | 1980 | 28 | json_value_constant, json_value_null_maps)); | 1981 | | | 1982 | 26 | JsonbWriter writer; | 1983 | 26 | struct DocumentBuffer { | 1984 | 26 | DorisUniqueBufferPtr<char> ptr; | 1985 | 26 | size_t size = 0; | 1986 | 26 | size_t capacity = 0; | 1987 | 26 | }; | 1988 | | | 1989 | 26 | DocumentBuffer tmp_buffer; | 1990 | | | 1991 | 72 | for (size_t row_idx = 0; row_idx != input_rows_count; ++row_idx) { | 1992 | 114 | for (size_t i = 1; i < arguments.size(); i += 2) { | 1993 | 68 | const size_t index = i / 2; | 1994 | 68 | auto& json_path = json_paths[index]; | 1995 | 68 | auto& json_value = json_values[index]; | 1996 | | | 1997 | 68 | const auto path_index = index_check_const(row_idx, json_path_constant[index]); | 1998 | 68 | const auto value_index = index_check_const(row_idx, json_value_constant[index]); | 1999 | | | 2000 | 68 | if (null_map[row_idx]) { | 2001 | 0 | continue; | 2002 | 0 | } | 2003 | | | 2004 | 68 | if (json_documents[row_idx] == nullptr) { | 2005 | 0 | null_map[row_idx] = 1; | 2006 | 0 | continue; | 2007 | 0 | } | 2008 | | | 2009 | 68 | if (json_path_null_maps[index] && (*json_path_null_maps[index])[path_index]) { | 2010 | 1 | null_map[row_idx] = 1; | 2011 | 1 | continue; | 2012 | 1 | } | 2013 | | | 2014 | 67 | auto find_result = | 2015 | 67 | json_documents[row_idx]->getValue()->findValue(json_path[path_index]); | 2016 | | | 2017 | 67 | if (find_result.is_wildcard) { | 2018 | 0 | return Status::InvalidArgument( | 2019 | 0 | " In this situation, path expressions may not contain the * and ** " | 2020 | 0 | "tokens or an array range, argument index: {}, row index: {}", | 2021 | 0 | i, row_idx); | 2022 | 0 | } | 2023 | | | 2024 | | if constexpr 
(modify_type == JsonbModifyType::Insert) { | 2025 | | if (find_result.value) { | 2026 | | continue; | 2027 | | } | 2028 | 67 | } else if constexpr (modify_type == JsonbModifyType::Replace) { | 2029 | 67 | if (!find_result.value) { | 2030 | 11 | continue; | 2031 | 11 | } | 2032 | 67 | } | 2033 | | | 2034 | 56 | std::vector<const JsonbValue*> parents; | 2035 | | | 2036 | 67 | bool replace = false; | 2037 | 67 | parents.emplace_back(json_documents[row_idx]->getValue()); | 2038 | 67 | const auto legs_count = json_path[path_index].get_leg_vector_size(); | 2039 | 67 | if (find_result.value) { | 2040 | | // find target path, replace it with the new value. | 2041 | 56 | replace = true; | 2042 | 56 | if (!build_parents_by_path(json_documents[row_idx]->getValue(), | 2043 | 56 | json_path[path_index], parents)) { | 2044 | 0 | continue; | 2045 | 0 | } | 2046 | 56 | } else { | 2047 | | // does not find target path, insert the new value. | 2048 | 11 | JsonbPath new_path; | 2049 | 11 | DCHECK_GT(legs_count, 0); | 2050 | 11 | for (size_t j = 0; j + 1 < legs_count; ++j) { | 2051 | 0 | auto* current_leg = json_path[path_index].get_leg_from_leg_vector(j); | 2052 | 0 | std::unique_ptr<leg_info> leg = std::make_unique<leg_info>( | 2053 | 0 | current_leg->leg_ptr, current_leg->leg_len, | 2054 | 0 | current_leg->array_index, current_leg->type); | 2055 | 0 | new_path.add_leg_to_leg_vector(std::move(leg)); | 2056 | 0 | } | 2057 | | | 2058 | 11 | if (!build_parents_by_path(json_documents[row_idx]->getValue(), new_path, | 2059 | 11 | parents)) { | 2060 | 0 | continue; | 2061 | 0 | } | 2062 | 11 | } | 2063 | | | 2064 | 67 | leg_info* last_leg = | 2065 | 67 | legs_count > 0 | 2066 | 67 | ? 
json_path[path_index].get_leg_from_leg_vector(legs_count - 1) | 2067 | 67 | : nullptr; | 2068 | 67 | RETURN_IF_ERROR(write_json_value(json_documents[row_idx]->getValue(), parents, 0, | 2069 | 67 | json_value[value_index], replace, last_leg, | 2070 | 67 | writer)); | 2071 | | | 2072 | 67 | auto* writer_output = writer.getOutput(); | 2073 | 67 | if (writer_output->getSize() > tmp_buffer.capacity) { | 2074 | 23 | tmp_buffer.capacity = | 2075 | 23 | ((size_t(writer_output->getSize()) + 1024 - 1) / 1024) * 1024; | 2076 | 23 | tmp_buffer.ptr = make_unique_buffer<char>(tmp_buffer.capacity); | 2077 | 23 | DCHECK_LE(writer_output->getSize(), tmp_buffer.capacity); | 2078 | 23 | } | 2079 | | | 2080 | 67 | memcpy(tmp_buffer.ptr.get(), writer_output->getBuffer(), writer_output->getSize()); | 2081 | 67 | tmp_buffer.size = writer_output->getSize(); | 2082 | | | 2083 | 67 | writer.reset(); | 2084 | | | 2085 | 67 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument( | 2086 | 67 | tmp_buffer.ptr.get(), tmp_buffer.size, &json_documents[row_idx])); | 2087 | 67 | } | 2088 | | | 2089 | 46 | if (!null_map[row_idx]) { | 2090 | 34 | const auto* jsonb_document = json_documents[row_idx]; | 2091 | 34 | const auto size = jsonb_document->numPackedBytes(); | 2092 | 34 | res_chars.insert(reinterpret_cast<const char*>(jsonb_document), | 2093 | 34 | reinterpret_cast<const char*>(jsonb_document) + size); | 2094 | 34 | } | 2095 | | | 2096 | 46 | res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size()); | 2097 | | | 2098 | 46 | if (!null_map[row_idx]) { | 2099 | 34 | auto* ptr = res_chars.data() + res_offsets[row_idx - 1]; | 2100 | 34 | auto size = res_offsets[row_idx] - res_offsets[row_idx - 1]; | 2101 | 34 | const JsonbDocument* doc = nullptr; | 2102 | 34 | THROW_IF_ERROR(JsonbDocument::checkAndCreateDocument( | 2103 | 34 | reinterpret_cast<const char*>(ptr), size, &doc)); | 2104 | 34 | } | 2105 | 46 | } | 2106 | | | 2107 | 37 | block.get_by_position(result).column = 
std::move(result_column); | 2108 | 37 | return Status::OK(); | 2109 | 26 | } |
|
2110 | | |
2111 | | bool build_parents_by_path(const JsonbValue* root, const JsonbPath& path, |
2112 | 352 | std::vector<const JsonbValue*>& parents) const { |
2113 | 352 | const size_t index = parents.size() - 1; |
2114 | 352 | if (index == path.get_leg_vector_size()) { |
2115 | 149 | return true; |
2116 | 149 | } |
2117 | | |
2118 | 203 | JsonbPath current; |
2119 | 203 | auto* current_leg = path.get_leg_from_leg_vector(index); |
2120 | 203 | std::unique_ptr<leg_info> leg = |
2121 | 203 | std::make_unique<leg_info>(current_leg->leg_ptr, current_leg->leg_len, |
2122 | 203 | current_leg->array_index, current_leg->type); |
2123 | 203 | current.add_leg_to_leg_vector(std::move(leg)); |
2124 | | |
2125 | 203 | auto find_result = root->findValue(current); |
2126 | 203 | if (!find_result.value) { |
2127 | 12 | std::string path_string; |
2128 | 12 | current.to_string(&path_string); |
2129 | 12 | return false; |
2130 | 191 | } else if (find_result.value == root) { |
2131 | 6 | return true; |
2132 | 185 | } else { |
2133 | 185 | parents.emplace_back(find_result.value); |
2134 | 185 | } |
2135 | | |
2136 | 185 | return build_parents_by_path(find_result.value, path, parents); |
2137 | 203 | } _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE21build_parents_by_pathEPKNS_10JsonbValueERKNS_9JsonbPathERSt6vectorIS5_SaIS5_EE Line | Count | Source | 2112 | 84 | std::vector<const JsonbValue*>& parents) const { | 2113 | 84 | const size_t index = parents.size() - 1; | 2114 | 84 | if (index == path.get_leg_vector_size()) { | 2115 | 43 | return true; | 2116 | 43 | } | 2117 | | | 2118 | 41 | JsonbPath current; | 2119 | 41 | auto* current_leg = path.get_leg_from_leg_vector(index); | 2120 | 41 | std::unique_ptr<leg_info> leg = | 2121 | 41 | std::make_unique<leg_info>(current_leg->leg_ptr, current_leg->leg_len, | 2122 | 41 | current_leg->array_index, current_leg->type); | 2123 | 41 | current.add_leg_to_leg_vector(std::move(leg)); | 2124 | | | 2125 | 41 | auto find_result = root->findValue(current); | 2126 | 41 | if (!find_result.value) { | 2127 | 1 | std::string path_string; | 2128 | 1 | current.to_string(&path_string); | 2129 | 1 | return false; | 2130 | 40 | } else if (find_result.value == root) { | 2131 | 0 | return true; | 2132 | 40 | } else { | 2133 | 40 | parents.emplace_back(find_result.value); | 2134 | 40 | } | 2135 | | | 2136 | 40 | return build_parents_by_path(find_result.value, path, parents); | 2137 | 41 | } |
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE21build_parents_by_pathEPKNS_10JsonbValueERKNS_9JsonbPathERSt6vectorIS5_SaIS5_EE Line | Count | Source | 2112 | 137 | std::vector<const JsonbValue*>& parents) const { | 2113 | 137 | const size_t index = parents.size() - 1; | 2114 | 137 | if (index == path.get_leg_vector_size()) { | 2115 | 53 | return true; | 2116 | 53 | } | 2117 | | | 2118 | 84 | JsonbPath current; | 2119 | 84 | auto* current_leg = path.get_leg_from_leg_vector(index); | 2120 | 84 | std::unique_ptr<leg_info> leg = | 2121 | 84 | std::make_unique<leg_info>(current_leg->leg_ptr, current_leg->leg_len, | 2122 | 84 | current_leg->array_index, current_leg->type); | 2123 | 84 | current.add_leg_to_leg_vector(std::move(leg)); | 2124 | | | 2125 | 84 | auto find_result = root->findValue(current); | 2126 | 84 | if (!find_result.value) { | 2127 | 11 | std::string path_string; | 2128 | 11 | current.to_string(&path_string); | 2129 | 11 | return false; | 2130 | 73 | } else if (find_result.value == root) { | 2131 | 3 | return true; | 2132 | 70 | } else { | 2133 | 70 | parents.emplace_back(find_result.value); | 2134 | 70 | } | 2135 | | | 2136 | 70 | return build_parents_by_path(find_result.value, path, parents); | 2137 | 84 | } |
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE21build_parents_by_pathEPKNS_10JsonbValueERKNS_9JsonbPathERSt6vectorIS5_SaIS5_EE Line | Count | Source | 2112 | 131 | std::vector<const JsonbValue*>& parents) const { | 2113 | 131 | const size_t index = parents.size() - 1; | 2114 | 131 | if (index == path.get_leg_vector_size()) { | 2115 | 53 | return true; | 2116 | 53 | } | 2117 | | | 2118 | 78 | JsonbPath current; | 2119 | 78 | auto* current_leg = path.get_leg_from_leg_vector(index); | 2120 | 78 | std::unique_ptr<leg_info> leg = | 2121 | 78 | std::make_unique<leg_info>(current_leg->leg_ptr, current_leg->leg_len, | 2122 | 78 | current_leg->array_index, current_leg->type); | 2123 | 78 | current.add_leg_to_leg_vector(std::move(leg)); | 2124 | | | 2125 | 78 | auto find_result = root->findValue(current); | 2126 | 78 | if (!find_result.value) { | 2127 | 0 | std::string path_string; | 2128 | 0 | current.to_string(&path_string); | 2129 | 0 | return false; | 2130 | 78 | } else if (find_result.value == root) { | 2131 | 3 | return true; | 2132 | 75 | } else { | 2133 | 75 | parents.emplace_back(find_result.value); | 2134 | 75 | } | 2135 | | | 2136 | 75 | return build_parents_by_path(find_result.value, path, parents); | 2137 | 78 | } |
|
2138 | | |
2139 | | Status write_json_value(const JsonbValue* root, const std::vector<const JsonbValue*>& parents, |
2140 | | const size_t parent_index, const JsonbValue* value, const bool replace, |
2141 | 340 | const leg_info* last_leg, JsonbWriter& writer) const { |
2142 | 340 | if (parent_index >= parents.size()) { |
2143 | 0 | return Status::InvalidArgument( |
2144 | 0 | "JsonbModify: parent_index {} is out of bounds for parents size {}", |
2145 | 0 | parent_index, parents.size()); |
2146 | 0 | } |
2147 | | |
2148 | 340 | if (parents[parent_index] != root) { |
2149 | 0 | return Status::InvalidArgument( |
2150 | 0 | "JsonbModify: parent value does not match root value, parent_index: {}, " |
2151 | 0 | "parents size: {}", |
2152 | 0 | parent_index, parents.size()); |
2153 | 0 | } |
2154 | | |
2155 | 340 | if (parent_index == parents.size() - 1 && replace) { |
2156 | | // We are at the last parent, write the value directly |
2157 | 100 | if (value == nullptr) { |
2158 | 24 | writer.writeNull(); |
2159 | 76 | } else { |
2160 | 76 | writer.writeValue(value); |
2161 | 76 | } |
2162 | 100 | return Status::OK(); |
2163 | 100 | } |
2164 | | |
2165 | 240 | bool value_written = false; |
2166 | 240 | bool is_last_parent = (parent_index == parents.size() - 1); |
2167 | 240 | const auto* next_parent = is_last_parent ? nullptr : parents[parent_index + 1]; |
2168 | 240 | if (root->isArray()) { |
2169 | 23 | writer.writeStartArray(); |
2170 | 23 | const auto* array_val = root->unpack<ArrayVal>(); |
2171 | 67 | for (int i = 0; i != array_val->numElem(); ++i) { |
2172 | 44 | auto* it = array_val->get(i); |
2173 | | |
2174 | 44 | if (is_last_parent && last_leg->array_index == i) { |
2175 | 0 | value_written = true; |
2176 | 0 | writer.writeValue(value); |
2177 | 44 | } else if (it == next_parent) { |
2178 | 13 | value_written = true; |
2179 | 13 | RETURN_IF_ERROR(write_json_value(it, parents, parent_index + 1, value, replace, |
2180 | 13 | last_leg, writer)); |
2181 | 31 | } else { |
2182 | 31 | writer.writeValue(it); |
2183 | 31 | } |
2184 | 44 | } |
2185 | 23 | if (is_last_parent && !value_written) { |
2186 | 10 | value_written = true; |
2187 | 10 | writer.writeValue(value); |
2188 | 10 | } |
2189 | | |
2190 | 23 | writer.writeEndArray(); |
2191 | | |
2192 | 217 | } else { |
2193 | | /** |
2194 | | Because even for a non-array object, `$[0]` can still point to that object: |
2195 | | ``` |
2196 | | select json_extract('{"key": "value"}', '$[0]'); |
2197 | | +------------------------------------------+ |
2198 | | | json_extract('{"key": "value"}', '$[0]') | |
2199 | | +------------------------------------------+ |
2200 | | | {"key": "value"} | |
2201 | | +------------------------------------------+ |
2202 | | ``` |
2203 | | So when inserting an element into `$[1]`, even if '$' does not represent an array, |
2204 | | it should be converted to an array before insertion: |
2205 | | ``` |
2206 | | select json_insert('123','$[1]', null); |
2207 | | +---------------------------------+ |
2208 | | | json_insert('123','$[1]', null) | |
2209 | | +---------------------------------+ |
2210 | | | [123, null] | |
2211 | | +---------------------------------+ |
2212 | | ``` |
2213 | | */ |
2214 | 217 | if (is_last_parent && last_leg && last_leg->type == ARRAY_CODE) { |
2215 | 8 | writer.writeStartArray(); |
2216 | 8 | writer.writeValue(root); |
2217 | 8 | writer.writeValue(value); |
2218 | 8 | writer.writeEndArray(); |
2219 | 8 | return Status::OK(); |
2220 | 209 | } else if (root->isObject()) { |
2221 | 209 | writer.writeStartObject(); |
2222 | 209 | const auto* object_val = root->unpack<ObjectVal>(); |
2223 | 403 | for (const auto& it : *object_val) { |
2224 | 403 | writer.writeKey(it.getKeyStr(), it.klen()); |
2225 | 403 | if (it.value() == next_parent) { |
2226 | 172 | value_written = true; |
2227 | 172 | RETURN_IF_ERROR(write_json_value(it.value(), parents, parent_index + 1, |
2228 | 172 | value, replace, last_leg, writer)); |
2229 | 231 | } else { |
2230 | 231 | writer.writeValue(it.value()); |
2231 | 231 | } |
2232 | 403 | } |
2233 | | |
2234 | 209 | if (is_last_parent && !value_written) { |
2235 | 37 | value_written = true; |
2236 | 37 | writer.writeStartObject(); |
2237 | 37 | writer.writeKey(last_leg->leg_ptr, static_cast<uint8_t>(last_leg->leg_len)); |
2238 | 37 | writer.writeValue(value); |
2239 | 37 | writer.writeEndObject(); |
2240 | 37 | } |
2241 | 209 | writer.writeEndObject(); |
2242 | | |
2243 | 209 | } else { |
2244 | 0 | return Status::InvalidArgument("Cannot insert value into this type"); |
2245 | 0 | } |
2246 | 217 | } |
2247 | | |
2248 | 232 | if (!value_written) { |
2249 | 0 | return Status::InvalidArgument( |
2250 | 0 | "JsonbModify: value not written, parent_index: {}, parents size: {}", |
2251 | 0 | parent_index, parents.size()); |
2252 | 0 | } |
2253 | | |
2254 | 232 | return Status::OK(); |
2255 | 232 | } _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE16write_json_valueEPKNS_10JsonbValueERKSt6vectorIS5_SaIS5_EEmS5_bPKNS_8leg_infoERNS_12JsonbWriterTINS_14JsonbOutStreamEEE Line | Count | Source | 2141 | 83 | const leg_info* last_leg, JsonbWriter& writer) const { | 2142 | 83 | if (parent_index >= parents.size()) { | 2143 | 0 | return Status::InvalidArgument( | 2144 | 0 | "JsonbModify: parent_index {} is out of bounds for parents size {}", | 2145 | 0 | parent_index, parents.size()); | 2146 | 0 | } | 2147 | | | 2148 | 83 | if (parents[parent_index] != root) { | 2149 | 0 | return Status::InvalidArgument( | 2150 | 0 | "JsonbModify: parent value does not match root value, parent_index: {}, " | 2151 | 0 | "parents size: {}", | 2152 | 0 | parent_index, parents.size()); | 2153 | 0 | } | 2154 | | | 2155 | 83 | if (parent_index == parents.size() - 1 && replace) { | 2156 | | // We are at the last parent, write the value directly | 2157 | 0 | if (value == nullptr) { | 2158 | 0 | writer.writeNull(); | 2159 | 0 | } else { | 2160 | 0 | writer.writeValue(value); | 2161 | 0 | } | 2162 | 0 | return Status::OK(); | 2163 | 0 | } | 2164 | | | 2165 | 83 | bool value_written = false; | 2166 | 83 | bool is_last_parent = (parent_index == parents.size() - 1); | 2167 | 83 | const auto* next_parent = is_last_parent ? 
nullptr : parents[parent_index + 1]; | 2168 | 83 | if (root->isArray()) { | 2169 | 5 | writer.writeStartArray(); | 2170 | 5 | const auto* array_val = root->unpack<ArrayVal>(); | 2171 | 14 | for (int i = 0; i != array_val->numElem(); ++i) { | 2172 | 9 | auto* it = array_val->get(i); | 2173 | | | 2174 | 9 | if (is_last_parent && last_leg->array_index == i) { | 2175 | 0 | value_written = true; | 2176 | 0 | writer.writeValue(value); | 2177 | 9 | } else if (it == next_parent) { | 2178 | 0 | value_written = true; | 2179 | 0 | RETURN_IF_ERROR(write_json_value(it, parents, parent_index + 1, value, replace, | 2180 | 0 | last_leg, writer)); | 2181 | 9 | } else { | 2182 | 9 | writer.writeValue(it); | 2183 | 9 | } | 2184 | 9 | } | 2185 | 5 | if (is_last_parent && !value_written) { | 2186 | 5 | value_written = true; | 2187 | 5 | writer.writeValue(value); | 2188 | 5 | } | 2189 | | | 2190 | 5 | writer.writeEndArray(); | 2191 | | | 2192 | 78 | } else { | 2193 | | /** | 2194 | | Because even for a non-array object, `$[0]` can still point to that object: | 2195 | | ``` | 2196 | | select json_extract('{"key": "value"}', '$[0]'); | 2197 | | +------------------------------------------+ | 2198 | | | json_extract('{"key": "value"}', '$[0]') | | 2199 | | +------------------------------------------+ | 2200 | | | {"key": "value"} | | 2201 | | +------------------------------------------+ | 2202 | | ``` | 2203 | | So when inserting an element into `$[1]`, even if '$' does not represent an array, | 2204 | | it should be converted to an array before insertion: | 2205 | | ``` | 2206 | | select json_insert('123','$[1]', null); | 2207 | | +---------------------------------+ | 2208 | | | json_insert('123','$[1]', null) | | 2209 | | +---------------------------------+ | 2210 | | | [123, null] | | 2211 | | +---------------------------------+ | 2212 | | ``` | 2213 | | */ | 2214 | 78 | if (is_last_parent && last_leg && last_leg->type == ARRAY_CODE) { | 2215 | 4 | writer.writeStartArray(); | 2216 | 4 | 
writer.writeValue(root); | 2217 | 4 | writer.writeValue(value); | 2218 | 4 | writer.writeEndArray(); | 2219 | 4 | return Status::OK(); | 2220 | 74 | } else if (root->isObject()) { | 2221 | 74 | writer.writeStartObject(); | 2222 | 74 | const auto* object_val = root->unpack<ObjectVal>(); | 2223 | 74 | for (const auto& it : *object_val) { | 2224 | 70 | writer.writeKey(it.getKeyStr(), it.klen()); | 2225 | 70 | if (it.value() == next_parent) { | 2226 | 40 | value_written = true; | 2227 | 40 | RETURN_IF_ERROR(write_json_value(it.value(), parents, parent_index + 1, | 2228 | 40 | value, replace, last_leg, writer)); | 2229 | 40 | } else { | 2230 | 30 | writer.writeValue(it.value()); | 2231 | 30 | } | 2232 | 70 | } | 2233 | | | 2234 | 74 | if (is_last_parent && !value_written) { | 2235 | 34 | value_written = true; | 2236 | 34 | writer.writeStartObject(); | 2237 | 34 | writer.writeKey(last_leg->leg_ptr, static_cast<uint8_t>(last_leg->leg_len)); | 2238 | 34 | writer.writeValue(value); | 2239 | 34 | writer.writeEndObject(); | 2240 | 34 | } | 2241 | 74 | writer.writeEndObject(); | 2242 | | | 2243 | 74 | } else { | 2244 | 0 | return Status::InvalidArgument("Cannot insert value into this type"); | 2245 | 0 | } | 2246 | 78 | } | 2247 | | | 2248 | 79 | if (!value_written) { | 2249 | 0 | return Status::InvalidArgument( | 2250 | 0 | "JsonbModify: value not written, parent_index: {}, parents size: {}", | 2251 | 0 | parent_index, parents.size()); | 2252 | 0 | } | 2253 | | | 2254 | 79 | return Status::OK(); | 2255 | 79 | } |
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE16write_json_valueEPKNS_10JsonbValueERKSt6vectorIS5_SaIS5_EEmS5_bPKNS_8leg_infoERNS_12JsonbWriterTINS_14JsonbOutStreamEEE Line | Count | Source | 2141 | 126 | const leg_info* last_leg, JsonbWriter& writer) const { | 2142 | 126 | if (parent_index >= parents.size()) { | 2143 | 0 | return Status::InvalidArgument( | 2144 | 0 | "JsonbModify: parent_index {} is out of bounds for parents size {}", | 2145 | 0 | parent_index, parents.size()); | 2146 | 0 | } | 2147 | | | 2148 | 126 | if (parents[parent_index] != root) { | 2149 | 0 | return Status::InvalidArgument( | 2150 | 0 | "JsonbModify: parent value does not match root value, parent_index: {}, " | 2151 | 0 | "parents size: {}", | 2152 | 0 | parent_index, parents.size()); | 2153 | 0 | } | 2154 | | | 2155 | 126 | if (parent_index == parents.size() - 1 && replace) { | 2156 | | // We are at the last parent, write the value directly | 2157 | 44 | if (value == nullptr) { | 2158 | 10 | writer.writeNull(); | 2159 | 34 | } else { | 2160 | 34 | writer.writeValue(value); | 2161 | 34 | } | 2162 | 44 | return Status::OK(); | 2163 | 44 | } | 2164 | | | 2165 | 82 | bool value_written = false; | 2166 | 82 | bool is_last_parent = (parent_index == parents.size() - 1); | 2167 | 82 | const auto* next_parent = is_last_parent ? 
nullptr : parents[parent_index + 1]; | 2168 | 82 | if (root->isArray()) { | 2169 | 10 | writer.writeStartArray(); | 2170 | 10 | const auto* array_val = root->unpack<ArrayVal>(); | 2171 | 29 | for (int i = 0; i != array_val->numElem(); ++i) { | 2172 | 19 | auto* it = array_val->get(i); | 2173 | | | 2174 | 19 | if (is_last_parent && last_leg->array_index == i) { | 2175 | 0 | value_written = true; | 2176 | 0 | writer.writeValue(value); | 2177 | 19 | } else if (it == next_parent) { | 2178 | 5 | value_written = true; | 2179 | 5 | RETURN_IF_ERROR(write_json_value(it, parents, parent_index + 1, value, replace, | 2180 | 5 | last_leg, writer)); | 2181 | 14 | } else { | 2182 | 14 | writer.writeValue(it); | 2183 | 14 | } | 2184 | 19 | } | 2185 | 10 | if (is_last_parent && !value_written) { | 2186 | 5 | value_written = true; | 2187 | 5 | writer.writeValue(value); | 2188 | 5 | } | 2189 | | | 2190 | 10 | writer.writeEndArray(); | 2191 | | | 2192 | 72 | } else { | 2193 | | /** | 2194 | | Because even for a non-array object, `$[0]` can still point to that object: | 2195 | | ``` | 2196 | | select json_extract('{"key": "value"}', '$[0]'); | 2197 | | +------------------------------------------+ | 2198 | | | json_extract('{"key": "value"}', '$[0]') | | 2199 | | +------------------------------------------+ | 2200 | | | {"key": "value"} | | 2201 | | +------------------------------------------+ | 2202 | | ``` | 2203 | | So when inserting an element into `$[1]`, even if '$' does not represent an array, | 2204 | | it should be converted to an array before insertion: | 2205 | | ``` | 2206 | | select json_insert('123','$[1]', null); | 2207 | | +---------------------------------+ | 2208 | | | json_insert('123','$[1]', null) | | 2209 | | +---------------------------------+ | 2210 | | | [123, null] | | 2211 | | +---------------------------------+ | 2212 | | ``` | 2213 | | */ | 2214 | 72 | if (is_last_parent && last_leg && last_leg->type == ARRAY_CODE) { | 2215 | 4 | writer.writeStartArray(); | 
2216 | 4 | writer.writeValue(root); | 2217 | 4 | writer.writeValue(value); | 2218 | 4 | writer.writeEndArray(); | 2219 | 4 | return Status::OK(); | 2220 | 68 | } else if (root->isObject()) { | 2221 | 68 | writer.writeStartObject(); | 2222 | 68 | const auto* object_val = root->unpack<ObjectVal>(); | 2223 | 158 | for (const auto& it : *object_val) { | 2224 | 158 | writer.writeKey(it.getKeyStr(), it.klen()); | 2225 | 158 | if (it.value() == next_parent) { | 2226 | 65 | value_written = true; | 2227 | 65 | RETURN_IF_ERROR(write_json_value(it.value(), parents, parent_index + 1, | 2228 | 65 | value, replace, last_leg, writer)); | 2229 | 93 | } else { | 2230 | 93 | writer.writeValue(it.value()); | 2231 | 93 | } | 2232 | 158 | } | 2233 | | | 2234 | 68 | if (is_last_parent && !value_written) { | 2235 | 3 | value_written = true; | 2236 | 3 | writer.writeStartObject(); | 2237 | 3 | writer.writeKey(last_leg->leg_ptr, static_cast<uint8_t>(last_leg->leg_len)); | 2238 | 3 | writer.writeValue(value); | 2239 | 3 | writer.writeEndObject(); | 2240 | 3 | } | 2241 | 68 | writer.writeEndObject(); | 2242 | | | 2243 | 68 | } else { | 2244 | 0 | return Status::InvalidArgument("Cannot insert value into this type"); | 2245 | 0 | } | 2246 | 72 | } | 2247 | | | 2248 | 78 | if (!value_written) { | 2249 | 0 | return Status::InvalidArgument( | 2250 | 0 | "JsonbModify: value not written, parent_index: {}, parents size: {}", | 2251 | 0 | parent_index, parents.size()); | 2252 | 0 | } | 2253 | | | 2254 | 78 | return Status::OK(); | 2255 | 78 | } |
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE16write_json_valueEPKNS_10JsonbValueERKSt6vectorIS5_SaIS5_EEmS5_bPKNS_8leg_infoERNS_12JsonbWriterTINS_14JsonbOutStreamEEE Line | Count | Source | 2141 | 131 | const leg_info* last_leg, JsonbWriter& writer) const { | 2142 | 131 | if (parent_index >= parents.size()) { | 2143 | 0 | return Status::InvalidArgument( | 2144 | 0 | "JsonbModify: parent_index {} is out of bounds for parents size {}", | 2145 | 0 | parent_index, parents.size()); | 2146 | 0 | } | 2147 | | | 2148 | 131 | if (parents[parent_index] != root) { | 2149 | 0 | return Status::InvalidArgument( | 2150 | 0 | "JsonbModify: parent value does not match root value, parent_index: {}, " | 2151 | 0 | "parents size: {}", | 2152 | 0 | parent_index, parents.size()); | 2153 | 0 | } | 2154 | | | 2155 | 131 | if (parent_index == parents.size() - 1 && replace) { | 2156 | | // We are at the last parent, write the value directly | 2157 | 56 | if (value == nullptr) { | 2158 | 14 | writer.writeNull(); | 2159 | 42 | } else { | 2160 | 42 | writer.writeValue(value); | 2161 | 42 | } | 2162 | 56 | return Status::OK(); | 2163 | 56 | } | 2164 | | | 2165 | 75 | bool value_written = false; | 2166 | 75 | bool is_last_parent = (parent_index == parents.size() - 1); | 2167 | 75 | const auto* next_parent = is_last_parent ? 
nullptr : parents[parent_index + 1]; | 2168 | 75 | if (root->isArray()) { | 2169 | 8 | writer.writeStartArray(); | 2170 | 8 | const auto* array_val = root->unpack<ArrayVal>(); | 2171 | 24 | for (int i = 0; i != array_val->numElem(); ++i) { | 2172 | 16 | auto* it = array_val->get(i); | 2173 | | | 2174 | 16 | if (is_last_parent && last_leg->array_index == i) { | 2175 | 0 | value_written = true; | 2176 | 0 | writer.writeValue(value); | 2177 | 16 | } else if (it == next_parent) { | 2178 | 8 | value_written = true; | 2179 | 8 | RETURN_IF_ERROR(write_json_value(it, parents, parent_index + 1, value, replace, | 2180 | 8 | last_leg, writer)); | 2181 | 8 | } else { | 2182 | 8 | writer.writeValue(it); | 2183 | 8 | } | 2184 | 16 | } | 2185 | 8 | if (is_last_parent && !value_written) { | 2186 | 0 | value_written = true; | 2187 | 0 | writer.writeValue(value); | 2188 | 0 | } | 2189 | | | 2190 | 8 | writer.writeEndArray(); | 2191 | | | 2192 | 67 | } else { | 2193 | | /** | 2194 | | Because even for a non-array object, `$[0]` can still point to that object: | 2195 | | ``` | 2196 | | select json_extract('{"key": "value"}', '$[0]'); | 2197 | | +------------------------------------------+ | 2198 | | | json_extract('{"key": "value"}', '$[0]') | | 2199 | | +------------------------------------------+ | 2200 | | | {"key": "value"} | | 2201 | | +------------------------------------------+ | 2202 | | ``` | 2203 | | So when inserting an element into `$[1]`, even if '$' does not represent an array, | 2204 | | it should be converted to an array before insertion: | 2205 | | ``` | 2206 | | select json_insert('123','$[1]', null); | 2207 | | +---------------------------------+ | 2208 | | | json_insert('123','$[1]', null) | | 2209 | | +---------------------------------+ | 2210 | | | [123, null] | | 2211 | | +---------------------------------+ | 2212 | | ``` | 2213 | | */ | 2214 | 67 | if (is_last_parent && last_leg && last_leg->type == ARRAY_CODE) { | 2215 | 0 | writer.writeStartArray(); | 2216 | 
0 | writer.writeValue(root); | 2217 | 0 | writer.writeValue(value); | 2218 | 0 | writer.writeEndArray(); | 2219 | 0 | return Status::OK(); | 2220 | 67 | } else if (root->isObject()) { | 2221 | 67 | writer.writeStartObject(); | 2222 | 67 | const auto* object_val = root->unpack<ObjectVal>(); | 2223 | 175 | for (const auto& it : *object_val) { | 2224 | 175 | writer.writeKey(it.getKeyStr(), it.klen()); | 2225 | 175 | if (it.value() == next_parent) { | 2226 | 67 | value_written = true; | 2227 | 67 | RETURN_IF_ERROR(write_json_value(it.value(), parents, parent_index + 1, | 2228 | 67 | value, replace, last_leg, writer)); | 2229 | 108 | } else { | 2230 | 108 | writer.writeValue(it.value()); | 2231 | 108 | } | 2232 | 175 | } | 2233 | | | 2234 | 67 | if (is_last_parent && !value_written) { | 2235 | 0 | value_written = true; | 2236 | 0 | writer.writeStartObject(); | 2237 | 0 | writer.writeKey(last_leg->leg_ptr, static_cast<uint8_t>(last_leg->leg_len)); | 2238 | 0 | writer.writeValue(value); | 2239 | 0 | writer.writeEndObject(); | 2240 | 0 | } | 2241 | 67 | writer.writeEndObject(); | 2242 | | | 2243 | 67 | } else { | 2244 | 0 | return Status::InvalidArgument("Cannot insert value into this type"); | 2245 | 0 | } | 2246 | 67 | } | 2247 | | | 2248 | 75 | if (!value_written) { | 2249 | 0 | return Status::InvalidArgument( | 2250 | 0 | "JsonbModify: value not written, parent_index: {}, parents size: {}", | 2251 | 0 | parent_index, parents.size()); | 2252 | 0 | } | 2253 | | | 2254 | 75 | return Status::OK(); | 2255 | 75 | } |
|
2256 | | |
2257 | | Status parse_paths_and_values(DorisVector<DorisVector<JsonbPath>>& json_paths, |
2258 | | DorisVector<DorisVector<const JsonbValue*>>& json_values, |
2259 | | const ColumnNumbers& arguments, const size_t input_rows_count, |
2260 | | const std::vector<const ColumnString*>& json_path_columns, |
2261 | | const std::vector<bool>& json_path_constant, |
2262 | | const std::vector<const NullMap*>& json_path_null_maps, |
2263 | | const std::vector<const ColumnString*>& json_value_columns, |
2264 | | const std::vector<bool>& json_value_constant, |
2265 | 85 | const std::vector<const NullMap*>& json_value_null_maps) const { |
2266 | 187 | for (size_t i = 1; i < arguments.size(); i += 2) { |
2267 | 110 | const size_t index = i / 2; |
2268 | 110 | const auto* json_path_column = json_path_columns[index]; |
2269 | 110 | const auto* value_column = json_value_columns[index]; |
2270 | | |
2271 | 110 | json_paths[index].resize(json_path_constant[index] ? 1 : input_rows_count); |
2272 | 110 | json_values[index].resize(json_value_constant[index] ? 1 : input_rows_count, nullptr); |
2273 | | |
2274 | 225 | for (size_t row_idx = 0; row_idx != json_paths[index].size(); ++row_idx) { |
2275 | 123 | if (json_path_null_maps[index] && (*json_path_null_maps[index])[row_idx]) { |
2276 | 6 | continue; |
2277 | 6 | } |
2278 | | |
2279 | 117 | auto path_string = json_path_column->get_data_at(row_idx); |
2280 | 117 | if (!json_paths[index][row_idx].seek(path_string.data, path_string.size)) { |
2281 | 3 | return Status::InvalidArgument( |
2282 | 3 | "Json path error: Invalid Json Path for value: {}, " |
2283 | 3 | "argument " |
2284 | 3 | "index: {}, row index: {}", |
2285 | 3 | std::string_view(path_string.data, path_string.size), i, row_idx); |
2286 | 3 | } |
2287 | | |
2288 | 114 | if (json_paths[index][row_idx].is_wildcard()) { |
2289 | 5 | return Status::InvalidArgument( |
2290 | 5 | "In this situation, path expressions may not contain the * and ** " |
2291 | 5 | "tokens, argument index: {}, row index: {}", |
2292 | 5 | i, row_idx); |
2293 | 5 | } |
2294 | 114 | } |
2295 | | |
2296 | 302 | for (size_t row_idx = 0; row_idx != json_values[index].size(); ++row_idx) { |
2297 | 200 | if (json_value_null_maps[index] && (*json_value_null_maps[index])[row_idx]) { |
2298 | 48 | continue; |
2299 | 48 | } |
2300 | | |
2301 | 152 | auto value_string = value_column->get_data_at(row_idx); |
2302 | 152 | const JsonbDocument* doc = nullptr; |
2303 | 152 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(value_string.data, |
2304 | 152 | value_string.size, &doc)); |
2305 | 152 | if (doc) { |
2306 | 152 | json_values[index][row_idx] = doc->getValue(); |
2307 | 152 | } |
2308 | 152 | } |
2309 | 102 | } |
2310 | | |
2311 | 77 | return Status::OK(); |
2312 | 85 | } _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE22parse_paths_and_valuesERSt6vectorIS3_INS_9JsonbPathENS_18CustomStdAllocatorIS4_NS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEEEEENS5_ISA_S8_EEERS3_IS3_IPKNS_10JsonbValueENS5_ISG_S8_EEENS5_ISI_S8_EEERKS3_IjSaIjEEmRKS3_IPKNS_9ColumnStrIjEESaIST_EERKS3_IbSaIbEERKS3_IPKNS_8PODArrayIhLm4096ES8_Lm16ELm15EEESaIS15_EESX_S11_S19_ Line | Count | Source | 2265 | 29 | const std::vector<const NullMap*>& json_value_null_maps) const { | 2266 | 61 | for (size_t i = 1; i < arguments.size(); i += 2) { | 2267 | 36 | const size_t index = i / 2; | 2268 | 36 | const auto* json_path_column = json_path_columns[index]; | 2269 | 36 | const auto* value_column = json_value_columns[index]; | 2270 | | | 2271 | 36 | json_paths[index].resize(json_path_constant[index] ? 1 : input_rows_count); | 2272 | 36 | json_values[index].resize(json_value_constant[index] ? 1 : input_rows_count, nullptr); | 2273 | | | 2274 | 79 | for (size_t row_idx = 0; row_idx != json_paths[index].size(); ++row_idx) { | 2275 | 47 | if (json_path_null_maps[index] && (*json_path_null_maps[index])[row_idx]) { | 2276 | 4 | continue; | 2277 | 4 | } | 2278 | | | 2279 | 43 | auto path_string = json_path_column->get_data_at(row_idx); | 2280 | 43 | if (!json_paths[index][row_idx].seek(path_string.data, path_string.size)) { | 2281 | 1 | return Status::InvalidArgument( | 2282 | 1 | "Json path error: Invalid Json Path for value: {}, " | 2283 | 1 | "argument " | 2284 | 1 | "index: {}, row index: {}", | 2285 | 1 | std::string_view(path_string.data, path_string.size), i, row_idx); | 2286 | 1 | } | 2287 | | | 2288 | 42 | if (json_paths[index][row_idx].is_wildcard()) { | 2289 | 3 | return Status::InvalidArgument( | 2290 | 3 | "In this situation, path expressions may not contain the * and ** " | 2291 | 3 | "tokens, argument index: {}, row index: {}", | 2292 | 3 | i, row_idx); | 2293 | 3 | } | 2294 | 42 | } | 2295 | | | 2296 | 96 | for (size_t row_idx = 0; 
row_idx != json_values[index].size(); ++row_idx) { | 2297 | 64 | if (json_value_null_maps[index] && (*json_value_null_maps[index])[row_idx]) { | 2298 | 14 | continue; | 2299 | 14 | } | 2300 | | | 2301 | 50 | auto value_string = value_column->get_data_at(row_idx); | 2302 | 50 | const JsonbDocument* doc = nullptr; | 2303 | 50 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(value_string.data, | 2304 | 50 | value_string.size, &doc)); | 2305 | 50 | if (doc) { | 2306 | 50 | json_values[index][row_idx] = doc->getValue(); | 2307 | 50 | } | 2308 | 50 | } | 2309 | 32 | } | 2310 | | | 2311 | 25 | return Status::OK(); | 2312 | 29 | } |
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE22parse_paths_and_valuesERSt6vectorIS3_INS_9JsonbPathENS_18CustomStdAllocatorIS4_NS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEEEEENS5_ISA_S8_EEERS3_IS3_IPKNS_10JsonbValueENS5_ISG_S8_EEENS5_ISI_S8_EEERKS3_IjSaIjEEmRKS3_IPKNS_9ColumnStrIjEESaIST_EERKS3_IbSaIbEERKS3_IPKNS_8PODArrayIhLm4096ES8_Lm16ELm15EEESaIS15_EESX_S11_S19_ Line | Count | Source | 2265 | 28 | const std::vector<const NullMap*>& json_value_null_maps) const { | 2266 | 62 | for (size_t i = 1; i < arguments.size(); i += 2) { | 2267 | 36 | const size_t index = i / 2; | 2268 | 36 | const auto* json_path_column = json_path_columns[index]; | 2269 | 36 | const auto* value_column = json_value_columns[index]; | 2270 | | | 2271 | 36 | json_paths[index].resize(json_path_constant[index] ? 1 : input_rows_count); | 2272 | 36 | json_values[index].resize(json_value_constant[index] ? 1 : input_rows_count, nullptr); | 2273 | | | 2274 | 72 | for (size_t row_idx = 0; row_idx != json_paths[index].size(); ++row_idx) { | 2275 | 38 | if (json_path_null_maps[index] && (*json_path_null_maps[index])[row_idx]) { | 2276 | 1 | continue; | 2277 | 1 | } | 2278 | | | 2279 | 37 | auto path_string = json_path_column->get_data_at(row_idx); | 2280 | 37 | if (!json_paths[index][row_idx].seek(path_string.data, path_string.size)) { | 2281 | 1 | return Status::InvalidArgument( | 2282 | 1 | "Json path error: Invalid Json Path for value: {}, " | 2283 | 1 | "argument " | 2284 | 1 | "index: {}, row index: {}", | 2285 | 1 | std::string_view(path_string.data, path_string.size), i, row_idx); | 2286 | 1 | } | 2287 | | | 2288 | 36 | if (json_paths[index][row_idx].is_wildcard()) { | 2289 | 1 | return Status::InvalidArgument( | 2290 | 1 | "In this situation, path expressions may not contain the * and ** " | 2291 | 1 | "tokens, argument index: {}, row index: {}", | 2292 | 1 | i, row_idx); | 2293 | 1 | } | 2294 | 36 | } | 2295 | | | 2296 | 102 | for (size_t row_idx = 0; row_idx != 
json_values[index].size(); ++row_idx) { | 2297 | 68 | if (json_value_null_maps[index] && (*json_value_null_maps[index])[row_idx]) { | 2298 | 16 | continue; | 2299 | 16 | } | 2300 | | | 2301 | 52 | auto value_string = value_column->get_data_at(row_idx); | 2302 | 52 | const JsonbDocument* doc = nullptr; | 2303 | 52 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(value_string.data, | 2304 | 52 | value_string.size, &doc)); | 2305 | 52 | if (doc) { | 2306 | 52 | json_values[index][row_idx] = doc->getValue(); | 2307 | 52 | } | 2308 | 52 | } | 2309 | 34 | } | 2310 | | | 2311 | 26 | return Status::OK(); | 2312 | 28 | } |
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE22parse_paths_and_valuesERSt6vectorIS3_INS_9JsonbPathENS_18CustomStdAllocatorIS4_NS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEEEEENS5_ISA_S8_EEERS3_IS3_IPKNS_10JsonbValueENS5_ISG_S8_EEENS5_ISI_S8_EEERKS3_IjSaIjEEmRKS3_IPKNS_9ColumnStrIjEESaIST_EERKS3_IbSaIbEERKS3_IPKNS_8PODArrayIhLm4096ES8_Lm16ELm15EEESaIS15_EESX_S11_S19_ Line | Count | Source | 2265 | 28 | const std::vector<const NullMap*>& json_value_null_maps) const { | 2266 | 64 | for (size_t i = 1; i < arguments.size(); i += 2) { | 2267 | 38 | const size_t index = i / 2; | 2268 | 38 | const auto* json_path_column = json_path_columns[index]; | 2269 | 38 | const auto* value_column = json_value_columns[index]; | 2270 | | | 2271 | 38 | json_paths[index].resize(json_path_constant[index] ? 1 : input_rows_count); | 2272 | 38 | json_values[index].resize(json_value_constant[index] ? 1 : input_rows_count, nullptr); | 2273 | | | 2274 | 74 | for (size_t row_idx = 0; row_idx != json_paths[index].size(); ++row_idx) { | 2275 | 38 | if (json_path_null_maps[index] && (*json_path_null_maps[index])[row_idx]) { | 2276 | 1 | continue; | 2277 | 1 | } | 2278 | | | 2279 | 37 | auto path_string = json_path_column->get_data_at(row_idx); | 2280 | 37 | if (!json_paths[index][row_idx].seek(path_string.data, path_string.size)) { | 2281 | 1 | return Status::InvalidArgument( | 2282 | 1 | "Json path error: Invalid Json Path for value: {}, " | 2283 | 1 | "argument " | 2284 | 1 | "index: {}, row index: {}", | 2285 | 1 | std::string_view(path_string.data, path_string.size), i, row_idx); | 2286 | 1 | } | 2287 | | | 2288 | 36 | if (json_paths[index][row_idx].is_wildcard()) { | 2289 | 1 | return Status::InvalidArgument( | 2290 | 1 | "In this situation, path expressions may not contain the * and ** " | 2291 | 1 | "tokens, argument index: {}, row index: {}", | 2292 | 1 | i, row_idx); | 2293 | 1 | } | 2294 | 36 | } | 2295 | | | 2296 | 104 | for (size_t row_idx = 0; row_idx != 
json_values[index].size(); ++row_idx) { | 2297 | 68 | if (json_value_null_maps[index] && (*json_value_null_maps[index])[row_idx]) { | 2298 | 18 | continue; | 2299 | 18 | } | 2300 | | | 2301 | 50 | auto value_string = value_column->get_data_at(row_idx); | 2302 | 50 | const JsonbDocument* doc = nullptr; | 2303 | 50 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(value_string.data, | 2304 | 50 | value_string.size, &doc)); | 2305 | 50 | if (doc) { | 2306 | 50 | json_values[index][row_idx] = doc->getValue(); | 2307 | 50 | } | 2308 | 50 | } | 2309 | 36 | } | 2310 | | | 2311 | 26 | return Status::OK(); | 2312 | 28 | } |
|
2313 | | }; |
2314 | | |
2315 | | struct JsonbContainsAndPathImpl { |
2316 | 64 | static DataTypes get_variadic_argument_types() { |
2317 | 64 | return {std::make_shared<DataTypeJsonb>(), std::make_shared<DataTypeJsonb>(), |
2318 | 64 | std::make_shared<DataTypeString>()}; |
2319 | 64 | } |
2320 | | |
2321 | | static Status execute_impl(FunctionContext* context, Block& block, |
2322 | | const ColumnNumbers& arguments, uint32_t result, |
2323 | 131 | size_t input_rows_count) { |
2324 | 131 | return JsonbContainsUtil::jsonb_contains_execute(context, block, arguments, result, |
2325 | 131 | input_rows_count); |
2326 | 131 | } |
2327 | | }; |
2328 | | |
2329 | | class FunctionJsonSearch : public IFunction { |
2330 | | private: |
2331 | | using OneFun = std::function<Status(size_t, bool*)>; |
2332 | 70 | static Status always_one(size_t i, bool* res) { |
2333 | 70 | *res = true; |
2334 | 70 | return Status::OK(); |
2335 | 70 | } |
2336 | 48 | static Status always_all(size_t i, bool* res) { |
2337 | 48 | *res = false; |
2338 | 48 | return Status::OK(); |
2339 | 48 | } |
2340 | | |
2341 | | using CheckNullFun = std::function<bool(size_t)>; |
2342 | 304 | static bool always_not_null(size_t) { return false; } |
2343 | | |
2344 | | using GetJsonStringRefFun = std::function<StringRef(size_t)>; |
2345 | | |
2346 | 346 | Status matched(const std::string_view& str, LikeState* state, unsigned char* res) const { |
2347 | 346 | StringRef pattern; // not used |
2348 | 346 | StringRef value_val(str.data(), str.size()); |
2349 | 346 | return (state->scalar_function)(&state->search_state, value_val, pattern, res); |
2350 | 346 | } |
2351 | | |
2352 | | /** |
2353 | | * Recursive search for matching string, if found, the result will be added to a vector |
2354 | | * @param element json element |
2355 | | * @param one_match |
2356 | | * @param search_str |
2357 | | * @param cur_path |
2358 | | * @param matches The path that has already been matched |
2359 | | * @return true if matched else false |
2360 | | */ |
2361 | | bool find_matches(const JsonbValue* element, const bool& one_match, LikeState* state, |
2362 | 753 | JsonbPath* cur_path, std::unordered_set<std::string>* matches) const { |
2363 | 753 | if (element->isString()) { |
2364 | 346 | const auto* json_string = element->unpack<JsonbStringVal>(); |
2365 | 346 | const std::string_view element_str(json_string->getBlob(), json_string->length()); |
2366 | 346 | unsigned char res; |
2367 | 346 | RETURN_IF_ERROR(matched(element_str, state, &res)); |
2368 | 346 | if (res) { |
2369 | 223 | std::string str; |
2370 | 223 | auto valid = cur_path->to_string(&str); |
2371 | 223 | if (!valid) { |
2372 | 0 | return false; |
2373 | 0 | } |
2374 | 223 | return matches->insert(str).second; |
2375 | 223 | } else { |
2376 | 123 | return false; |
2377 | 123 | } |
2378 | 407 | } else if (element->isObject()) { |
2379 | 206 | const auto* object = element->unpack<ObjectVal>(); |
2380 | 206 | bool find = false; |
2381 | 212 | for (const auto& item : *object) { |
2382 | 212 | Slice key(item.getKeyStr(), item.klen()); |
2383 | 212 | const auto* child_element = item.value(); |
2384 | | // construct an object member path leg. |
2385 | 212 | auto leg = std::make_unique<leg_info>(key.data, key.size, 0, MEMBER_CODE); |
2386 | 212 | cur_path->add_leg_to_leg_vector(std::move(leg)); |
2387 | 212 | find |= find_matches(child_element, one_match, state, cur_path, matches); |
2388 | 212 | cur_path->pop_leg_from_leg_vector(); |
2389 | 212 | if (one_match && find) { |
2390 | 8 | return true; |
2391 | 8 | } |
2392 | 212 | } |
2393 | 198 | return find; |
2394 | 206 | } else if (element->isArray()) { |
2395 | 201 | const auto* array = element->unpack<ArrayVal>(); |
2396 | 201 | bool find = false; |
2397 | 530 | for (int i = 0; i < array->numElem(); ++i) { |
2398 | 399 | auto leg = std::make_unique<leg_info>(nullptr, 0, i, ARRAY_CODE); |
2399 | 399 | cur_path->add_leg_to_leg_vector(std::move(leg)); |
2400 | 399 | const auto* child_element = array->get(i); |
2401 | | // construct an array cell path leg. |
2402 | 399 | find |= find_matches(child_element, one_match, state, cur_path, matches); |
2403 | 399 | cur_path->pop_leg_from_leg_vector(); |
2404 | 399 | if (one_match && find) { |
2405 | 70 | return true; |
2406 | 70 | } |
2407 | 399 | } |
2408 | 131 | return find; |
2409 | 201 | } else { |
2410 | 0 | return false; |
2411 | 0 | } |
2412 | 753 | } |
2413 | | |
2414 | | void make_result_str(JsonbWriter& writer, std::unordered_set<std::string>& matches, |
2415 | 128 | ColumnString* result_col) const { |
2416 | 128 | if (matches.size() == 1) { |
2417 | 95 | for (const auto& str_ref : matches) { |
2418 | 95 | writer.writeStartString(); |
2419 | 95 | writer.writeString(str_ref); |
2420 | 95 | writer.writeEndString(); |
2421 | 95 | } |
2422 | 95 | } else { |
2423 | 33 | writer.writeStartArray(); |
2424 | 128 | for (const auto& str_ref : matches) { |
2425 | 128 | writer.writeStartString(); |
2426 | 128 | writer.writeString(str_ref); |
2427 | 128 | writer.writeEndString(); |
2428 | 128 | } |
2429 | 33 | writer.writeEndArray(); |
2430 | 33 | } |
2431 | | |
2432 | 128 | result_col->insert_data(writer.getOutput()->getBuffer(), |
2433 | 128 | (size_t)writer.getOutput()->getSize()); |
2434 | 128 | } |
2435 | | |
2436 | | template <bool search_is_const> |
2437 | | Status execute_vector(Block& block, size_t input_rows_count, CheckNullFun json_null_check, |
2438 | | GetJsonStringRefFun col_json_string, CheckNullFun one_null_check, |
2439 | | OneFun one_check, CheckNullFun search_null_check, |
2440 | | const ColumnString* col_search_string, FunctionContext* context, |
2441 | 54 | size_t result) const { |
2442 | 54 | auto result_col = ColumnString::create(); |
2443 | 54 | auto null_map = ColumnUInt8::create(input_rows_count, 0); |
2444 | | |
2445 | 54 | std::shared_ptr<LikeState> state_ptr; |
2446 | 54 | LikeState* state = nullptr; |
2447 | 54 | if (search_is_const) { |
2448 | 8 | state = reinterpret_cast<LikeState*>( |
2449 | 8 | context->get_function_state(FunctionContext::THREAD_LOCAL)); |
2450 | 8 | } |
2451 | | |
2452 | 54 | bool is_one = false; |
2453 | | |
2454 | 54 | JsonbWriter writer; |
2455 | 226 | for (size_t i = 0; i < input_rows_count; ++i) { |
2456 | | // an error occurs if the json_doc argument is not a valid json document. |
2457 | 174 | if (json_null_check(i)) { |
2458 | 14 | null_map->get_data()[i] = 1; |
2459 | 14 | result_col->insert_data("", 0); |
2460 | 14 | continue; |
2461 | 14 | } |
2462 | 160 | const auto& json_doc_str = col_json_string(i); |
2463 | 160 | const JsonbDocument* json_doc = nullptr; |
2464 | 160 | auto st = JsonbDocument::checkAndCreateDocument(json_doc_str.data, json_doc_str.size, |
2465 | 160 | &json_doc); |
2466 | 160 | if (!st.ok()) { |
2467 | 0 | return Status::InvalidArgument( |
2468 | 0 | "the json_doc argument at row {} is not a valid json document: {}", i, |
2469 | 0 | st.to_string()); |
2470 | 0 | } |
2471 | | |
2472 | 160 | if (!one_null_check(i)) { |
2473 | 158 | RETURN_IF_ERROR(one_check(i, &is_one)); |
2474 | 158 | } |
2475 | | |
2476 | 158 | if (one_null_check(i) || search_null_check(i)) { |
2477 | 16 | null_map->get_data()[i] = 1; |
2478 | 16 | result_col->insert_data("", 0); |
2479 | 16 | continue; |
2480 | 16 | } |
2481 | | |
2482 | | // an error occurs if any path argument is not a valid path expression. |
2483 | 142 | std::string root_path_str = "$"; |
2484 | 142 | JsonbPath root_path; |
2485 | 142 | root_path.seek(root_path_str.c_str(), root_path_str.size()); |
2486 | 142 | std::vector<JsonbPath*> paths; |
2487 | 142 | paths.push_back(&root_path); |
2488 | | |
2489 | 142 | if (!search_is_const) { |
2490 | 110 | state_ptr = std::make_shared<LikeState>(); |
2491 | 110 | state_ptr->is_like_pattern = true; |
2492 | 110 | const auto& search_str = col_search_string->get_data_at(i); |
2493 | 110 | RETURN_IF_ERROR(FunctionLike::construct_like_const_state(context, search_str, |
2494 | 110 | state_ptr, false)); |
2495 | 110 | state = state_ptr.get(); |
2496 | 110 | } |
2497 | | |
2498 | | // maintain a hashset to deduplicate matches. |
2499 | 142 | std::unordered_set<std::string> matches; |
2500 | 142 | for (const auto& item : paths) { |
2501 | 142 | auto* cur_path = item; |
2502 | 142 | auto find = find_matches(json_doc->getValue(), is_one, state, cur_path, &matches); |
2503 | 142 | if (is_one && find) { |
2504 | 75 | break; |
2505 | 75 | } |
2506 | 142 | } |
2507 | 142 | if (matches.empty()) { |
2508 | | // returns NULL if the search_str is not found in the document. |
2509 | 14 | null_map->get_data()[i] = 1; |
2510 | 14 | result_col->insert_data("", 0); |
2511 | 14 | continue; |
2512 | 14 | } |
2513 | | |
2514 | 128 | writer.reset(); |
2515 | 128 | make_result_str(writer, matches, result_col.get()); |
2516 | 128 | } |
2517 | 52 | auto result_col_nullable = |
2518 | 52 | ColumnNullable::create(std::move(result_col), std::move(null_map)); |
2519 | 52 | block.replace_by_position(result, std::move(result_col_nullable)); |
2520 | 52 | return Status::OK(); |
2521 | 54 | } _ZNK5doris18FunctionJsonSearch14execute_vectorILb1EEENS_6StatusERNS_5BlockEmSt8functionIFbmEES5_IFNS_9StringRefEmEES7_S5_IFS2_mPbEES7_PKNS_9ColumnStrIjEEPNS_15FunctionContextEm Line | Count | Source | 2441 | 8 | size_t result) const { | 2442 | 8 | auto result_col = ColumnString::create(); | 2443 | 8 | auto null_map = ColumnUInt8::create(input_rows_count, 0); | 2444 | | | 2445 | 8 | std::shared_ptr<LikeState> state_ptr; | 2446 | 8 | LikeState* state = nullptr; | 2447 | 8 | if (search_is_const) { | 2448 | 8 | state = reinterpret_cast<LikeState*>( | 2449 | 8 | context->get_function_state(FunctionContext::THREAD_LOCAL)); | 2450 | 8 | } | 2451 | | | 2452 | 8 | bool is_one = false; | 2453 | | | 2454 | 8 | JsonbWriter writer; | 2455 | 44 | for (size_t i = 0; i < input_rows_count; ++i) { | 2456 | | // an error occurs if the json_doc argument is not a valid json document. | 2457 | 36 | if (json_null_check(i)) { | 2458 | 4 | null_map->get_data()[i] = 1; | 2459 | 4 | result_col->insert_data("", 0); | 2460 | 4 | continue; | 2461 | 4 | } | 2462 | 32 | const auto& json_doc_str = col_json_string(i); | 2463 | 32 | const JsonbDocument* json_doc = nullptr; | 2464 | 32 | auto st = JsonbDocument::checkAndCreateDocument(json_doc_str.data, json_doc_str.size, | 2465 | 32 | &json_doc); | 2466 | 32 | if (!st.ok()) { | 2467 | 0 | return Status::InvalidArgument( | 2468 | 0 | "the json_doc argument at row {} is not a valid json document: {}", i, | 2469 | 0 | st.to_string()); | 2470 | 0 | } | 2471 | | | 2472 | 32 | if (!one_null_check(i)) { | 2473 | 32 | RETURN_IF_ERROR(one_check(i, &is_one)); | 2474 | 32 | } | 2475 | | | 2476 | 32 | if (one_null_check(i) || search_null_check(i)) { | 2477 | 0 | null_map->get_data()[i] = 1; | 2478 | 0 | result_col->insert_data("", 0); | 2479 | 0 | continue; | 2480 | 0 | } | 2481 | | | 2482 | | // an error occurs if any path argument is not a valid path expression. 
| 2483 | 32 | std::string root_path_str = "$"; | 2484 | 32 | JsonbPath root_path; | 2485 | 32 | root_path.seek(root_path_str.c_str(), root_path_str.size()); | 2486 | 32 | std::vector<JsonbPath*> paths; | 2487 | 32 | paths.push_back(&root_path); | 2488 | | | 2489 | 32 | if (!search_is_const) { | 2490 | 0 | state_ptr = std::make_shared<LikeState>(); | 2491 | 0 | state_ptr->is_like_pattern = true; | 2492 | 0 | const auto& search_str = col_search_string->get_data_at(i); | 2493 | 0 | RETURN_IF_ERROR(FunctionLike::construct_like_const_state(context, search_str, | 2494 | 0 | state_ptr, false)); | 2495 | 0 | state = state_ptr.get(); | 2496 | 0 | } | 2497 | | | 2498 | | // maintain a hashset to deduplicate matches. | 2499 | 32 | std::unordered_set<std::string> matches; | 2500 | 32 | for (const auto& item : paths) { | 2501 | 32 | auto* cur_path = item; | 2502 | 32 | auto find = find_matches(json_doc->getValue(), is_one, state, cur_path, &matches); | 2503 | 32 | if (is_one && find) { | 2504 | 16 | break; | 2505 | 16 | } | 2506 | 32 | } | 2507 | 32 | if (matches.empty()) { | 2508 | | // returns NULL if the search_str is not found in the document. | 2509 | 0 | null_map->get_data()[i] = 1; | 2510 | 0 | result_col->insert_data("", 0); | 2511 | 0 | continue; | 2512 | 0 | } | 2513 | | | 2514 | 32 | writer.reset(); | 2515 | 32 | make_result_str(writer, matches, result_col.get()); | 2516 | 32 | } | 2517 | 8 | auto result_col_nullable = | 2518 | 8 | ColumnNullable::create(std::move(result_col), std::move(null_map)); | 2519 | 8 | block.replace_by_position(result, std::move(result_col_nullable)); | 2520 | 8 | return Status::OK(); | 2521 | 8 | } |
_ZNK5doris18FunctionJsonSearch14execute_vectorILb0EEENS_6StatusERNS_5BlockEmSt8functionIFbmEES5_IFNS_9StringRefEmEES7_S5_IFS2_mPbEES7_PKNS_9ColumnStrIjEEPNS_15FunctionContextEm Line | Count | Source | 2441 | 46 | size_t result) const { | 2442 | 46 | auto result_col = ColumnString::create(); | 2443 | 46 | auto null_map = ColumnUInt8::create(input_rows_count, 0); | 2444 | | | 2445 | 46 | std::shared_ptr<LikeState> state_ptr; | 2446 | 46 | LikeState* state = nullptr; | 2447 | 46 | if (search_is_const) { | 2448 | 0 | state = reinterpret_cast<LikeState*>( | 2449 | 0 | context->get_function_state(FunctionContext::THREAD_LOCAL)); | 2450 | 0 | } | 2451 | | | 2452 | 46 | bool is_one = false; | 2453 | | | 2454 | 46 | JsonbWriter writer; | 2455 | 182 | for (size_t i = 0; i < input_rows_count; ++i) { | 2456 | | // an error occurs if the json_doc argument is not a valid json document. | 2457 | 138 | if (json_null_check(i)) { | 2458 | 10 | null_map->get_data()[i] = 1; | 2459 | 10 | result_col->insert_data("", 0); | 2460 | 10 | continue; | 2461 | 10 | } | 2462 | 128 | const auto& json_doc_str = col_json_string(i); | 2463 | 128 | const JsonbDocument* json_doc = nullptr; | 2464 | 128 | auto st = JsonbDocument::checkAndCreateDocument(json_doc_str.data, json_doc_str.size, | 2465 | 128 | &json_doc); | 2466 | 128 | if (!st.ok()) { | 2467 | 0 | return Status::InvalidArgument( | 2468 | 0 | "the json_doc argument at row {} is not a valid json document: {}", i, | 2469 | 0 | st.to_string()); | 2470 | 0 | } | 2471 | | | 2472 | 128 | if (!one_null_check(i)) { | 2473 | 126 | RETURN_IF_ERROR(one_check(i, &is_one)); | 2474 | 126 | } | 2475 | | | 2476 | 126 | if (one_null_check(i) || search_null_check(i)) { | 2477 | 16 | null_map->get_data()[i] = 1; | 2478 | 16 | result_col->insert_data("", 0); | 2479 | 16 | continue; | 2480 | 16 | } | 2481 | | | 2482 | | // an error occurs if any path argument is not a valid path expression. 
| 2483 | 110 | std::string root_path_str = "$"; | 2484 | 110 | JsonbPath root_path; | 2485 | 110 | root_path.seek(root_path_str.c_str(), root_path_str.size()); | 2486 | 110 | std::vector<JsonbPath*> paths; | 2487 | 110 | paths.push_back(&root_path); | 2488 | | | 2489 | 110 | if (!search_is_const) { | 2490 | 110 | state_ptr = std::make_shared<LikeState>(); | 2491 | 110 | state_ptr->is_like_pattern = true; | 2492 | 110 | const auto& search_str = col_search_string->get_data_at(i); | 2493 | 110 | RETURN_IF_ERROR(FunctionLike::construct_like_const_state(context, search_str, | 2494 | 110 | state_ptr, false)); | 2495 | 110 | state = state_ptr.get(); | 2496 | 110 | } | 2497 | | | 2498 | | // maintain a hashset to deduplicate matches. | 2499 | 110 | std::unordered_set<std::string> matches; | 2500 | 110 | for (const auto& item : paths) { | 2501 | 110 | auto* cur_path = item; | 2502 | 110 | auto find = find_matches(json_doc->getValue(), is_one, state, cur_path, &matches); | 2503 | 110 | if (is_one && find) { | 2504 | 59 | break; | 2505 | 59 | } | 2506 | 110 | } | 2507 | 110 | if (matches.empty()) { | 2508 | | // returns NULL if the search_str is not found in the document. | 2509 | 14 | null_map->get_data()[i] = 1; | 2510 | 14 | result_col->insert_data("", 0); | 2511 | 14 | continue; | 2512 | 14 | } | 2513 | | | 2514 | 96 | writer.reset(); | 2515 | 96 | make_result_str(writer, matches, result_col.get()); | 2516 | 96 | } | 2517 | 44 | auto result_col_nullable = | 2518 | 44 | ColumnNullable::create(std::move(result_col), std::move(null_map)); | 2519 | 44 | block.replace_by_position(result, std::move(result_col_nullable)); | 2520 | 44 | return Status::OK(); | 2521 | 46 | } |
|
2522 | | |
2523 | | static constexpr auto one = "one"; |
2524 | | static constexpr auto all = "all"; |
2525 | | |
2526 | | public: |
2527 | | static constexpr auto name = "json_search"; |
2528 | 58 | static FunctionPtr create() { return std::make_shared<FunctionJsonSearch>(); } |
2529 | | |
2530 | 1 | String get_name() const override { return name; } |
2531 | 50 | bool is_variadic() const override { return false; } |
2532 | 49 | size_t get_number_of_arguments() const override { return 3; } |
2533 | | |
2534 | 49 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
2535 | 49 | return make_nullable(std::make_shared<DataTypeJsonb>()); |
2536 | 49 | } |
2537 | | |
2538 | 114 | bool use_default_implementation_for_nulls() const override { return false; } |
2539 | | |
2540 | 232 | Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { |
2541 | 232 | if (scope != FunctionContext::THREAD_LOCAL) { |
2542 | 49 | return Status::OK(); |
2543 | 49 | } |
2544 | 183 | if (context->is_col_constant(2)) { |
2545 | 85 | std::shared_ptr<LikeState> state = std::make_shared<LikeState>(); |
2546 | 85 | state->is_like_pattern = true; |
2547 | 85 | const auto pattern_col = context->get_constant_col(2)->column_ptr; |
2548 | 85 | const auto& pattern = pattern_col->get_data_at(0); |
2549 | 85 | RETURN_IF_ERROR( |
2550 | 85 | FunctionLike::construct_like_const_state(context, pattern, state, false)); |
2551 | 85 | context->set_function_state(scope, state); |
2552 | 85 | } |
2553 | 183 | return Status::OK(); |
2554 | 183 | } |
2555 | | |
2556 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
2557 | 65 | uint32_t result, size_t input_rows_count) const override { |
2558 | | // the json_doc, one_or_all, and search_str must be given. |
2559 | | // and we require the positions are static. |
2560 | 65 | if (arguments.size() < 3) { |
2561 | 0 | return Status::InvalidArgument("too few arguments for function {}", name); |
2562 | 0 | } |
2563 | 65 | if (arguments.size() > 3) { |
2564 | 0 | return Status::NotSupported("escape and path params are not support now"); |
2565 | 0 | } |
2566 | | |
2567 | 65 | CheckNullFun json_null_check = always_not_null; |
2568 | 65 | GetJsonStringRefFun get_json_fun; |
2569 | | // prepare jsonb data column |
2570 | 65 | auto&& [col_json, json_is_const] = |
2571 | 65 | unpack_if_const(block.get_by_position(arguments[0]).column); |
2572 | 65 | const auto* col_json_string = check_and_get_column<ColumnString>(col_json.get()); |
2573 | 65 | if (const auto* nullable = check_and_get_column<ColumnNullable>(col_json.get())) { |
2574 | 65 | col_json_string = |
2575 | 65 | check_and_get_column<ColumnString>(nullable->get_nested_column_ptr().get()); |
2576 | 65 | } |
2577 | | |
2578 | 65 | if (!col_json_string) { |
2579 | 0 | return Status::RuntimeError("Illegal arg json {} should be ColumnString", |
2580 | 0 | col_json->get_name()); |
2581 | 0 | } |
2582 | | |
2583 | 65 | auto create_all_null_result = [&]() { |
2584 | 6 | auto res_str = ColumnString::create(); |
2585 | 6 | res_str->insert_default(); |
2586 | 6 | auto res = ColumnNullable::create(std::move(res_str), ColumnUInt8::create(1, 1)); |
2587 | 6 | if (input_rows_count > 1) { |
2588 | 6 | block.get_by_position(result).column = |
2589 | 6 | ColumnConst::create(std::move(res), input_rows_count); |
2590 | 6 | } else { |
2591 | 0 | block.get_by_position(result).column = std::move(res); |
2592 | 0 | } |
2593 | 6 | return Status::OK(); |
2594 | 6 | }; |
2595 | | |
2596 | 65 | if (json_is_const) { |
2597 | 11 | if (col_json->is_null_at(0)) { |
2598 | 2 | return create_all_null_result(); |
2599 | 9 | } else { |
2600 | 9 | const auto& json_str = col_json_string->get_data_at(0); |
2601 | 36 | get_json_fun = [json_str](size_t i) { return json_str; }; |
2602 | 9 | } |
2603 | 54 | } else { |
2604 | 138 | json_null_check = [col_json](size_t i) { return col_json->is_null_at(i); }; |
2605 | 124 | get_json_fun = [col_json_string](size_t i) { return col_json_string->get_data_at(i); }; |
2606 | 54 | } |
2607 | | |
2608 | | // one_or_all |
2609 | 63 | CheckNullFun one_null_check = always_not_null; |
2610 | 63 | OneFun one_check = always_one; |
2611 | 63 | auto&& [col_one, one_is_const] = |
2612 | 63 | unpack_if_const(block.get_by_position(arguments[1]).column); |
2613 | 63 | one_is_const |= input_rows_count == 1; |
2614 | 63 | const auto* col_one_string = check_and_get_column<ColumnString>(col_one.get()); |
2615 | 63 | if (const auto* nullable = check_and_get_column<ColumnNullable>(col_one.get())) { |
2616 | 9 | col_one_string = check_and_get_column<ColumnString>(*nullable->get_nested_column_ptr()); |
2617 | 9 | } |
2618 | 63 | if (!col_one_string) { |
2619 | 0 | return Status::RuntimeError("Illegal arg one {} should be ColumnString", |
2620 | 0 | col_one->get_name()); |
2621 | 0 | } |
2622 | 63 | if (one_is_const) { |
2623 | 51 | if (col_one->is_null_at(0)) { |
2624 | 4 | return create_all_null_result(); |
2625 | 47 | } else { |
2626 | 47 | const auto& one_or_all = col_one_string->get_data_at(0); |
2627 | 47 | std::string one_or_all_str = one_or_all.to_string(); |
2628 | 47 | if (strcasecmp(one_or_all_str.c_str(), all) == 0) { |
2629 | 17 | one_check = always_all; |
2630 | 30 | } else if (strcasecmp(one_or_all_str.c_str(), one) == 0) { |
2631 | | // nothing |
2632 | 25 | } else { |
2633 | | // an error occurs if the one_or_all argument is not 'one' nor 'all'. |
2634 | 5 | return Status::InvalidArgument( |
2635 | 5 | "the one_or_all argument {} is not 'one' not 'all'", one_or_all_str); |
2636 | 5 | } |
2637 | 47 | } |
2638 | 51 | } else { |
2639 | 82 | one_null_check = [col_one](size_t i) { return col_one->is_null_at(i); }; |
2640 | 40 | one_check = [col_one_string](size_t i, bool* is_one) { |
2641 | 40 | const auto& one_or_all = col_one_string->get_data_at(i); |
2642 | 40 | std::string one_or_all_str = one_or_all.to_string(); |
2643 | 40 | if (strcasecmp(one_or_all_str.c_str(), all) == 0) { |
2644 | 22 | *is_one = false; |
2645 | 22 | } else if (strcasecmp(one_or_all_str.c_str(), one) == 0) { |
2646 | 16 | *is_one = true; |
2647 | 16 | } else { |
2648 | | // an error occurs if the one_or_all argument is not 'one' nor 'all'. |
2649 | 2 | return Status::InvalidArgument( |
2650 | 2 | "the one_or_all argument {} is not 'one' not 'all'", one_or_all_str); |
2651 | 2 | } |
2652 | 38 | return Status::OK(); |
2653 | 40 | }; |
2654 | 12 | } |
2655 | | |
2656 | | // search_str |
2657 | 54 | auto&& [col_search, search_is_const] = |
2658 | 54 | unpack_if_const(block.get_by_position(arguments[2]).column); |
2659 | | |
2660 | 54 | const auto* col_search_string = check_and_get_column<ColumnString>(col_search.get()); |
2661 | 54 | if (const auto* nullable = check_and_get_column<ColumnNullable>(col_search.get())) { |
2662 | 28 | col_search_string = |
2663 | 28 | check_and_get_column<ColumnString>(*nullable->get_nested_column_ptr()); |
2664 | 28 | } |
2665 | 54 | if (!col_search_string) { |
2666 | 0 | return Status::RuntimeError("Illegal arg pattern {} should be ColumnString", |
2667 | 0 | col_search->get_name()); |
2668 | 0 | } |
2669 | 54 | if (search_is_const) { |
2670 | 8 | CheckNullFun search_null_check = always_not_null; |
2671 | 8 | if (col_search->is_null_at(0)) { |
2672 | 0 | return create_all_null_result(); |
2673 | 0 | } |
2674 | 8 | RETURN_IF_ERROR(execute_vector<true>( |
2675 | 8 | block, input_rows_count, json_null_check, get_json_fun, one_null_check, |
2676 | 8 | one_check, search_null_check, col_search_string, context, result)); |
2677 | 46 | } else { |
2678 | 124 | CheckNullFun search_null_check = [col_search](size_t i) { |
2679 | 124 | return col_search->is_null_at(i); |
2680 | 124 | }; |
2681 | 46 | RETURN_IF_ERROR(execute_vector<false>( |
2682 | 46 | block, input_rows_count, json_null_check, get_json_fun, one_null_check, |
2683 | 46 | one_check, search_null_check, col_search_string, context, result)); |
2684 | 46 | } |
2685 | 52 | return Status::OK(); |
2686 | 54 | } |
2687 | | }; |
2688 | | |
// Heap-allocated byte buffer together with its bookkeeping.
struct DocumentBuffer {
    // Owned storage; empty until a buffer is allocated.
    std::unique_ptr<char[]> ptr;
    // Number of bytes currently in use.
    size_t size = 0;
    // Number of bytes allocated behind `ptr`.
    size_t capacity = 0;
};
2694 | | |
2695 | | class FunctionJsonbRemove : public IFunction { |
2696 | | public: |
2697 | | static constexpr auto name = "jsonb_remove"; |
2698 | | static constexpr auto alias = "json_remove"; |
2699 | | |
2700 | 31 | static FunctionPtr create() { return std::make_shared<FunctionJsonbRemove>(); } |
2701 | | |
2702 | 0 | String get_name() const override { return name; } |
2703 | | |
2704 | 0 | size_t get_number_of_arguments() const override { return 0; } |
2705 | 23 | bool is_variadic() const override { return true; } |
2706 | | |
2707 | 44 | bool use_default_implementation_for_nulls() const override { return false; } |
2708 | | |
2709 | 22 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
2710 | 22 | return make_nullable(std::make_shared<DataTypeJsonb>()); |
2711 | 22 | } |
2712 | | |
2713 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
2714 | 22 | uint32_t result, size_t input_rows_count) const override { |
2715 | 22 | DORIS_CHECK_GE(arguments.size(), 2); |
2716 | | |
2717 | | // Check if arguments count is valid (json_doc + at least one path) |
2718 | 22 | if (arguments.size() < 2) { |
2719 | 0 | return Status::InvalidArgument("json_remove requires at least 2 arguments"); |
2720 | 0 | } |
2721 | | |
2722 | 22 | auto return_data_type = make_nullable(std::make_shared<DataTypeJsonb>()); |
2723 | 22 | auto result_column = return_data_type->create_column(); |
2724 | 22 | auto& nullable_column = assert_cast<ColumnNullable&>(*result_column); |
2725 | 22 | auto& res_chars = |
2726 | 22 | assert_cast<ColumnString&>(nullable_column.get_nested_column()).get_chars(); |
2727 | 22 | auto& res_offsets = |
2728 | 22 | assert_cast<ColumnString&>(nullable_column.get_nested_column()).get_offsets(); |
2729 | 22 | auto& null_map = nullable_column.get_null_map_data(); |
2730 | | |
2731 | 22 | res_chars.reserve(input_rows_count * 64); |
2732 | 22 | res_offsets.resize(input_rows_count); |
2733 | 22 | null_map.resize_fill(input_rows_count, 0); |
2734 | | |
2735 | | // Get JSON document column |
2736 | 22 | auto [json_column, json_const] = |
2737 | 22 | unpack_if_const(block.get_by_position(arguments[0]).column); |
2738 | 22 | const auto* json_nullable = check_and_get_column<ColumnNullable>(json_column.get()); |
2739 | 22 | const ColumnString* json_data_column = nullptr; |
2740 | 22 | const NullMap* json_null_map = nullptr; |
2741 | | |
2742 | 22 | if (json_nullable) { |
2743 | 22 | json_null_map = &json_nullable->get_null_map_data(); |
2744 | 22 | json_data_column = |
2745 | 22 | check_and_get_column<ColumnString>(&json_nullable->get_nested_column()); |
2746 | 22 | } else { |
2747 | 0 | json_data_column = check_and_get_column<ColumnString>(json_column.get()); |
2748 | 0 | } |
2749 | | |
2750 | 22 | if (!json_data_column) { |
2751 | 0 | return Status::InvalidArgument("First argument must be a JSON document"); |
2752 | 0 | } |
2753 | | |
2754 | | // Parse paths |
2755 | 22 | std::vector<const ColumnString*> path_columns; |
2756 | 22 | std::vector<const NullMap*> path_null_maps; |
2757 | 22 | std::vector<bool> path_constants; |
2758 | | |
2759 | 51 | for (size_t i = 1; i < arguments.size(); ++i) { |
2760 | 29 | auto [path_column, path_const] = |
2761 | 29 | unpack_if_const(block.get_by_position(arguments[i]).column); |
2762 | 29 | const auto* path_nullable = check_and_get_column<ColumnNullable>(path_column.get()); |
2763 | | |
2764 | 29 | if (path_nullable) { |
2765 | 6 | path_null_maps.push_back(&path_nullable->get_null_map_data()); |
2766 | 6 | path_columns.push_back( |
2767 | 6 | check_and_get_column<ColumnString>(&path_nullable->get_nested_column())); |
2768 | 23 | } else { |
2769 | 23 | path_null_maps.push_back(nullptr); |
2770 | 23 | path_columns.push_back(check_and_get_column<ColumnString>(path_column.get())); |
2771 | 23 | } |
2772 | | |
2773 | 29 | if (!path_columns.back()) { |
2774 | 0 | return Status::InvalidArgument( |
2775 | 0 | fmt::format("Argument {} must be a string path", i + 1)); |
2776 | 0 | } |
2777 | | |
2778 | 29 | path_constants.push_back(path_const); |
2779 | 29 | } |
2780 | | |
2781 | | // Reusable JsonbWriter for performance |
2782 | 22 | JsonbWriter writer; |
2783 | | |
2784 | 48 | for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) { |
2785 | 28 | size_t json_idx = index_check_const(row_idx, json_const); |
2786 | | |
2787 | | // Check if JSON document is null |
2788 | 28 | if (json_null_map && (*json_null_map)[json_idx]) { |
2789 | 2 | null_map[row_idx] = 1; |
2790 | 2 | res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size()); |
2791 | 2 | continue; |
2792 | 2 | } |
2793 | | |
2794 | | // Parse JSON document |
2795 | 26 | const auto& json_data = json_data_column->get_data_at(json_idx); |
2796 | 26 | const JsonbDocument* json_doc = nullptr; |
2797 | 26 | Status parse_status = JsonbDocument::checkAndCreateDocument(json_data.data, |
2798 | 26 | json_data.size, &json_doc); |
2799 | | |
2800 | 26 | if (!parse_status.ok() || !json_doc) { |
2801 | 0 | null_map[row_idx] = 1; |
2802 | 0 | res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size()); |
2803 | 0 | continue; |
2804 | 0 | } |
2805 | | |
2806 | | // Check if any path is null |
2807 | 26 | bool has_null_path = false; |
2808 | 59 | for (size_t path_idx = 0; path_idx < path_columns.size(); ++path_idx) { |
2809 | 35 | size_t idx = index_check_const(row_idx, path_constants[path_idx]); |
2810 | 35 | if (path_null_maps[path_idx] && (*path_null_maps[path_idx])[idx]) { |
2811 | 2 | has_null_path = true; |
2812 | 2 | break; |
2813 | 2 | } |
2814 | 35 | } |
2815 | | |
2816 | 26 | if (has_null_path) { |
2817 | 2 | null_map[row_idx] = 1; |
2818 | 2 | res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size()); |
2819 | 2 | continue; |
2820 | 2 | } |
2821 | | |
2822 | 24 | std::vector<JsonbPath> paths; |
2823 | 24 | std::vector<bool> path_constants_vec; |
2824 | | |
2825 | 54 | for (size_t path_idx = 0; path_idx < path_columns.size(); ++path_idx) { |
2826 | 32 | size_t idx = index_check_const(row_idx, path_constants[path_idx]); |
2827 | 32 | const auto& path_data = path_columns[path_idx]->get_data_at(idx); |
2828 | | |
2829 | 32 | JsonbPath path; |
2830 | 32 | if (!path.seek(path_data.data, path_data.size)) { |
2831 | 1 | return Status::InvalidArgument( |
2832 | 1 | "Json path error: Invalid Json Path for value: {} at row: {}", |
2833 | 1 | std::string_view(path_data.data, path_data.size), row_idx); |
2834 | 1 | } |
2835 | | |
2836 | 31 | if (path.is_wildcard() || path.is_supper_wildcard()) { |
2837 | 1 | return Status::InvalidArgument( |
2838 | 1 | "In this situation, path expressions may not contain the * and ** " |
2839 | 1 | "tokens or an array range, argument index: {}, row index: {}", |
2840 | 1 | path_idx + 1, row_idx); |
2841 | 1 | } |
2842 | | |
2843 | 30 | paths.push_back(std::move(path)); |
2844 | 30 | path_constants_vec.push_back(path_constants[path_idx]); |
2845 | 30 | } |
2846 | | |
2847 | 22 | const JsonbValue* current_value = json_doc->getValue(); |
2848 | | |
2849 | 22 | DocumentBuffer tmp_buffer; |
2850 | | |
2851 | 52 | for (size_t path_idx = 0; path_idx < paths.size(); ++path_idx) { |
2852 | 30 | writer.reset(); |
2853 | | |
2854 | 30 | auto find_result = current_value->findValue(paths[path_idx]); |
2855 | | |
2856 | 30 | if (find_result.is_wildcard) { |
2857 | 0 | continue; |
2858 | 0 | } |
2859 | | |
2860 | 30 | if (find_result.value) { |
2861 | 24 | RETURN_IF_ERROR(clone_without_path(current_value, paths[path_idx], writer)); |
2862 | | |
2863 | 24 | auto* writer_output = writer.getOutput(); |
2864 | 24 | if (writer_output->getSize() > tmp_buffer.capacity) { |
2865 | 17 | tmp_buffer.capacity = |
2866 | 17 | ((size_t(writer_output->getSize()) + 1024 - 1) / 1024) * 1024; |
2867 | 17 | tmp_buffer.ptr = std::make_unique<char[]>(tmp_buffer.capacity); |
2868 | 17 | DCHECK_LE(writer_output->getSize(), tmp_buffer.capacity); |
2869 | 17 | } |
2870 | | |
2871 | 24 | memcpy(tmp_buffer.ptr.get(), writer_output->getBuffer(), |
2872 | 24 | writer_output->getSize()); |
2873 | 24 | tmp_buffer.size = writer_output->getSize(); |
2874 | | |
2875 | 24 | const JsonbDocument* new_doc = nullptr; |
2876 | 24 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument( |
2877 | 24 | tmp_buffer.ptr.get(), tmp_buffer.size, &new_doc)); |
2878 | | |
2879 | 24 | current_value = new_doc->getValue(); |
2880 | 24 | } |
2881 | 30 | } |
2882 | | |
2883 | 22 | const JsonbDocument* modified_doc = nullptr; |
2884 | 22 | if (current_value != json_doc->getValue()) { |
2885 | 17 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument( |
2886 | 17 | tmp_buffer.ptr.get(), tmp_buffer.size, &modified_doc)); |
2887 | 17 | } else { |
2888 | 5 | modified_doc = json_doc; |
2889 | 5 | } |
2890 | | |
2891 | | // Write the final result |
2892 | 22 | const auto size = modified_doc->numPackedBytes(); |
2893 | 22 | res_chars.insert(reinterpret_cast<const char*>(modified_doc), |
2894 | 22 | reinterpret_cast<const char*>(modified_doc) + size); |
2895 | 22 | res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size()); |
2896 | 22 | } |
2897 | | |
2898 | 20 | block.get_by_position(result).column = std::move(result_column); |
2899 | 20 | return Status::OK(); |
2900 | 22 | } |
2901 | | |
2902 | | private: |
2903 | | Status clone_without_path(const JsonbValue* root, const JsonbPath& path, |
2904 | 24 | JsonbWriter& writer) const { |
2905 | | // Start writing at the root level |
2906 | 24 | if (root->isObject()) { |
2907 | 15 | writer.writeStartObject(); |
2908 | 15 | RETURN_IF_ERROR(clone_object_without_path(root, path, 0, writer)); |
2909 | 15 | writer.writeEndObject(); |
2910 | 15 | } else if (root->isArray()) { |
2911 | 9 | writer.writeStartArray(); |
2912 | 9 | RETURN_IF_ERROR(clone_array_without_path(root, path, 0, writer)); |
2913 | 9 | writer.writeEndArray(); |
2914 | 9 | } else { |
2915 | | // Primitive value - can't remove anything from it |
2916 | 0 | writer.writeValue(root); |
2917 | 0 | } |
2918 | 24 | return Status::OK(); |
2919 | 24 | } |
2920 | | |
2921 | | Status clone_object_without_path(const JsonbValue* obj_value, const JsonbPath& path, |
2922 | 20 | size_t depth, JsonbWriter& writer) const { |
2923 | 20 | const auto* obj = obj_value->unpack<ObjectVal>(); |
2924 | | |
2925 | 40 | for (const auto& kv : *obj) { |
2926 | 40 | std::string key(kv.getKeyStr(), kv.klen()); |
2927 | | |
2928 | 40 | if (depth < path.get_leg_vector_size()) { |
2929 | 40 | const auto* leg = path.get_leg_from_leg_vector(depth); |
2930 | 40 | if (leg->type == MEMBER_CODE) { |
2931 | 40 | std::string target_key(leg->leg_ptr, leg->leg_len); |
2932 | | |
2933 | 40 | if (key == target_key) { |
2934 | 20 | if (depth == path.get_leg_vector_size() - 1) { |
2935 | 12 | continue; |
2936 | 12 | } else { |
2937 | 8 | writer.writeKey(kv.getKeyStr(), kv.klen()); |
2938 | 8 | if (kv.value()->isObject()) { |
2939 | 3 | writer.writeStartObject(); |
2940 | 3 | RETURN_IF_ERROR(clone_object_without_path(kv.value(), path, |
2941 | 3 | depth + 1, writer)); |
2942 | 3 | writer.writeEndObject(); |
2943 | 5 | } else if (kv.value()->isArray()) { |
2944 | 5 | writer.writeStartArray(); |
2945 | 5 | RETURN_IF_ERROR(clone_array_without_path(kv.value(), path, |
2946 | 5 | depth + 1, writer)); |
2947 | 5 | writer.writeEndArray(); |
2948 | 5 | } else { |
2949 | 0 | writer.writeValue(kv.value()); |
2950 | 0 | } |
2951 | 8 | } |
2952 | 20 | } else { |
2953 | 20 | writer.writeKey(kv.getKeyStr(), kv.klen()); |
2954 | 20 | writer.writeValue(kv.value()); |
2955 | 20 | } |
2956 | 40 | } else { |
2957 | 0 | writer.writeKey(kv.getKeyStr(), kv.klen()); |
2958 | 0 | writer.writeValue(kv.value()); |
2959 | 0 | } |
2960 | 40 | } else { |
2961 | 0 | writer.writeKey(kv.getKeyStr(), kv.klen()); |
2962 | 0 | writer.writeValue(kv.value()); |
2963 | 0 | } |
2964 | 40 | } |
2965 | | |
2966 | 20 | return Status::OK(); |
2967 | 20 | } |
2968 | | |
2969 | | Status clone_array_without_path(const JsonbValue* arr_value, const JsonbPath& path, |
2970 | 17 | size_t depth, JsonbWriter& writer) const { |
2971 | 17 | const auto* arr = arr_value->unpack<ArrayVal>(); |
2972 | | |
2973 | 17 | int index = 0; |
2974 | 52 | for (const auto& element : *arr) { |
2975 | 52 | if (depth < path.get_leg_vector_size()) { |
2976 | 52 | const auto* leg = path.get_leg_from_leg_vector(depth); |
2977 | 52 | if (leg->type == ARRAY_CODE) { |
2978 | 52 | int target_index = leg->array_index; |
2979 | | |
2980 | 52 | if (index == target_index) { |
2981 | 17 | if (depth == path.get_leg_vector_size() - 1) { |
2982 | | // This is the target element to remove - skip it |
2983 | 12 | } else { |
2984 | 5 | if (element.isObject()) { |
2985 | 2 | writer.writeStartObject(); |
2986 | 2 | RETURN_IF_ERROR(clone_object_without_path(&element, path, depth + 1, |
2987 | 2 | writer)); |
2988 | 2 | writer.writeEndObject(); |
2989 | 3 | } else if (element.isArray()) { |
2990 | 3 | writer.writeStartArray(); |
2991 | 3 | RETURN_IF_ERROR(clone_array_without_path(&element, path, depth + 1, |
2992 | 3 | writer)); |
2993 | 3 | writer.writeEndArray(); |
2994 | 3 | } else { |
2995 | 0 | writer.writeValue(&element); |
2996 | 0 | } |
2997 | 5 | } |
2998 | 35 | } else { |
2999 | 35 | writer.writeValue(&element); |
3000 | 35 | } |
3001 | 52 | } else { |
3002 | 0 | writer.writeValue(&element); |
3003 | 0 | } |
3004 | 52 | } else { |
3005 | 0 | writer.writeValue(&element); |
3006 | 0 | } |
3007 | 52 | index++; |
3008 | 52 | } |
3009 | | |
3010 | 17 | return Status::OK(); |
3011 | 17 | } |
3012 | | }; |
3013 | | |
3014 | | class FunctionStripNullValue : public IFunction { |
3015 | | public: |
3016 | | static constexpr auto name = "strip_null_value"; |
3017 | 24 | static FunctionPtr create() { return std::make_shared<FunctionStripNullValue>(); } |
3018 | | |
3019 | 1 | String get_name() const override { return name; } |
3020 | 16 | bool is_variadic() const override { return false; } |
3021 | 15 | size_t get_number_of_arguments() const override { return 1; } |
3022 | | |
3023 | 30 | bool use_default_implementation_for_nulls() const override { return false; } |
3024 | | |
3025 | 15 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
3026 | 15 | return make_nullable(std::make_shared<DataTypeJsonb>()); |
3027 | 15 | } |
3028 | | |
3029 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
3030 | 15 | uint32_t result, size_t input_rows_count) const override { |
3031 | 15 | const auto arg_column = |
3032 | 15 | block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); |
3033 | 15 | const ColumnString* json_column = nullptr; |
3034 | 15 | const NullMap* json_null_map = nullptr; |
3035 | 15 | if (const auto* nullable_col = check_and_get_column<ColumnNullable>(arg_column.get())) { |
3036 | 15 | json_column = assert_cast<const ColumnString*>(&nullable_col->get_nested_column()); |
3037 | 15 | json_null_map = &nullable_col->get_null_map_data(); |
3038 | 15 | } else { |
3039 | 0 | json_column = assert_cast<const ColumnString*>(arg_column.get()); |
3040 | 0 | } |
3041 | | |
3042 | 15 | auto return_data_type = make_nullable(std::make_shared<DataTypeJsonb>()); |
3043 | 15 | auto result_column = return_data_type->create_column(); |
3044 | | |
3045 | 15 | auto& result_nullmap = assert_cast<ColumnNullable&>(*result_column).get_null_map_data(); |
3046 | 15 | auto& result_data_col = assert_cast<ColumnString&>( |
3047 | 15 | assert_cast<ColumnNullable&>(*result_column).get_nested_column()); |
3048 | | |
3049 | 15 | result_nullmap.resize_fill(input_rows_count, 0); |
3050 | 60 | for (size_t i = 0; i != input_rows_count; ++i) { |
3051 | 45 | if (json_null_map && (*json_null_map)[i]) { |
3052 | 13 | result_nullmap[i] = 1; |
3053 | 13 | result_data_col.insert_default(); |
3054 | 13 | continue; |
3055 | 13 | } |
3056 | 32 | const JsonbDocument* json_doc = nullptr; |
3057 | 32 | const auto& json_str = json_column->get_data_at(i); |
3058 | 32 | RETURN_IF_ERROR( |
3059 | 32 | JsonbDocument::checkAndCreateDocument(json_str.data, json_str.size, &json_doc)); |
3060 | 32 | if (json_doc) [[likely]] { |
3061 | 32 | if (json_doc->getValue()->isNull()) { |
3062 | 9 | result_nullmap[i] = 1; |
3063 | 9 | result_data_col.insert_default(); |
3064 | 23 | } else { |
3065 | 23 | result_nullmap[i] = 0; |
3066 | 23 | result_data_col.insert_data(json_str.data, json_str.size); |
3067 | 23 | } |
3068 | 32 | } else { |
3069 | 0 | result_nullmap[i] = 1; |
3070 | 0 | result_data_col.insert_default(); |
3071 | 0 | } |
3072 | 32 | } |
3073 | | |
3074 | 15 | block.get_by_position(result).column = std::move(result_column); |
3075 | 15 | return Status::OK(); |
3076 | 15 | } |
3077 | | }; |
3078 | | |
3079 | 8 | void register_function_jsonb(SimpleFunctionFactory& factory) { |
3080 | 8 | factory.register_function<FunctionJsonbParse>(FunctionJsonbParse::name); |
3081 | 8 | factory.register_alias(FunctionJsonbParse::name, FunctionJsonbParse::alias); |
3082 | 8 | factory.register_function<FunctionJsonbParseErrorNull>("json_parse_error_to_null"); |
3083 | 8 | factory.register_alias("json_parse_error_to_null", "jsonb_parse_error_to_null"); |
3084 | 8 | factory.register_function<FunctionJsonbParseErrorValue>("json_parse_error_to_value"); |
3085 | 8 | factory.register_alias("json_parse_error_to_value", "jsonb_parse_error_to_value"); |
3086 | | |
3087 | 8 | factory.register_function<FunctionJsonbExists>(); |
3088 | 8 | factory.register_alias(FunctionJsonbExists::name, FunctionJsonbExists::alias); |
3089 | 8 | factory.register_function<FunctionJsonbType>(); |
3090 | 8 | factory.register_alias(FunctionJsonbType::name, FunctionJsonbType::alias); |
3091 | | |
3092 | 8 | factory.register_function<FunctionJsonbKeys>(); |
3093 | 8 | factory.register_alias(FunctionJsonbKeys::name, FunctionJsonbKeys::alias); |
3094 | | |
3095 | 8 | factory.register_function<FunctionJsonbExtractIsnull>(); |
3096 | 8 | factory.register_alias(FunctionJsonbExtractIsnull::name, FunctionJsonbExtractIsnull::alias); |
3097 | | |
3098 | 8 | factory.register_function<FunctionJsonbExtractJsonb>(); |
3099 | 8 | factory.register_alias(FunctionJsonbExtractJsonb::name, FunctionJsonbExtractJsonb::alias); |
3100 | 8 | factory.register_function<FunctionJsonbExtractJsonbNoQuotes>(); |
3101 | 8 | factory.register_alias(FunctionJsonbExtractJsonbNoQuotes::name, |
3102 | 8 | FunctionJsonbExtractJsonbNoQuotes::alias); |
3103 | | |
3104 | 8 | factory.register_function<FunctionJsonbLength<JsonbLengthAndPathImpl>>(); |
3105 | 8 | factory.register_function<FunctionJsonbContains<JsonbContainsAndPathImpl>>(); |
3106 | | |
3107 | 8 | factory.register_function<FunctionJsonSearch>(); |
3108 | | |
3109 | 8 | factory.register_function<FunctionJsonbArray<false>>(); |
3110 | 8 | factory.register_alias(FunctionJsonbArray<false>::name, FunctionJsonbArray<false>::alias); |
3111 | | |
3112 | 8 | factory.register_function<FunctionJsonbArray<true>>("json_array_ignore_null"); |
3113 | 8 | factory.register_alias("json_array_ignore_null", "jsonb_array_ignore_null"); |
3114 | | |
3115 | 8 | factory.register_function<FunctionJsonbObject>(); |
3116 | 8 | factory.register_alias(FunctionJsonbObject::name, FunctionJsonbObject::alias); |
3117 | | |
3118 | 8 | factory.register_function<FunctionJsonbModify<JsonbModifyType::Insert>>(); |
3119 | 8 | factory.register_alias(FunctionJsonbModify<JsonbModifyType::Insert>::name, |
3120 | 8 | FunctionJsonbModify<JsonbModifyType::Insert>::alias); |
3121 | 8 | factory.register_function<FunctionJsonbModify<JsonbModifyType::Set>>(); |
3122 | 8 | factory.register_alias(FunctionJsonbModify<JsonbModifyType::Set>::name, |
3123 | 8 | FunctionJsonbModify<JsonbModifyType::Set>::alias); |
3124 | 8 | factory.register_function<FunctionJsonbModify<JsonbModifyType::Replace>>(); |
3125 | 8 | factory.register_alias(FunctionJsonbModify<JsonbModifyType::Replace>::name, |
3126 | 8 | FunctionJsonbModify<JsonbModifyType::Replace>::alias); |
3127 | | |
3128 | 8 | factory.register_function<FunctionJsonbRemove>(); |
3129 | 8 | factory.register_alias(FunctionJsonbRemove::name, FunctionJsonbRemove::alias); |
3130 | | |
3131 | 8 | factory.register_function<FunctionStripNullValue>(); |
3132 | 8 | } |
3133 | | |
3134 | | } // namespace doris |