be/src/exprs/function/function_jsonb.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include <glog/logging.h> |
19 | | |
20 | | #include <algorithm> |
21 | | #include <cstdlib> |
22 | | #include <memory> |
23 | | #include <string> |
24 | | #include <string_view> |
25 | | #include <tuple> |
26 | | #include <type_traits> |
27 | | #include <utility> |
28 | | #include <variant> |
29 | | |
30 | | #include "common/compiler_util.h" // IWYU pragma: keep |
31 | | #include "common/status.h" |
32 | | #include "core/assert_cast.h" |
33 | | #include "core/block/block.h" |
34 | | #include "core/block/column_numbers.h" |
35 | | #include "core/block/column_with_type_and_name.h" |
36 | | #include "core/column/column.h" |
37 | | #include "core/column/column_array.h" |
38 | | #include "core/column/column_const.h" |
39 | | #include "core/column/column_nullable.h" |
40 | | #include "core/column/column_string.h" |
41 | | #include "core/column/column_vector.h" |
42 | | #include "core/custom_allocator.h" |
43 | | #include "core/data_type/data_type.h" |
44 | | #include "core/data_type/data_type_array.h" |
45 | | #include "core/data_type/data_type_jsonb.h" |
46 | | #include "core/data_type/data_type_nullable.h" |
47 | | #include "core/data_type/data_type_string.h" |
48 | | #include "core/data_type/define_primitive_type.h" |
49 | | #include "core/data_type/primitive_type.h" |
50 | | #include "core/string_ref.h" |
51 | | #include "core/types.h" |
52 | | #include "core/value/jsonb_value.h" |
53 | | #include "exec/common/stringop_substring.h" |
54 | | #include "exec/common/template_helpers.hpp" |
55 | | #include "exec/common/util.hpp" |
56 | | #include "exprs/aggregate/aggregate_function.h" |
57 | | #include "exprs/function/function.h" |
58 | | #include "exprs/function/like.h" |
59 | | #include "exprs/function/simple_function_factory.h" |
60 | | #include "exprs/function_context.h" |
61 | | #include "util/jsonb_document.h" |
62 | | #include "util/jsonb_stream.h" |
63 | | #include "util/jsonb_utils.h" |
64 | | #include "util/jsonb_writer.h" |
65 | | #include "util/simd/bits.h" |
66 | | |
67 | | namespace doris { |
68 | | #include "common/compile_check_begin.h" |
69 | | |
// Selects how the nullability of the JSONB result is decided.
// (The identifier keeps its historical spelling because other code refers to it.)
enum class NullalbeMode {
    // The result is always Nullable(JSONB).
    NULLABLE = 0,
    // The result is nullable only when at least one argument type is nullable.
    FOLLOW_INPUT = 1,
};
71 | | |
// Selects what happens to a row whose input text is not valid JSON.
enum class JsonbParseErrorMode {
    // Abort the whole call with an InvalidArgument status.
    FAIL = 0,
    // Mark the row as NULL in the result.
    RETURN_NULL = 1,
    // Substitute a caller-supplied (or built-in "{}") default value for the row.
    RETURN_VALUE = 2,
};
73 | | |
73 | | // func(string[, jsonb default]) -> jsonb |
75 | | template <NullalbeMode nullable_mode, JsonbParseErrorMode parse_error_handle_mode> |
76 | | class FunctionJsonbParseBase : public IFunction { |
77 | | private: |
78 | | struct FunctionJsonbParseState { |
79 | | StringRef default_value; |
80 | | JsonBinaryValue default_value_parser; |
81 | | bool has_const_default_value = false; |
82 | | bool default_is_null = false; |
83 | | }; |
84 | | |
85 | | public: |
// Base function name; get_name() appends the error-mode suffix to it.
static constexpr const char* name = "json_parse";
// Alternate spelling of the same function name.
static constexpr const char* alias = "jsonb_parse";
88 | 35 | static FunctionPtr create() { return std::make_shared<FunctionJsonbParseBase>(); }_ZN5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE6createEv Line | Count | Source | 88 | 15 | static FunctionPtr create() { return std::make_shared<FunctionJsonbParseBase>(); } |
_ZN5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE6createEv Line | Count | Source | 88 | 9 | static FunctionPtr create() { return std::make_shared<FunctionJsonbParseBase>(); } |
_ZN5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE6createEv Line | Count | Source | 88 | 11 | static FunctionPtr create() { return std::make_shared<FunctionJsonbParseBase>(); } |
|
89 | | |
90 | 4 | String get_name() const override { |
91 | 4 | String error_mode; |
92 | 4 | switch (parse_error_handle_mode) { |
93 | 1 | case JsonbParseErrorMode::FAIL: |
94 | 1 | break; |
95 | 1 | case JsonbParseErrorMode::RETURN_NULL: |
96 | 1 | error_mode = "_error_to_null"; |
97 | 1 | break; |
98 | 2 | case JsonbParseErrorMode::RETURN_VALUE: |
99 | 2 | error_mode = "_error_to_value"; |
100 | 2 | break; |
101 | 4 | } |
102 | | |
103 | 4 | return name + error_mode; |
104 | 4 | } _ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE8get_nameB5cxx11Ev Line | Count | Source | 90 | 1 | String get_name() const override { | 91 | 1 | String error_mode; | 92 | 1 | switch (parse_error_handle_mode) { | 93 | 1 | case JsonbParseErrorMode::FAIL: | 94 | 1 | break; | 95 | 0 | case JsonbParseErrorMode::RETURN_NULL: | 96 | 0 | error_mode = "_error_to_null"; | 97 | 0 | break; | 98 | 0 | case JsonbParseErrorMode::RETURN_VALUE: | 99 | 0 | error_mode = "_error_to_value"; | 100 | 0 | break; | 101 | 1 | } | 102 | | | 103 | 1 | return name + error_mode; | 104 | 1 | } |
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE8get_nameB5cxx11Ev Line | Count | Source | 90 | 1 | String get_name() const override { | 91 | 1 | String error_mode; | 92 | 1 | switch (parse_error_handle_mode) { | 93 | 0 | case JsonbParseErrorMode::FAIL: | 94 | 0 | break; | 95 | 1 | case JsonbParseErrorMode::RETURN_NULL: | 96 | 1 | error_mode = "_error_to_null"; | 97 | 1 | break; | 98 | 0 | case JsonbParseErrorMode::RETURN_VALUE: | 99 | 0 | error_mode = "_error_to_value"; | 100 | 0 | break; | 101 | 1 | } | 102 | | | 103 | 1 | return name + error_mode; | 104 | 1 | } |
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE8get_nameB5cxx11Ev Line | Count | Source | 90 | 2 | String get_name() const override { | 91 | 2 | String error_mode; | 92 | 2 | switch (parse_error_handle_mode) { | 93 | 0 | case JsonbParseErrorMode::FAIL: | 94 | 0 | break; | 95 | 0 | case JsonbParseErrorMode::RETURN_NULL: | 96 | 0 | error_mode = "_error_to_null"; | 97 | 0 | break; | 98 | 2 | case JsonbParseErrorMode::RETURN_VALUE: | 99 | 2 | error_mode = "_error_to_value"; | 100 | 2 | break; | 101 | 2 | } | 102 | | | 103 | 2 | return name + error_mode; | 104 | 2 | } |
|
105 | | |
106 | 15 | bool is_variadic() const override { |
107 | 15 | return parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE; |
108 | 15 | } _ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE11is_variadicEv Line | Count | Source | 106 | 8 | bool is_variadic() const override { | 107 | 8 | return parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE; | 108 | 8 | } |
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE11is_variadicEv Line | Count | Source | 106 | 2 | bool is_variadic() const override { | 107 | 2 | return parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE; | 108 | 2 | } |
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE11is_variadicEv Line | Count | Source | 106 | 5 | bool is_variadic() const override { | 107 | 5 | return parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE; | 108 | 5 | } |
|
109 | | |
110 | 9 | size_t get_number_of_arguments() const override { |
111 | 9 | switch (parse_error_handle_mode) { |
112 | 7 | case JsonbParseErrorMode::FAIL: |
113 | 7 | return 1; |
114 | 1 | case JsonbParseErrorMode::RETURN_NULL: |
115 | 1 | return 1; |
116 | 1 | case JsonbParseErrorMode::RETURN_VALUE: |
117 | 1 | return 0; |
118 | 9 | } |
119 | 9 | } _ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE23get_number_of_argumentsEv Line | Count | Source | 110 | 7 | size_t get_number_of_arguments() const override { | 111 | 7 | switch (parse_error_handle_mode) { | 112 | 7 | case JsonbParseErrorMode::FAIL: | 113 | 7 | return 1; | 114 | 0 | case JsonbParseErrorMode::RETURN_NULL: | 115 | 0 | return 1; | 116 | 0 | case JsonbParseErrorMode::RETURN_VALUE: | 117 | 0 | return 0; | 118 | 7 | } | 119 | 7 | } |
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE23get_number_of_argumentsEv Line | Count | Source | 110 | 1 | size_t get_number_of_arguments() const override { | 111 | 1 | switch (parse_error_handle_mode) { | 112 | 0 | case JsonbParseErrorMode::FAIL: | 113 | 0 | return 1; | 114 | 1 | case JsonbParseErrorMode::RETURN_NULL: | 115 | 1 | return 1; | 116 | 0 | case JsonbParseErrorMode::RETURN_VALUE: | 117 | 0 | return 0; | 118 | 1 | } | 119 | 1 | } |
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE23get_number_of_argumentsEv Line | Count | Source | 110 | 1 | size_t get_number_of_arguments() const override { | 111 | 1 | switch (parse_error_handle_mode) { | 112 | 0 | case JsonbParseErrorMode::FAIL: | 113 | 0 | return 1; | 114 | 0 | case JsonbParseErrorMode::RETURN_NULL: | 115 | 0 | return 1; | 116 | 1 | case JsonbParseErrorMode::RETURN_VALUE: | 117 | 1 | return 0; | 118 | 1 | } | 119 | 1 | } |
|
120 | | |
121 | 11 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
122 | 11 | bool is_nullable = false; |
123 | 11 | switch (nullable_mode) { |
124 | 1 | case NullalbeMode::NULLABLE: |
125 | 1 | is_nullable = true; |
126 | 1 | break; |
127 | 10 | case NullalbeMode::FOLLOW_INPUT: { |
128 | 14 | for (auto arg : arguments) { |
129 | 14 | is_nullable |= arg->is_nullable(); |
130 | 14 | } |
131 | 10 | break; |
132 | 0 | } |
133 | 11 | } |
134 | | |
135 | 11 | return is_nullable ? make_nullable(std::make_shared<DataTypeJsonb>()) |
136 | 11 | : std::make_shared<DataTypeJsonb>(); |
137 | 11 | } _ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE Line | Count | Source | 121 | 7 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 122 | 7 | bool is_nullable = false; | 123 | 7 | switch (nullable_mode) { | 124 | 0 | case NullalbeMode::NULLABLE: | 125 | 0 | is_nullable = true; | 126 | 0 | break; | 127 | 7 | case NullalbeMode::FOLLOW_INPUT: { | 128 | 7 | for (auto arg : arguments) { | 129 | 7 | is_nullable |= arg->is_nullable(); | 130 | 7 | } | 131 | 7 | break; | 132 | 0 | } | 133 | 7 | } | 134 | | | 135 | 7 | return is_nullable ? make_nullable(std::make_shared<DataTypeJsonb>()) | 136 | 7 | : std::make_shared<DataTypeJsonb>(); | 137 | 7 | } |
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE Line | Count | Source | 121 | 1 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 122 | 1 | bool is_nullable = false; | 123 | 1 | switch (nullable_mode) { | 124 | 1 | case NullalbeMode::NULLABLE: | 125 | 1 | is_nullable = true; | 126 | 1 | break; | 127 | 0 | case NullalbeMode::FOLLOW_INPUT: { | 128 | 0 | for (auto arg : arguments) { | 129 | 0 | is_nullable |= arg->is_nullable(); | 130 | 0 | } | 131 | 0 | break; | 132 | 0 | } | 133 | 1 | } | 134 | | | 135 | 1 | return is_nullable ? make_nullable(std::make_shared<DataTypeJsonb>()) | 136 | 1 | : std::make_shared<DataTypeJsonb>(); | 137 | 1 | } |
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE Line | Count | Source | 121 | 3 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 122 | 3 | bool is_nullable = false; | 123 | 3 | switch (nullable_mode) { | 124 | 0 | case NullalbeMode::NULLABLE: | 125 | 0 | is_nullable = true; | 126 | 0 | break; | 127 | 3 | case NullalbeMode::FOLLOW_INPUT: { | 128 | 7 | for (auto arg : arguments) { | 129 | 7 | is_nullable |= arg->is_nullable(); | 130 | 7 | } | 131 | 3 | break; | 132 | 0 | } | 133 | 3 | } | 134 | | | 135 | 3 | return is_nullable ? make_nullable(std::make_shared<DataTypeJsonb>()) | 136 | 3 | : std::make_shared<DataTypeJsonb>(); | 137 | 3 | } |
|
138 | | |
139 | 21 | bool use_default_implementation_for_nulls() const override { return false; }_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE36use_default_implementation_for_nullsEv Line | Count | Source | 139 | 14 | bool use_default_implementation_for_nulls() const override { return false; } |
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE36use_default_implementation_for_nullsEv Line | Count | Source | 139 | 2 | bool use_default_implementation_for_nulls() const override { return false; } |
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE36use_default_implementation_for_nullsEv Line | Count | Source | 139 | 5 | bool use_default_implementation_for_nulls() const override { return false; } |
|
140 | | |
141 | 20 | Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { |
142 | 20 | if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) { |
143 | 10 | std::shared_ptr<FunctionJsonbParseState> state = |
144 | 10 | std::make_shared<FunctionJsonbParseState>(); |
145 | 10 | context->set_function_state(FunctionContext::FRAGMENT_LOCAL, state); |
146 | 10 | } |
147 | 20 | if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) { |
148 | 4 | if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) { |
149 | 2 | auto* state = reinterpret_cast<FunctionJsonbParseState*>( |
150 | 2 | context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); |
151 | 2 | if (state) { |
152 | 2 | if (context->get_num_args() == 2) { |
153 | 1 | if (context->is_col_constant(1)) { |
154 | 0 | const auto default_value_col = context->get_constant_col(1)->column_ptr; |
155 | 0 | if (default_value_col->is_null_at(0)) { |
156 | 0 | state->default_is_null = true; |
157 | 0 | } else { |
158 | 0 | const auto& default_value = default_value_col->get_data_at(0); |
159 | |
|
160 | 0 | state->default_value = default_value; |
161 | 0 | state->has_const_default_value = true; |
162 | 0 | } |
163 | 0 | } |
164 | 1 | } else if (context->get_num_args() == 1) { |
165 | 0 | RETURN_IF_ERROR( |
166 | 0 | state->default_value_parser.from_json_string(std::string("{}"))); |
167 | 0 | state->default_value = StringRef(state->default_value_parser.value(), |
168 | 0 | state->default_value_parser.size()); |
169 | 0 | state->has_const_default_value = true; |
170 | 0 | } |
171 | 2 | } |
172 | 2 | } |
173 | | |
174 | 4 | if (context->get_num_args() != 1 && context->get_num_args() != 2) { |
175 | 1 | return Status::InvalidArgument( |
176 | 1 | "{} function should have 1 or 2 arguments, " |
177 | 1 | "but got {}", |
178 | 1 | get_name(), context->get_num_args()); |
179 | 1 | } |
180 | 4 | } |
181 | 3 | return Status::OK(); |
182 | 20 | } _ZN5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE4openEPNS_15FunctionContextENS4_18FunctionStateScopeE Line | Count | Source | 141 | 14 | Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { | 142 | 14 | if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) { | 143 | 7 | std::shared_ptr<FunctionJsonbParseState> state = | 144 | 7 | std::make_shared<FunctionJsonbParseState>(); | 145 | 7 | context->set_function_state(FunctionContext::FRAGMENT_LOCAL, state); | 146 | 7 | } | 147 | | if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) { | 148 | | if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) { | 149 | | auto* state = reinterpret_cast<FunctionJsonbParseState*>( | 150 | | context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); | 151 | | if (state) { | 152 | | if (context->get_num_args() == 2) { | 153 | | if (context->is_col_constant(1)) { | 154 | | const auto default_value_col = context->get_constant_col(1)->column_ptr; | 155 | | if (default_value_col->is_null_at(0)) { | 156 | | state->default_is_null = true; | 157 | | } else { | 158 | | const auto& default_value = default_value_col->get_data_at(0); | 159 | | | 160 | | state->default_value = default_value; | 161 | | state->has_const_default_value = true; | 162 | | } | 163 | | } | 164 | | } else if (context->get_num_args() == 1) { | 165 | | RETURN_IF_ERROR( | 166 | | state->default_value_parser.from_json_string(std::string("{}"))); | 167 | | state->default_value = StringRef(state->default_value_parser.value(), | 168 | | state->default_value_parser.size()); | 169 | | state->has_const_default_value = true; | 170 | | } | 171 | | } | 172 | | } | 173 | | | 174 | | if (context->get_num_args() != 1 && context->get_num_args() != 2) { | 175 | | return Status::InvalidArgument( | 176 | | "{} function should have 1 or 2 arguments, " | 177 | | "but got {}", | 178 | | get_name(), 
context->get_num_args()); | 179 | | } | 180 | | } | 181 | 14 | return Status::OK(); | 182 | 14 | } |
_ZN5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE4openEPNS_15FunctionContextENS4_18FunctionStateScopeE Line | Count | Source | 141 | 2 | Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { | 142 | 2 | if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) { | 143 | 1 | std::shared_ptr<FunctionJsonbParseState> state = | 144 | 1 | std::make_shared<FunctionJsonbParseState>(); | 145 | 1 | context->set_function_state(FunctionContext::FRAGMENT_LOCAL, state); | 146 | 1 | } | 147 | | if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) { | 148 | | if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) { | 149 | | auto* state = reinterpret_cast<FunctionJsonbParseState*>( | 150 | | context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); | 151 | | if (state) { | 152 | | if (context->get_num_args() == 2) { | 153 | | if (context->is_col_constant(1)) { | 154 | | const auto default_value_col = context->get_constant_col(1)->column_ptr; | 155 | | if (default_value_col->is_null_at(0)) { | 156 | | state->default_is_null = true; | 157 | | } else { | 158 | | const auto& default_value = default_value_col->get_data_at(0); | 159 | | | 160 | | state->default_value = default_value; | 161 | | state->has_const_default_value = true; | 162 | | } | 163 | | } | 164 | | } else if (context->get_num_args() == 1) { | 165 | | RETURN_IF_ERROR( | 166 | | state->default_value_parser.from_json_string(std::string("{}"))); | 167 | | state->default_value = StringRef(state->default_value_parser.value(), | 168 | | state->default_value_parser.size()); | 169 | | state->has_const_default_value = true; | 170 | | } | 171 | | } | 172 | | } | 173 | | | 174 | | if (context->get_num_args() != 1 && context->get_num_args() != 2) { | 175 | | return Status::InvalidArgument( | 176 | | "{} function should have 1 or 2 arguments, " | 177 | | "but got {}", | 178 | | get_name(), context->get_num_args()); 
| 179 | | } | 180 | | } | 181 | 2 | return Status::OK(); | 182 | 2 | } |
_ZN5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE4openEPNS_15FunctionContextENS4_18FunctionStateScopeE Line | Count | Source | 141 | 4 | Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { | 142 | 4 | if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) { | 143 | 2 | std::shared_ptr<FunctionJsonbParseState> state = | 144 | 2 | std::make_shared<FunctionJsonbParseState>(); | 145 | 2 | context->set_function_state(FunctionContext::FRAGMENT_LOCAL, state); | 146 | 2 | } | 147 | 4 | if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) { | 148 | 4 | if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) { | 149 | 2 | auto* state = reinterpret_cast<FunctionJsonbParseState*>( | 150 | 2 | context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); | 151 | 2 | if (state) { | 152 | 2 | if (context->get_num_args() == 2) { | 153 | 1 | if (context->is_col_constant(1)) { | 154 | 0 | const auto default_value_col = context->get_constant_col(1)->column_ptr; | 155 | 0 | if (default_value_col->is_null_at(0)) { | 156 | 0 | state->default_is_null = true; | 157 | 0 | } else { | 158 | 0 | const auto& default_value = default_value_col->get_data_at(0); | 159 | |
| 160 | 0 | state->default_value = default_value; | 161 | 0 | state->has_const_default_value = true; | 162 | 0 | } | 163 | 0 | } | 164 | 1 | } else if (context->get_num_args() == 1) { | 165 | 0 | RETURN_IF_ERROR( | 166 | 0 | state->default_value_parser.from_json_string(std::string("{}"))); | 167 | 0 | state->default_value = StringRef(state->default_value_parser.value(), | 168 | 0 | state->default_value_parser.size()); | 169 | 0 | state->has_const_default_value = true; | 170 | 0 | } | 171 | 2 | } | 172 | 2 | } | 173 | | | 174 | 4 | if (context->get_num_args() != 1 && context->get_num_args() != 2) { | 175 | 1 | return Status::InvalidArgument( | 176 | 1 | "{} function should have 1 or 2 arguments, " | 177 | 1 | "but got {}", | 178 | 1 | get_name(), context->get_num_args()); | 179 | 1 | } | 180 | 4 | } | 181 | 3 | return Status::OK(); | 182 | 4 | } |
|
183 | | |
184 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
185 | 10 | uint32_t result, size_t input_rows_count) const override { |
186 | 10 | auto&& [col_from, col_from_is_const] = |
187 | 10 | unpack_if_const(block.get_by_position(arguments[0]).column); |
188 | | |
189 | 10 | if (col_from_is_const && col_from->is_null_at(0)) { |
190 | 0 | auto col_str = ColumnString::create(); |
191 | 0 | col_str->insert_default(); |
192 | 0 | auto null_map = ColumnUInt8::create(1, 1); |
193 | 0 | auto nullable_col = ColumnNullable::create(std::move(col_str), std::move(null_map)); |
194 | 0 | block.get_by_position(result).column = |
195 | 0 | ColumnConst::create(std::move(nullable_col), input_rows_count); |
196 | 0 | return Status::OK(); |
197 | 0 | } |
198 | | |
199 | 10 | auto null_map = ColumnUInt8::create(0, 0); |
200 | 10 | bool is_nullable = false; |
201 | | |
202 | 10 | switch (nullable_mode) { |
203 | 1 | case NullalbeMode::NULLABLE: { |
204 | 1 | is_nullable = true; |
205 | 1 | break; |
206 | 0 | } |
207 | 9 | case NullalbeMode::FOLLOW_INPUT: { |
208 | 11 | for (auto arg : arguments) { |
209 | 11 | is_nullable |= block.get_by_position(arg).type->is_nullable(); |
210 | 11 | } |
211 | 9 | break; |
212 | 0 | } |
213 | 10 | } |
214 | | |
215 | 10 | if (is_nullable) { |
216 | 9 | null_map = ColumnUInt8::create(input_rows_count, 0); |
217 | 9 | } |
218 | | |
219 | 8 | const ColumnString* col_from_string = nullptr; |
220 | 10 | if (col_from->is_nullable()) { |
221 | 9 | const auto& nullable_col = assert_cast<const ColumnNullable&>(*col_from); |
222 | | |
223 | 9 | VectorizedUtils::update_null_map(null_map->get_data(), |
224 | 9 | nullable_col.get_null_map_data()); |
225 | 9 | col_from_string = |
226 | 9 | assert_cast<const ColumnString*>(nullable_col.get_nested_column_ptr().get()); |
227 | 9 | } else { |
228 | 1 | col_from_string = assert_cast<const ColumnString*>(col_from.get()); |
229 | 1 | } |
230 | | |
231 | 8 | StringRef constant_default_value; |
232 | 8 | bool default_value_const = false; |
233 | 8 | bool default_value_null_const = false; |
234 | 8 | ColumnPtr default_value_col; |
235 | 8 | JsonBinaryValue default_jsonb_value_parser; |
236 | 8 | const ColumnString* default_value_str_col = nullptr; |
237 | 8 | const NullMap* default_value_nullmap = nullptr; |
238 | 8 | if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) { |
239 | 2 | auto* state = reinterpret_cast<FunctionJsonbParseState*>( |
240 | 2 | context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); |
241 | 2 | if (state && state->has_const_default_value) { |
242 | 0 | constant_default_value = state->default_value; |
243 | 0 | default_value_null_const = state->default_is_null; |
244 | 0 | default_value_const = true; |
245 | 2 | } else if (arguments.size() > 1) { |
246 | 2 | if (block.get_by_position(arguments[1]).type->get_primitive_type() != |
247 | 2 | PrimitiveType::TYPE_JSONB) { |
248 | 1 | return Status::InvalidArgument( |
249 | 1 | "{} second argument should be jsonb type, but got {}", get_name(), |
250 | 1 | block.get_by_position(arguments[1]).type->get_name()); |
251 | 1 | } |
252 | 1 | std::tie(default_value_col, default_value_const) = |
253 | 1 | unpack_if_const(block.get_by_position(arguments[1]).column); |
254 | 1 | if (default_value_const) { |
255 | 0 | const JsonbDocument* default_value_doc = nullptr; |
256 | 0 | if (default_value_col->is_null_at(0)) { |
257 | 0 | default_value_null_const = true; |
258 | 0 | } else { |
259 | 0 | auto data = default_value_col->get_data_at(0); |
260 | 0 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(data.data, data.size, |
261 | 0 | &default_value_doc)); |
262 | 0 | constant_default_value = data; |
263 | 0 | } |
264 | 1 | } else { |
265 | 1 | if (default_value_col->is_nullable()) { |
266 | 1 | const auto& nullable_col = |
267 | 1 | assert_cast<const ColumnNullable&>(*default_value_col); |
268 | 1 | default_value_str_col = assert_cast<const ColumnString*>( |
269 | 1 | nullable_col.get_nested_column_ptr().get()); |
270 | 1 | default_value_nullmap = &(nullable_col.get_null_map_data()); |
271 | 1 | } else { |
272 | 0 | default_value_str_col = |
273 | 0 | assert_cast<const ColumnString*>(default_value_col.get()); |
274 | 0 | } |
275 | 1 | } |
276 | 1 | } else if (arguments.size() == 1) { |
277 | | // parse default value '{}' should always success. |
278 | 0 | RETURN_IF_ERROR(default_jsonb_value_parser.from_json_string(std::string("{}"))); |
279 | 0 | default_value_const = true; |
280 | 0 | constant_default_value.data = default_jsonb_value_parser.value(); |
281 | 0 | constant_default_value.size = default_jsonb_value_parser.size(); |
282 | 0 | } |
283 | 2 | } |
284 | | |
285 | 1 | auto col_to = ColumnString::create(); |
286 | | |
287 | 8 | col_to->reserve(input_rows_count); |
288 | | |
289 | 8 | auto& null_map_data = null_map->get_data(); |
290 | | |
291 | | // parser can be reused for performance |
292 | 8 | JsonBinaryValue jsonb_value; |
293 | | |
294 | 79 | for (size_t i = 0; i < input_rows_count; ++i) { |
295 | 69 | if (is_nullable && null_map_data[i]) { |
296 | 3 | col_to->insert_default(); |
297 | 3 | continue; |
298 | 3 | } |
299 | | |
300 | 66 | auto index = index_check_const(i, col_from_is_const); |
301 | 66 | const auto& val = col_from_string->get_data_at(index); |
302 | 66 | auto st = jsonb_value.from_json_string(val.data, val.size); |
303 | 66 | if (st.ok()) { |
304 | | // insert jsonb format data |
305 | 48 | col_to->insert_data(jsonb_value.value(), jsonb_value.size()); |
306 | 48 | } else { |
307 | 18 | if constexpr (parse_error_handle_mode == JsonbParseErrorMode::FAIL) { |
308 | 6 | return Status::InvalidArgument( |
309 | 6 | "Parse json document failed at row {}, error: {}", i, st.to_string()); |
310 | 6 | } else if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_NULL) { |
311 | 6 | null_map_data[i] = 1; |
312 | 6 | col_to->insert_default(); |
313 | 6 | } else { |
314 | 6 | if (default_value_const) { |
315 | 0 | if (default_value_null_const) { |
316 | 0 | null_map_data[i] = 1; |
317 | 0 | col_to->insert_default(); |
318 | 0 | } else { |
319 | 0 | col_to->insert_data(constant_default_value.data, |
320 | 0 | constant_default_value.size); |
321 | 0 | } |
322 | 6 | } else { |
323 | 6 | if (default_value_nullmap && (*default_value_nullmap)[i]) { |
324 | 0 | null_map_data[i] = 1; |
325 | 0 | col_to->insert_default(); |
326 | 0 | continue; |
327 | 0 | } |
328 | 6 | auto value = default_value_str_col->get_data_at(i); |
329 | 6 | col_to->insert_data(value.data, value.size); |
330 | 6 | } |
331 | 6 | } |
332 | 18 | } |
333 | 66 | } |
334 | | |
335 | 10 | if (is_nullable) { |
336 | 3 | block.replace_by_position( |
337 | 3 | result, ColumnNullable::create(std::move(col_to), std::move(null_map))); |
338 | 7 | } else { |
339 | 7 | block.replace_by_position(result, std::move(col_to)); |
340 | 7 | } |
341 | | |
342 | 2 | return Status::OK(); |
343 | 2 | } _ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 185 | 7 | uint32_t result, size_t input_rows_count) const override { | 186 | 7 | auto&& [col_from, col_from_is_const] = | 187 | 7 | unpack_if_const(block.get_by_position(arguments[0]).column); | 188 | | | 189 | 7 | if (col_from_is_const && col_from->is_null_at(0)) { | 190 | 0 | auto col_str = ColumnString::create(); | 191 | 0 | col_str->insert_default(); | 192 | 0 | auto null_map = ColumnUInt8::create(1, 1); | 193 | 0 | auto nullable_col = ColumnNullable::create(std::move(col_str), std::move(null_map)); | 194 | 0 | block.get_by_position(result).column = | 195 | 0 | ColumnConst::create(std::move(nullable_col), input_rows_count); | 196 | 0 | return Status::OK(); | 197 | 0 | } | 198 | | | 199 | 7 | auto null_map = ColumnUInt8::create(0, 0); | 200 | 7 | bool is_nullable = false; | 201 | | | 202 | 7 | switch (nullable_mode) { | 203 | 0 | case NullalbeMode::NULLABLE: { | 204 | 0 | is_nullable = true; | 205 | 0 | break; | 206 | 0 | } | 207 | 7 | case NullalbeMode::FOLLOW_INPUT: { | 208 | 7 | for (auto arg : arguments) { | 209 | 7 | is_nullable |= block.get_by_position(arg).type->is_nullable(); | 210 | 7 | } | 211 | 7 | break; | 212 | 0 | } | 213 | 7 | } | 214 | | | 215 | 7 | if (is_nullable) { | 216 | 7 | null_map = ColumnUInt8::create(input_rows_count, 0); | 217 | 7 | } | 218 | | | 219 | 7 | const ColumnString* col_from_string = nullptr; | 220 | 7 | if (col_from->is_nullable()) { | 221 | 7 | const auto& nullable_col = assert_cast<const ColumnNullable&>(*col_from); | 222 | | | 223 | 7 | VectorizedUtils::update_null_map(null_map->get_data(), | 224 | 7 | nullable_col.get_null_map_data()); | 225 | 7 | col_from_string = | 226 | 7 | assert_cast<const ColumnString*>(nullable_col.get_nested_column_ptr().get()); | 227 | 7 | } else { | 228 | 0 | col_from_string = assert_cast<const 
ColumnString*>(col_from.get()); | 229 | 0 | } | 230 | | | 231 | 7 | StringRef constant_default_value; | 232 | 7 | bool default_value_const = false; | 233 | 7 | bool default_value_null_const = false; | 234 | 7 | ColumnPtr default_value_col; | 235 | 7 | JsonBinaryValue default_jsonb_value_parser; | 236 | 7 | const ColumnString* default_value_str_col = nullptr; | 237 | 7 | const NullMap* default_value_nullmap = nullptr; | 238 | | if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) { | 239 | | auto* state = reinterpret_cast<FunctionJsonbParseState*>( | 240 | | context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); | 241 | | if (state && state->has_const_default_value) { | 242 | | constant_default_value = state->default_value; | 243 | | default_value_null_const = state->default_is_null; | 244 | | default_value_const = true; | 245 | | } else if (arguments.size() > 1) { | 246 | | if (block.get_by_position(arguments[1]).type->get_primitive_type() != | 247 | | PrimitiveType::TYPE_JSONB) { | 248 | | return Status::InvalidArgument( | 249 | | "{} second argument should be jsonb type, but got {}", get_name(), | 250 | | block.get_by_position(arguments[1]).type->get_name()); | 251 | | } | 252 | | std::tie(default_value_col, default_value_const) = | 253 | | unpack_if_const(block.get_by_position(arguments[1]).column); | 254 | | if (default_value_const) { | 255 | | const JsonbDocument* default_value_doc = nullptr; | 256 | | if (default_value_col->is_null_at(0)) { | 257 | | default_value_null_const = true; | 258 | | } else { | 259 | | auto data = default_value_col->get_data_at(0); | 260 | | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(data.data, data.size, | 261 | | &default_value_doc)); | 262 | | constant_default_value = data; | 263 | | } | 264 | | } else { | 265 | | if (default_value_col->is_nullable()) { | 266 | | const auto& nullable_col = | 267 | | assert_cast<const ColumnNullable&>(*default_value_col); | 268 | | default_value_str_col = 
assert_cast<const ColumnString*>( | 269 | | nullable_col.get_nested_column_ptr().get()); | 270 | | default_value_nullmap = &(nullable_col.get_null_map_data()); | 271 | | } else { | 272 | | default_value_str_col = | 273 | | assert_cast<const ColumnString*>(default_value_col.get()); | 274 | | } | 275 | | } | 276 | | } else if (arguments.size() == 1) { | 277 | | // parse default value '{}' should always success. | 278 | | RETURN_IF_ERROR(default_jsonb_value_parser.from_json_string(std::string("{}"))); | 279 | | default_value_const = true; | 280 | | constant_default_value.data = default_jsonb_value_parser.value(); | 281 | | constant_default_value.size = default_jsonb_value_parser.size(); | 282 | | } | 283 | | } | 284 | | | 285 | 7 | auto col_to = ColumnString::create(); | 286 | | | 287 | 7 | col_to->reserve(input_rows_count); | 288 | | | 289 | 7 | auto& null_map_data = null_map->get_data(); | 290 | | | 291 | | // parser can be reused for performance | 292 | 7 | JsonBinaryValue jsonb_value; | 293 | | | 294 | 30 | for (size_t i = 0; i < input_rows_count; ++i) { | 295 | 23 | if (is_nullable && null_map_data[i]) { | 296 | 1 | col_to->insert_default(); | 297 | 1 | continue; | 298 | 1 | } | 299 | | | 300 | 22 | auto index = index_check_const(i, col_from_is_const); | 301 | 22 | const auto& val = col_from_string->get_data_at(index); | 302 | 22 | auto st = jsonb_value.from_json_string(val.data, val.size); | 303 | 22 | if (st.ok()) { | 304 | | // insert jsonb format data | 305 | 16 | col_to->insert_data(jsonb_value.value(), jsonb_value.size()); | 306 | 16 | } else { | 307 | 6 | if constexpr (parse_error_handle_mode == JsonbParseErrorMode::FAIL) { | 308 | 6 | return Status::InvalidArgument( | 309 | 6 | "Parse json document failed at row {}, error: {}", i, st.to_string()); | 310 | | } else if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_NULL) { | 311 | | null_map_data[i] = 1; | 312 | | col_to->insert_default(); | 313 | | } else { | 314 | | if 
(default_value_const) { | 315 | | if (default_value_null_const) { | 316 | | null_map_data[i] = 1; | 317 | | col_to->insert_default(); | 318 | | } else { | 319 | | col_to->insert_data(constant_default_value.data, | 320 | | constant_default_value.size); | 321 | | } | 322 | | } else { | 323 | | if (default_value_nullmap && (*default_value_nullmap)[i]) { | 324 | | null_map_data[i] = 1; | 325 | | col_to->insert_default(); | 326 | | continue; | 327 | | } | 328 | | auto value = default_value_str_col->get_data_at(i); | 329 | | col_to->insert_data(value.data, value.size); | 330 | | } | 331 | | } | 332 | 6 | } | 333 | 22 | } | 334 | | | 335 | 7 | if (is_nullable) { | 336 | 1 | block.replace_by_position( | 337 | 1 | result, ColumnNullable::create(std::move(col_to), std::move(null_map))); | 338 | 6 | } else { | 339 | 6 | block.replace_by_position(result, std::move(col_to)); | 340 | 6 | } | 341 | | | 342 | 7 | return Status::OK(); | 343 | 7 | } |
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 185 | 1 | uint32_t result, size_t input_rows_count) const override { | 186 | 1 | auto&& [col_from, col_from_is_const] = | 187 | 1 | unpack_if_const(block.get_by_position(arguments[0]).column); | 188 | | | 189 | 1 | if (col_from_is_const && col_from->is_null_at(0)) { | 190 | 0 | auto col_str = ColumnString::create(); | 191 | 0 | col_str->insert_default(); | 192 | 0 | auto null_map = ColumnUInt8::create(1, 1); | 193 | 0 | auto nullable_col = ColumnNullable::create(std::move(col_str), std::move(null_map)); | 194 | 0 | block.get_by_position(result).column = | 195 | 0 | ColumnConst::create(std::move(nullable_col), input_rows_count); | 196 | 0 | return Status::OK(); | 197 | 0 | } | 198 | | | 199 | 1 | auto null_map = ColumnUInt8::create(0, 0); | 200 | 1 | bool is_nullable = false; | 201 | | | 202 | 1 | switch (nullable_mode) { | 203 | 1 | case NullalbeMode::NULLABLE: { | 204 | 1 | is_nullable = true; | 205 | 1 | break; | 206 | 0 | } | 207 | 0 | case NullalbeMode::FOLLOW_INPUT: { | 208 | 0 | for (auto arg : arguments) { | 209 | 0 | is_nullable |= block.get_by_position(arg).type->is_nullable(); | 210 | 0 | } | 211 | 0 | break; | 212 | 0 | } | 213 | 1 | } | 214 | | | 215 | 1 | if (is_nullable) { | 216 | 1 | null_map = ColumnUInt8::create(input_rows_count, 0); | 217 | 1 | } | 218 | | | 219 | 1 | const ColumnString* col_from_string = nullptr; | 220 | 1 | if (col_from->is_nullable()) { | 221 | 1 | const auto& nullable_col = assert_cast<const ColumnNullable&>(*col_from); | 222 | | | 223 | 1 | VectorizedUtils::update_null_map(null_map->get_data(), | 224 | 1 | nullable_col.get_null_map_data()); | 225 | 1 | col_from_string = | 226 | 1 | assert_cast<const ColumnString*>(nullable_col.get_nested_column_ptr().get()); | 227 | 1 | } else { | 228 | 0 | col_from_string = assert_cast<const 
ColumnString*>(col_from.get()); | 229 | 0 | } | 230 | | | 231 | 1 | StringRef constant_default_value; | 232 | 1 | bool default_value_const = false; | 233 | 1 | bool default_value_null_const = false; | 234 | 1 | ColumnPtr default_value_col; | 235 | 1 | JsonBinaryValue default_jsonb_value_parser; | 236 | 1 | const ColumnString* default_value_str_col = nullptr; | 237 | 1 | const NullMap* default_value_nullmap = nullptr; | 238 | | if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) { | 239 | | auto* state = reinterpret_cast<FunctionJsonbParseState*>( | 240 | | context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); | 241 | | if (state && state->has_const_default_value) { | 242 | | constant_default_value = state->default_value; | 243 | | default_value_null_const = state->default_is_null; | 244 | | default_value_const = true; | 245 | | } else if (arguments.size() > 1) { | 246 | | if (block.get_by_position(arguments[1]).type->get_primitive_type() != | 247 | | PrimitiveType::TYPE_JSONB) { | 248 | | return Status::InvalidArgument( | 249 | | "{} second argument should be jsonb type, but got {}", get_name(), | 250 | | block.get_by_position(arguments[1]).type->get_name()); | 251 | | } | 252 | | std::tie(default_value_col, default_value_const) = | 253 | | unpack_if_const(block.get_by_position(arguments[1]).column); | 254 | | if (default_value_const) { | 255 | | const JsonbDocument* default_value_doc = nullptr; | 256 | | if (default_value_col->is_null_at(0)) { | 257 | | default_value_null_const = true; | 258 | | } else { | 259 | | auto data = default_value_col->get_data_at(0); | 260 | | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(data.data, data.size, | 261 | | &default_value_doc)); | 262 | | constant_default_value = data; | 263 | | } | 264 | | } else { | 265 | | if (default_value_col->is_nullable()) { | 266 | | const auto& nullable_col = | 267 | | assert_cast<const ColumnNullable&>(*default_value_col); | 268 | | default_value_str_col = 
assert_cast<const ColumnString*>( | 269 | | nullable_col.get_nested_column_ptr().get()); | 270 | | default_value_nullmap = &(nullable_col.get_null_map_data()); | 271 | | } else { | 272 | | default_value_str_col = | 273 | | assert_cast<const ColumnString*>(default_value_col.get()); | 274 | | } | 275 | | } | 276 | | } else if (arguments.size() == 1) { | 277 | | // parse default value '{}' should always success. | 278 | | RETURN_IF_ERROR(default_jsonb_value_parser.from_json_string(std::string("{}"))); | 279 | | default_value_const = true; | 280 | | constant_default_value.data = default_jsonb_value_parser.value(); | 281 | | constant_default_value.size = default_jsonb_value_parser.size(); | 282 | | } | 283 | | } | 284 | | | 285 | 1 | auto col_to = ColumnString::create(); | 286 | | | 287 | 1 | col_to->reserve(input_rows_count); | 288 | | | 289 | 1 | auto& null_map_data = null_map->get_data(); | 290 | | | 291 | | // parser can be reused for performance | 292 | 1 | JsonBinaryValue jsonb_value; | 293 | | | 294 | 24 | for (size_t i = 0; i < input_rows_count; ++i) { | 295 | 23 | if (is_nullable && null_map_data[i]) { | 296 | 1 | col_to->insert_default(); | 297 | 1 | continue; | 298 | 1 | } | 299 | | | 300 | 22 | auto index = index_check_const(i, col_from_is_const); | 301 | 22 | const auto& val = col_from_string->get_data_at(index); | 302 | 22 | auto st = jsonb_value.from_json_string(val.data, val.size); | 303 | 22 | if (st.ok()) { | 304 | | // insert jsonb format data | 305 | 16 | col_to->insert_data(jsonb_value.value(), jsonb_value.size()); | 306 | 16 | } else { | 307 | | if constexpr (parse_error_handle_mode == JsonbParseErrorMode::FAIL) { | 308 | | return Status::InvalidArgument( | 309 | | "Parse json document failed at row {}, error: {}", i, st.to_string()); | 310 | 6 | } else if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_NULL) { | 311 | 6 | null_map_data[i] = 1; | 312 | 6 | col_to->insert_default(); | 313 | | } else { | 314 | | if 
(default_value_const) { | 315 | | if (default_value_null_const) { | 316 | | null_map_data[i] = 1; | 317 | | col_to->insert_default(); | 318 | | } else { | 319 | | col_to->insert_data(constant_default_value.data, | 320 | | constant_default_value.size); | 321 | | } | 322 | | } else { | 323 | | if (default_value_nullmap && (*default_value_nullmap)[i]) { | 324 | | null_map_data[i] = 1; | 325 | | col_to->insert_default(); | 326 | | continue; | 327 | | } | 328 | | auto value = default_value_str_col->get_data_at(i); | 329 | | col_to->insert_data(value.data, value.size); | 330 | | } | 331 | | } | 332 | 6 | } | 333 | 22 | } | 334 | | | 335 | 1 | if (is_nullable) { | 336 | 1 | block.replace_by_position( | 337 | 1 | result, ColumnNullable::create(std::move(col_to), std::move(null_map))); | 338 | 1 | } else { | 339 | 0 | block.replace_by_position(result, std::move(col_to)); | 340 | 0 | } | 341 | | | 342 | 1 | return Status::OK(); | 343 | 1 | } |
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 185 | 2 | uint32_t result, size_t input_rows_count) const override { | 186 | 2 | auto&& [col_from, col_from_is_const] = | 187 | 2 | unpack_if_const(block.get_by_position(arguments[0]).column); | 188 | | | 189 | 2 | if (col_from_is_const && col_from->is_null_at(0)) { | 190 | 0 | auto col_str = ColumnString::create(); | 191 | 0 | col_str->insert_default(); | 192 | 0 | auto null_map = ColumnUInt8::create(1, 1); | 193 | 0 | auto nullable_col = ColumnNullable::create(std::move(col_str), std::move(null_map)); | 194 | 0 | block.get_by_position(result).column = | 195 | 0 | ColumnConst::create(std::move(nullable_col), input_rows_count); | 196 | 0 | return Status::OK(); | 197 | 0 | } | 198 | | | 199 | 2 | auto null_map = ColumnUInt8::create(0, 0); | 200 | 2 | bool is_nullable = false; | 201 | | | 202 | 2 | switch (nullable_mode) { | 203 | 0 | case NullalbeMode::NULLABLE: { | 204 | 0 | is_nullable = true; | 205 | 0 | break; | 206 | 0 | } | 207 | 2 | case NullalbeMode::FOLLOW_INPUT: { | 208 | 4 | for (auto arg : arguments) { | 209 | 4 | is_nullable |= block.get_by_position(arg).type->is_nullable(); | 210 | 4 | } | 211 | 2 | break; | 212 | 0 | } | 213 | 2 | } | 214 | | | 215 | 2 | if (is_nullable) { | 216 | 1 | null_map = ColumnUInt8::create(input_rows_count, 0); | 217 | 1 | } | 218 | | | 219 | 2 | const ColumnString* col_from_string = nullptr; | 220 | 2 | if (col_from->is_nullable()) { | 221 | 1 | const auto& nullable_col = assert_cast<const ColumnNullable&>(*col_from); | 222 | | | 223 | 1 | VectorizedUtils::update_null_map(null_map->get_data(), | 224 | 1 | nullable_col.get_null_map_data()); | 225 | 1 | col_from_string = | 226 | 1 | assert_cast<const ColumnString*>(nullable_col.get_nested_column_ptr().get()); | 227 | 1 | } else { | 228 | 1 | col_from_string = assert_cast<const 
ColumnString*>(col_from.get()); | 229 | 1 | } | 230 | | | 231 | 2 | StringRef constant_default_value; | 232 | 2 | bool default_value_const = false; | 233 | 2 | bool default_value_null_const = false; | 234 | 2 | ColumnPtr default_value_col; | 235 | 2 | JsonBinaryValue default_jsonb_value_parser; | 236 | 2 | const ColumnString* default_value_str_col = nullptr; | 237 | 2 | const NullMap* default_value_nullmap = nullptr; | 238 | 2 | if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) { | 239 | 2 | auto* state = reinterpret_cast<FunctionJsonbParseState*>( | 240 | 2 | context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); | 241 | 2 | if (state && state->has_const_default_value) { | 242 | 0 | constant_default_value = state->default_value; | 243 | 0 | default_value_null_const = state->default_is_null; | 244 | 0 | default_value_const = true; | 245 | 2 | } else if (arguments.size() > 1) { | 246 | 2 | if (block.get_by_position(arguments[1]).type->get_primitive_type() != | 247 | 2 | PrimitiveType::TYPE_JSONB) { | 248 | 1 | return Status::InvalidArgument( | 249 | 1 | "{} second argument should be jsonb type, but got {}", get_name(), | 250 | 1 | block.get_by_position(arguments[1]).type->get_name()); | 251 | 1 | } | 252 | 1 | std::tie(default_value_col, default_value_const) = | 253 | 1 | unpack_if_const(block.get_by_position(arguments[1]).column); | 254 | 1 | if (default_value_const) { | 255 | 0 | const JsonbDocument* default_value_doc = nullptr; | 256 | 0 | if (default_value_col->is_null_at(0)) { | 257 | 0 | default_value_null_const = true; | 258 | 0 | } else { | 259 | 0 | auto data = default_value_col->get_data_at(0); | 260 | 0 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(data.data, data.size, | 261 | 0 | &default_value_doc)); | 262 | 0 | constant_default_value = data; | 263 | 0 | } | 264 | 1 | } else { | 265 | 1 | if (default_value_col->is_nullable()) { | 266 | 1 | const auto& nullable_col = | 267 | 1 | assert_cast<const 
ColumnNullable&>(*default_value_col); | 268 | 1 | default_value_str_col = assert_cast<const ColumnString*>( | 269 | 1 | nullable_col.get_nested_column_ptr().get()); | 270 | 1 | default_value_nullmap = &(nullable_col.get_null_map_data()); | 271 | 1 | } else { | 272 | 0 | default_value_str_col = | 273 | 0 | assert_cast<const ColumnString*>(default_value_col.get()); | 274 | 0 | } | 275 | 1 | } | 276 | 1 | } else if (arguments.size() == 1) { | 277 | | // parse default value '{}' should always success. | 278 | 0 | RETURN_IF_ERROR(default_jsonb_value_parser.from_json_string(std::string("{}"))); | 279 | 0 | default_value_const = true; | 280 | 0 | constant_default_value.data = default_jsonb_value_parser.value(); | 281 | 0 | constant_default_value.size = default_jsonb_value_parser.size(); | 282 | 0 | } | 283 | 2 | } | 284 | | | 285 | 1 | auto col_to = ColumnString::create(); | 286 | | | 287 | 2 | col_to->reserve(input_rows_count); | 288 | | | 289 | 2 | auto& null_map_data = null_map->get_data(); | 290 | | | 291 | | // parser can be reused for performance | 292 | 2 | JsonBinaryValue jsonb_value; | 293 | | | 294 | 25 | for (size_t i = 0; i < input_rows_count; ++i) { | 295 | 23 | if (is_nullable && null_map_data[i]) { | 296 | 1 | col_to->insert_default(); | 297 | 1 | continue; | 298 | 1 | } | 299 | | | 300 | 22 | auto index = index_check_const(i, col_from_is_const); | 301 | 22 | const auto& val = col_from_string->get_data_at(index); | 302 | 22 | auto st = jsonb_value.from_json_string(val.data, val.size); | 303 | 22 | if (st.ok()) { | 304 | | // insert jsonb format data | 305 | 16 | col_to->insert_data(jsonb_value.value(), jsonb_value.size()); | 306 | 16 | } else { | 307 | | if constexpr (parse_error_handle_mode == JsonbParseErrorMode::FAIL) { | 308 | | return Status::InvalidArgument( | 309 | | "Parse json document failed at row {}, error: {}", i, st.to_string()); | 310 | | } else if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_NULL) { | 311 | | 
null_map_data[i] = 1; | 312 | | col_to->insert_default(); | 313 | 6 | } else { | 314 | 6 | if (default_value_const) { | 315 | 0 | if (default_value_null_const) { | 316 | 0 | null_map_data[i] = 1; | 317 | 0 | col_to->insert_default(); | 318 | 0 | } else { | 319 | 0 | col_to->insert_data(constant_default_value.data, | 320 | 0 | constant_default_value.size); | 321 | 0 | } | 322 | 6 | } else { | 323 | 6 | if (default_value_nullmap && (*default_value_nullmap)[i]) { | 324 | 0 | null_map_data[i] = 1; | 325 | 0 | col_to->insert_default(); | 326 | 0 | continue; | 327 | 0 | } | 328 | 6 | auto value = default_value_str_col->get_data_at(i); | 329 | 6 | col_to->insert_data(value.data, value.size); | 330 | 6 | } | 331 | 6 | } | 332 | 6 | } | 333 | 22 | } | 334 | | | 335 | 2 | if (is_nullable) { | 336 | 1 | block.replace_by_position( | 337 | 1 | result, ColumnNullable::create(std::move(col_to), std::move(null_map))); | 338 | 1 | } else { | 339 | 1 | block.replace_by_position(result, std::move(col_to)); | 340 | 1 | } | 341 | | | 342 | 2 | return Status::OK(); | 343 | 2 | } |
|
344 | | }; |
345 | | |
346 | | // jsonb_parse variants. NullalbeMode: FOLLOW_INPUT = result is nullable only if an argument is nullable, NULLABLE = result is always nullable. JsonbParseErrorMode: FAIL = a parse error fails the call, RETURN_NULL = the row becomes NULL, RETURN_VALUE = the row takes the default value |
347 | | using FunctionJsonbParse = |
348 | | FunctionJsonbParseBase<NullalbeMode::FOLLOW_INPUT, JsonbParseErrorMode::FAIL>; |
349 | | using FunctionJsonbParseErrorNull = |
350 | | FunctionJsonbParseBase<NullalbeMode::NULLABLE, JsonbParseErrorMode::RETURN_NULL>; |
351 | | using FunctionJsonbParseErrorValue = |
352 | | FunctionJsonbParseBase<NullalbeMode::FOLLOW_INPUT, JsonbParseErrorMode::RETURN_VALUE>; |
353 | | |
354 | | // func(jsonb, [varchar, varchar, ...]) -> nullable(type) |
355 | | template <typename Impl> |
356 | | class FunctionJsonbExtract : public IFunction { |
357 | | public: |
358 | | static constexpr auto name = Impl::name; |
359 | | static constexpr auto alias = Impl::alias; |
360 | 135 | static FunctionPtr create() { return std::make_shared<FunctionJsonbExtract>(); }_ZN5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE6createEv Line | Count | Source | 360 | 8 | static FunctionPtr create() { return std::make_shared<FunctionJsonbExtract>(); } |
_ZN5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE6createEv Line | Count | Source | 360 | 8 | static FunctionPtr create() { return std::make_shared<FunctionJsonbExtract>(); } |
_ZN5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE6createEv Line | Count | Source | 360 | 111 | static FunctionPtr create() { return std::make_shared<FunctionJsonbExtract>(); } |
_ZN5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE6createEv Line | Count | Source | 360 | 8 | static FunctionPtr create() { return std::make_shared<FunctionJsonbExtract>(); } |
|
361 | 0 | String get_name() const override { return name; }Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE8get_nameB5cxx11Ev |
362 | 108 | bool is_variadic() const override { return true; }_ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE11is_variadicEv Line | Count | Source | 362 | 1 | bool is_variadic() const override { return true; } |
_ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE11is_variadicEv Line | Count | Source | 362 | 1 | bool is_variadic() const override { return true; } |
_ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE11is_variadicEv Line | Count | Source | 362 | 105 | bool is_variadic() const override { return true; } |
_ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE11is_variadicEv Line | Count | Source | 362 | 1 | bool is_variadic() const override { return true; } |
|
363 | 1 | size_t get_number_of_arguments() const override { return 0; }Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE23get_number_of_argumentsEv Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE23get_number_of_argumentsEv _ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE23get_number_of_argumentsEv Line | Count | Source | 363 | 1 | size_t get_number_of_arguments() const override { return 0; } |
Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE23get_number_of_argumentsEv |
364 | 210 | bool use_default_implementation_for_nulls() const override { return false; }Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE36use_default_implementation_for_nullsEv Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE36use_default_implementation_for_nullsEv _ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE36use_default_implementation_for_nullsEv Line | Count | Source | 364 | 210 | bool use_default_implementation_for_nulls() const override { return false; } |
Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE36use_default_implementation_for_nullsEv |
365 | 103 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
366 | 103 | return make_nullable(std::make_shared<typename Impl::ReturnType>()); |
367 | 103 | } Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE _ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 365 | 103 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 366 | 103 | return make_nullable(std::make_shared<typename Impl::ReturnType>()); | 367 | 103 | } |
Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE |
368 | 28 | DataTypes get_variadic_argument_types_impl() const override { |
369 | | if constexpr (HasGetVariadicArgumentTypesImpl<Impl>) { |
370 | | return Impl::get_variadic_argument_types_impl(); |
371 | 28 | } else { |
372 | 28 | return {}; |
373 | 28 | } |
374 | 28 | } _ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE32get_variadic_argument_types_implEv Line | Count | Source | 368 | 7 | DataTypes get_variadic_argument_types_impl() const override { | 369 | | if constexpr (HasGetVariadicArgumentTypesImpl<Impl>) { | 370 | | return Impl::get_variadic_argument_types_impl(); | 371 | 7 | } else { | 372 | 7 | return {}; | 373 | 7 | } | 374 | 7 | } |
_ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE32get_variadic_argument_types_implEv Line | Count | Source | 368 | 7 | DataTypes get_variadic_argument_types_impl() const override { | 369 | | if constexpr (HasGetVariadicArgumentTypesImpl<Impl>) { | 370 | | return Impl::get_variadic_argument_types_impl(); | 371 | 7 | } else { | 372 | 7 | return {}; | 373 | 7 | } | 374 | 7 | } |
_ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE32get_variadic_argument_types_implEv Line | Count | Source | 368 | 7 | DataTypes get_variadic_argument_types_impl() const override { | 369 | | if constexpr (HasGetVariadicArgumentTypesImpl<Impl>) { | 370 | | return Impl::get_variadic_argument_types_impl(); | 371 | 7 | } else { | 372 | 7 | return {}; | 373 | 7 | } | 374 | 7 | } |
_ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE32get_variadic_argument_types_implEv Line | Count | Source | 368 | 7 | DataTypes get_variadic_argument_types_impl() const override { | 369 | | if constexpr (HasGetVariadicArgumentTypesImpl<Impl>) { | 370 | | return Impl::get_variadic_argument_types_impl(); | 371 | 7 | } else { | 372 | 7 | return {}; | 373 | 7 | } | 374 | 7 | } |
|
375 | | |
376 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
377 | 107 | uint32_t result, size_t input_rows_count) const override { |
378 | 107 | DCHECK_GE(arguments.size(), 2); |
379 | | |
380 | 107 | ColumnPtr jsonb_data_column; |
381 | 107 | bool jsonb_data_const = false; |
382 | 107 | const NullMap* data_null_map = nullptr; |
383 | | |
384 | 107 | if (block.get_by_position(arguments[0]).type->get_primitive_type() != |
385 | 107 | PrimitiveType::TYPE_JSONB) { |
386 | 1 | return Status::InvalidArgument( |
387 | 1 | "jsonb_extract first argument should be json type, but got {}", |
388 | 1 | block.get_by_position(arguments[0]).type->get_name()); |
389 | 1 | } |
390 | | |
391 | | // prepare jsonb data column |
392 | 106 | std::tie(jsonb_data_column, jsonb_data_const) = |
393 | 106 | unpack_if_const(block.get_by_position(arguments[0]).column); |
394 | 106 | if (jsonb_data_column->is_nullable()) { |
395 | 106 | const auto& nullable_column = assert_cast<const ColumnNullable&>(*jsonb_data_column); |
396 | 106 | jsonb_data_column = nullable_column.get_nested_column_ptr(); |
397 | 106 | data_null_map = &nullable_column.get_null_map_data(); |
398 | 106 | } |
399 | 106 | const auto& ldata = assert_cast<const ColumnString*>(jsonb_data_column.get())->get_chars(); |
400 | 106 | const auto& loffsets = |
401 | 106 | assert_cast<const ColumnString*>(jsonb_data_column.get())->get_offsets(); |
402 | | |
403 | | // prepare parse path column prepare |
404 | 106 | std::vector<const ColumnString*> jsonb_path_columns; |
405 | 106 | std::vector<bool> path_const(arguments.size() - 1); |
406 | 106 | std::vector<const NullMap*> path_null_maps(arguments.size() - 1, nullptr); |
407 | 212 | for (int i = 0; i < arguments.size() - 1; ++i) { |
408 | 106 | ColumnPtr path_column; |
409 | 106 | bool is_const = false; |
410 | 106 | std::tie(path_column, is_const) = |
411 | 106 | unpack_if_const(block.get_by_position(arguments[i + 1]).column); |
412 | 106 | path_const[i] = is_const; |
413 | 106 | if (path_column->is_nullable()) { |
414 | 4 | const auto& nullable_column = assert_cast<const ColumnNullable&>(*path_column); |
415 | 4 | path_column = nullable_column.get_nested_column_ptr(); |
416 | 4 | path_null_maps[i] = &nullable_column.get_null_map_data(); |
417 | 4 | } |
418 | 106 | jsonb_path_columns.push_back(assert_cast<const ColumnString*>(path_column.get())); |
419 | 106 | } |
420 | | |
421 | 106 | auto null_map = ColumnUInt8::create(input_rows_count, 0); |
422 | 106 | auto res = Impl::ColumnType::create(); |
423 | | |
424 | | // execute Impl |
425 | | if constexpr (std::is_same_v<typename Impl::ReturnType, DataTypeString> || |
426 | 106 | std::is_same_v<typename Impl::ReturnType, DataTypeJsonb>) { |
427 | 106 | auto& res_data = res->get_chars(); |
428 | 106 | auto& res_offsets = res->get_offsets(); |
429 | 106 | RETURN_IF_ERROR(Impl::vector_vector_v2( |
430 | 106 | context, ldata, loffsets, data_null_map, jsonb_data_const, jsonb_path_columns, |
431 | 106 | path_null_maps, path_const, res_data, res_offsets, null_map->get_data())); |
432 | 106 | } else { |
433 | | // not support other extract type for now (e.g. int, double, ...) |
434 | 0 | DCHECK_EQ(jsonb_path_columns.size(), 1); |
435 | 0 | const auto& rdata = jsonb_path_columns[0]->get_chars(); |
436 | 0 | const auto& roffsets = jsonb_path_columns[0]->get_offsets(); |
437 | |
|
438 | 0 | auto create_all_null_result = [&]() { |
439 | 0 | res = Impl::ColumnType::create(); |
440 | 0 | res->insert_default(); |
441 | 0 | auto nullable_column = |
442 | 0 | ColumnNullable::create(std::move(res), ColumnUInt8::create(1, 1)); |
443 | 0 | auto const_column = |
444 | 0 | ColumnConst::create(std::move(nullable_column), input_rows_count); |
445 | 0 | block.get_by_position(result).column = std::move(const_column); |
446 | 0 | return Status::OK(); |
447 | 0 | }; |
448 | |
|
449 | 0 | if (jsonb_data_const) { |
450 | 0 | if (data_null_map && (*data_null_map)[0]) { |
451 | 0 | return create_all_null_result(); |
452 | 0 | } |
453 | | |
454 | 0 | RETURN_IF_ERROR(Impl::scalar_vector(context, jsonb_data_column->get_data_at(0), |
455 | 0 | rdata, roffsets, path_null_maps[0], |
456 | 0 | res->get_data(), null_map->get_data())); |
457 | 0 | } else if (path_const[0]) { |
458 | 0 | if (path_null_maps[0] && (*path_null_maps[0])[0]) { |
459 | 0 | return create_all_null_result(); |
460 | 0 | } |
461 | 0 | RETURN_IF_ERROR(Impl::vector_scalar(context, ldata, loffsets, data_null_map, |
462 | 0 | jsonb_path_columns[0]->get_data_at(0), |
463 | 0 | res->get_data(), null_map->get_data())); |
464 | 0 | } else { |
465 | 0 | RETURN_IF_ERROR(Impl::vector_vector(context, ldata, loffsets, data_null_map, rdata, |
466 | 0 | roffsets, path_null_maps[0], res->get_data(), |
467 | 0 | null_map->get_data())); |
468 | 0 | } |
469 | 0 | } |
470 | | |
471 | 102 | block.get_by_position(result).column = |
472 | 106 | ColumnNullable::create(std::move(res), std::move(null_map)); |
473 | 106 | return Status::OK(); |
474 | 107 | } Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm _ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 377 | 107 | uint32_t result, size_t input_rows_count) const override { | 378 | 107 | DCHECK_GE(arguments.size(), 2); | 379 | | | 380 | 107 | ColumnPtr jsonb_data_column; | 381 | 107 | bool jsonb_data_const = false; | 382 | 107 | const NullMap* data_null_map = nullptr; | 383 | | | 384 | 107 | if (block.get_by_position(arguments[0]).type->get_primitive_type() != | 385 | 107 | PrimitiveType::TYPE_JSONB) { | 386 | 1 | return Status::InvalidArgument( | 387 | 1 | "jsonb_extract first argument should be json type, but got {}", | 388 | 1 | block.get_by_position(arguments[0]).type->get_name()); | 389 | 1 | } | 390 | | | 391 | | // prepare jsonb data column | 392 | 106 | std::tie(jsonb_data_column, jsonb_data_const) = | 393 | 106 | unpack_if_const(block.get_by_position(arguments[0]).column); | 394 | 106 | if (jsonb_data_column->is_nullable()) { | 395 | 106 | const auto& nullable_column = assert_cast<const ColumnNullable&>(*jsonb_data_column); | 396 | 106 | jsonb_data_column = nullable_column.get_nested_column_ptr(); | 397 | 106 | data_null_map = &nullable_column.get_null_map_data(); | 398 | 106 | } | 399 | 106 | const auto& ldata = assert_cast<const ColumnString*>(jsonb_data_column.get())->get_chars(); | 400 | 106 | const auto& loffsets = | 401 | 106 | assert_cast<const ColumnString*>(jsonb_data_column.get())->get_offsets(); | 402 | | | 403 | | // prepare parse path column prepare | 404 | 106 | std::vector<const ColumnString*> jsonb_path_columns; | 405 | 106 | std::vector<bool> path_const(arguments.size() - 1); | 
406 | 106 | std::vector<const NullMap*> path_null_maps(arguments.size() - 1, nullptr); | 407 | 212 | for (int i = 0; i < arguments.size() - 1; ++i) { | 408 | 106 | ColumnPtr path_column; | 409 | 106 | bool is_const = false; | 410 | 106 | std::tie(path_column, is_const) = | 411 | 106 | unpack_if_const(block.get_by_position(arguments[i + 1]).column); | 412 | 106 | path_const[i] = is_const; | 413 | 106 | if (path_column->is_nullable()) { | 414 | 4 | const auto& nullable_column = assert_cast<const ColumnNullable&>(*path_column); | 415 | 4 | path_column = nullable_column.get_nested_column_ptr(); | 416 | 4 | path_null_maps[i] = &nullable_column.get_null_map_data(); | 417 | 4 | } | 418 | 106 | jsonb_path_columns.push_back(assert_cast<const ColumnString*>(path_column.get())); | 419 | 106 | } | 420 | | | 421 | 106 | auto null_map = ColumnUInt8::create(input_rows_count, 0); | 422 | 106 | auto res = Impl::ColumnType::create(); | 423 | | | 424 | | // execute Impl | 425 | | if constexpr (std::is_same_v<typename Impl::ReturnType, DataTypeString> || | 426 | 106 | std::is_same_v<typename Impl::ReturnType, DataTypeJsonb>) { | 427 | 106 | auto& res_data = res->get_chars(); | 428 | 106 | auto& res_offsets = res->get_offsets(); | 429 | 106 | RETURN_IF_ERROR(Impl::vector_vector_v2( | 430 | 106 | context, ldata, loffsets, data_null_map, jsonb_data_const, jsonb_path_columns, | 431 | 106 | path_null_maps, path_const, res_data, res_offsets, null_map->get_data())); | 432 | | } else { | 433 | | // not support other extract type for now (e.g. int, double, ...) 
| 434 | | DCHECK_EQ(jsonb_path_columns.size(), 1); | 435 | | const auto& rdata = jsonb_path_columns[0]->get_chars(); | 436 | | const auto& roffsets = jsonb_path_columns[0]->get_offsets(); | 437 | | | 438 | | auto create_all_null_result = [&]() { | 439 | | res = Impl::ColumnType::create(); | 440 | | res->insert_default(); | 441 | | auto nullable_column = | 442 | | ColumnNullable::create(std::move(res), ColumnUInt8::create(1, 1)); | 443 | | auto const_column = | 444 | | ColumnConst::create(std::move(nullable_column), input_rows_count); | 445 | | block.get_by_position(result).column = std::move(const_column); | 446 | | return Status::OK(); | 447 | | }; | 448 | | | 449 | | if (jsonb_data_const) { | 450 | | if (data_null_map && (*data_null_map)[0]) { | 451 | | return create_all_null_result(); | 452 | | } | 453 | | | 454 | | RETURN_IF_ERROR(Impl::scalar_vector(context, jsonb_data_column->get_data_at(0), | 455 | | rdata, roffsets, path_null_maps[0], | 456 | | res->get_data(), null_map->get_data())); | 457 | | } else if (path_const[0]) { | 458 | | if (path_null_maps[0] && (*path_null_maps[0])[0]) { | 459 | | return create_all_null_result(); | 460 | | } | 461 | | RETURN_IF_ERROR(Impl::vector_scalar(context, ldata, loffsets, data_null_map, | 462 | | jsonb_path_columns[0]->get_data_at(0), | 463 | | res->get_data(), null_map->get_data())); | 464 | | } else { | 465 | | RETURN_IF_ERROR(Impl::vector_vector(context, ldata, loffsets, data_null_map, rdata, | 466 | | roffsets, path_null_maps[0], res->get_data(), | 467 | | null_map->get_data())); | 468 | | } | 469 | | } | 470 | | | 471 | 102 | block.get_by_position(result).column = | 472 | 106 | ColumnNullable::create(std::move(res), std::move(null_map)); | 473 | 106 | return Status::OK(); | 474 | 107 | } |
Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm |
475 | | }; |
476 | | |
477 | | class FunctionJsonbKeys : public IFunction { |
478 | | public: |
479 | | static constexpr auto name = "json_keys"; |
480 | | static constexpr auto alias = "jsonb_keys"; |
481 | 8 | static FunctionPtr create() { return std::make_shared<FunctionJsonbKeys>(); } |
482 | 0 | String get_name() const override { return name; } |
483 | 1 | bool is_variadic() const override { return true; } |
484 | 0 | size_t get_number_of_arguments() const override { return 0; } |
485 | | |
486 | 0 | bool use_default_implementation_for_nulls() const override { return false; } |
487 | | |
488 | 0 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
489 | 0 | return make_nullable( |
490 | 0 | std::make_shared<DataTypeArray>(make_nullable(std::make_shared<DataTypeString>()))); |
491 | 0 | } |
492 | | |
493 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
494 | 0 | uint32_t result, size_t input_rows_count) const override { |
495 | 0 | DCHECK_GE(arguments.size(), 1); |
496 | 0 | DCHECK(arguments.size() == 1 || arguments.size() == 2) |
497 | 0 | << "json_keys should have 1 or 2 arguments, but got " << arguments.size(); |
498 | |
|
499 | 0 | const NullMap* data_null_map = nullptr; |
500 | 0 | const ColumnString* col_from_string = nullptr; |
501 | | // prepare jsonb data column |
502 | 0 | auto&& [jsonb_data_column, json_data_const] = |
503 | 0 | unpack_if_const(block.get_by_position(arguments[0]).column); |
504 | 0 | if (jsonb_data_column->is_nullable()) { |
505 | 0 | const auto* nullable = check_and_get_column<ColumnNullable>(jsonb_data_column.get()); |
506 | 0 | col_from_string = |
507 | 0 | assert_cast<const ColumnString*>(nullable->get_nested_column_ptr().get()); |
508 | 0 | data_null_map = &nullable->get_null_map_data(); |
509 | 0 | } else { |
510 | 0 | col_from_string = assert_cast<const ColumnString*>(jsonb_data_column.get()); |
511 | 0 | } |
512 | | |
513 | | // prepare parse path column prepare, maybe we do not have path column |
514 | 0 | ColumnPtr jsonb_path_column = nullptr; |
515 | 0 | const ColumnString* jsonb_path_col = nullptr; |
516 | 0 | bool path_const = false; |
517 | 0 | const NullMap* path_null_map = nullptr; |
518 | 0 | if (arguments.size() == 2) { |
519 | | // we have should have a ColumnString for path |
520 | 0 | std::tie(jsonb_path_column, path_const) = |
521 | 0 | unpack_if_const(block.get_by_position(arguments[1]).column); |
522 | 0 | if (jsonb_path_column->is_nullable()) { |
523 | 0 | const auto* nullable = |
524 | 0 | check_and_get_column<ColumnNullable>(jsonb_path_column.get()); |
525 | 0 | jsonb_path_column = nullable->get_nested_column_ptr(); |
526 | 0 | path_null_map = &nullable->get_null_map_data(); |
527 | 0 | } |
528 | 0 | jsonb_path_col = check_and_get_column<ColumnString>(jsonb_path_column.get()); |
529 | 0 | } |
530 | |
|
531 | 0 | auto null_map = ColumnUInt8::create(input_rows_count, 0); |
532 | 0 | NullMap& res_null_map = null_map->get_data(); |
533 | |
|
534 | 0 | auto dst_arr = ColumnArray::create( |
535 | 0 | ColumnNullable::create(ColumnString::create(), ColumnUInt8::create()), |
536 | 0 | ColumnArray::ColumnOffsets::create()); |
537 | 0 | auto& dst_nested_column = assert_cast<ColumnNullable&>(dst_arr->get_data()); |
538 | |
|
539 | 0 | Status st = std::visit( |
540 | 0 | [&](auto data_const, auto has_path, auto path_const) { |
541 | 0 | return inner_loop_impl<data_const, has_path, path_const>( |
542 | 0 | input_rows_count, *dst_arr, dst_nested_column, res_null_map, |
543 | 0 | *col_from_string, data_null_map, jsonb_path_col, path_null_map); |
544 | 0 | }, Unexecuted instantiation: _ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESG_SG_EEDaSA_SB_SC_ Unexecuted instantiation: _ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESG_SF_IbLb1EEEEDaSA_SB_SC_ Unexecuted instantiation: _ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESF_IbLb1EESG_EEDaSA_SB_SC_ Unexecuted instantiation: _ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESF_IbLb1EESH_EEDaSA_SB_SC_ Unexecuted instantiation: _ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESF_IbLb0EESH_EEDaSA_SB_SC_ Unexecuted instantiation: _ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESF_IbLb0EESG_EEDaSA_SB_SC_ Unexecuted instantiation: _ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESG_SF_IbLb0EEEEDaSA_SB_SC_ Unexecuted instantiation: _ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESG_SG_EEDaSA_SB_SC_ |
545 | 0 | make_bool_variant(json_data_const), make_bool_variant(jsonb_path_column), |
546 | 0 | make_bool_variant(path_const)); |
547 | 0 | if (!st.ok()) { |
548 | 0 | return st; |
549 | 0 | } |
550 | 0 | block.get_by_position(result).column = |
551 | 0 | ColumnNullable::create(std::move(dst_arr), std::move(null_map)); |
552 | 0 | return st; |
553 | 0 | } |
554 | | |
555 | | private: |
556 | | template <bool JSONB_DATA_CONST, bool JSONB_PATH_PARAM, bool JSON_PATH_CONST> |
557 | | static ALWAYS_INLINE Status inner_loop_impl(size_t input_rows_count, ColumnArray& dst_arr, |
558 | | ColumnNullable& dst_nested_column, |
559 | | NullMap& res_null_map, |
560 | | const ColumnString& col_from_string, |
561 | | const NullMap* jsonb_data_nullmap, |
562 | | const ColumnString* jsonb_path_column, |
563 | 0 | const NullMap* path_null_map) { |
564 | | // if path is const, we just need to parse it once |
565 | 0 | JsonbPath const_path; |
566 | 0 | if constexpr (JSONB_PATH_PARAM && JSON_PATH_CONST) { |
567 | 0 | StringRef r_raw_ref = jsonb_path_column->get_data_at(0); |
568 | 0 | if (!const_path.seek(r_raw_ref.data, r_raw_ref.size)) { |
569 | 0 | return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}", |
570 | 0 | r_raw_ref.to_string()); |
571 | 0 | } |
572 | | |
573 | 0 | if (const_path.is_wildcard()) { |
574 | 0 | return Status::InvalidJsonPath( |
575 | 0 | "In this situation, path expressions may not contain the * and ** tokens " |
576 | 0 | "or an array range."); |
577 | 0 | } |
578 | 0 | } |
579 | | |
580 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { |
581 | 0 | auto index = index_check_const(i, JSONB_DATA_CONST); |
582 | | // if jsonb data is null or path column is null , we should return null |
583 | 0 | if (jsonb_data_nullmap && (*jsonb_data_nullmap)[index]) { |
584 | 0 | res_null_map[i] = 1; |
585 | 0 | dst_arr.insert_default(); |
586 | 0 | continue; |
587 | 0 | } |
588 | 0 | if constexpr (JSONB_PATH_PARAM && !JSON_PATH_CONST) { |
589 | 0 | if (path_null_map && (*path_null_map)[i]) { |
590 | 0 | res_null_map[i] = 1; |
591 | 0 | dst_arr.insert_default(); |
592 | 0 | continue; |
593 | 0 | } |
594 | 0 | } |
595 | | |
596 | 0 | auto json_data = col_from_string.get_data_at(index); |
597 | 0 | const JsonbDocument* doc = nullptr; |
598 | 0 | auto st = JsonbDocument::checkAndCreateDocument(json_data.data, json_data.size, &doc); |
599 | 0 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { |
600 | 0 | dst_arr.clear(); |
601 | 0 | return Status::InvalidArgument("jsonb data is invalid"); |
602 | 0 | } |
603 | 0 | const JsonbValue* obj_val; |
604 | 0 | JsonbFindResult find_result; |
605 | 0 | if constexpr (JSONB_PATH_PARAM) { |
606 | 0 | if constexpr (!JSON_PATH_CONST) { |
607 | 0 | auto data = jsonb_path_column->get_data_at(i); |
608 | 0 | JsonbPath path; |
609 | 0 | if (!path.seek(data.data, data.size)) { |
610 | 0 | return Status::InvalidArgument( |
611 | 0 | "Json path error: Invalid Json Path for value: {} at row: {}", |
612 | 0 | std::string_view(data.data, data.size), i); |
613 | 0 | } |
614 | | |
615 | 0 | if (path.is_wildcard()) { |
616 | 0 | return Status::InvalidJsonPath( |
617 | 0 | "In this situation, path expressions may not contain the * and ** " |
618 | 0 | "tokens " |
619 | 0 | "or an array range. at row: {}", |
620 | 0 | i); |
621 | 0 | } |
622 | 0 | find_result = doc->getValue()->findValue(path); |
623 | 0 | } else { |
624 | 0 | find_result = doc->getValue()->findValue(const_path); |
625 | 0 | } |
626 | 0 | obj_val = find_result.value; |
627 | 0 | } else { |
628 | 0 | obj_val = doc->getValue(); |
629 | 0 | } |
630 | | |
631 | 0 | if (!obj_val || !obj_val->isObject()) { |
632 | | // if jsonb data is not object we should return null |
633 | 0 | res_null_map[i] = 1; |
634 | 0 | dst_arr.insert_default(); |
635 | 0 | continue; |
636 | 0 | } |
637 | 0 | const auto* obj = obj_val->unpack<ObjectVal>(); |
638 | 0 | for (const auto& it : *obj) { |
639 | 0 | dst_nested_column.insert_data(it.getKeyStr(), it.klen()); |
640 | 0 | } |
641 | 0 | dst_arr.get_offsets().push_back(dst_nested_column.size()); |
642 | 0 | } //for |
643 | 0 | return Status::OK(); |
644 | 0 | } Unexecuted instantiation: _ZN5doris17FunctionJsonbKeys15inner_loop_implILb0ELb0ELb0EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_ Unexecuted instantiation: _ZN5doris17FunctionJsonbKeys15inner_loop_implILb0ELb0ELb1EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_ Unexecuted instantiation: _ZN5doris17FunctionJsonbKeys15inner_loop_implILb0ELb1ELb0EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_ Unexecuted instantiation: _ZN5doris17FunctionJsonbKeys15inner_loop_implILb0ELb1ELb1EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_ Unexecuted instantiation: _ZN5doris17FunctionJsonbKeys15inner_loop_implILb1ELb0ELb0EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_ Unexecuted instantiation: _ZN5doris17FunctionJsonbKeys15inner_loop_implILb1ELb0ELb1EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_ Unexecuted instantiation: _ZN5doris17FunctionJsonbKeys15inner_loop_implILb1ELb1ELb0EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_ Unexecuted instantiation: 
_ZN5doris17FunctionJsonbKeys15inner_loop_implILb1ELb1ELb1EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_ |
645 | | }; |
646 | | |
647 | | class FunctionJsonbExtractPath : public IFunction { |
648 | | public: |
649 | | static constexpr auto name = "json_exists_path"; |
650 | | static constexpr auto alias = "jsonb_exists_path"; |
651 | | using ColumnType = ColumnUInt8; |
652 | | using Container = typename ColumnType::Container; |
653 | 8 | static FunctionPtr create() { return std::make_shared<FunctionJsonbExtractPath>(); } |
654 | 1 | String get_name() const override { return name; } |
655 | 0 | size_t get_number_of_arguments() const override { return 2; } |
656 | 0 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
657 | | // it only needs to indicate existence and does not need to return nullable values. |
658 | 0 | const auto nullable = std::ranges::any_of( |
659 | 0 | arguments, [](const DataTypePtr& type) { return type->is_nullable(); }); |
660 | 0 | if (nullable) { |
661 | 0 | return make_nullable(std::make_shared<DataTypeUInt8>()); |
662 | 0 | } else { |
663 | 0 | return std::make_shared<DataTypeUInt8>(); |
664 | 0 | } |
665 | 0 | } |
666 | | |
667 | 0 | bool use_default_implementation_for_nulls() const override { return false; } |
668 | | |
669 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
670 | 0 | uint32_t result, size_t input_rows_count) const override { |
671 | | // prepare jsonb data column |
672 | 0 | auto&& [jsonb_data_column, jsonb_data_const] = |
673 | 0 | unpack_if_const(block.get_by_position(arguments[0]).column); |
674 | |
|
675 | 0 | const NullMap* data_null_map = nullptr; |
676 | 0 | const ColumnString* data_col = nullptr; |
677 | 0 | if (jsonb_data_column->is_nullable()) { |
678 | 0 | const auto* nullable = assert_cast<const ColumnNullable*>(jsonb_data_column.get()); |
679 | 0 | data_col = assert_cast<const ColumnString*>(nullable->get_nested_column_ptr().get()); |
680 | 0 | data_null_map = &nullable->get_null_map_data(); |
681 | 0 | } else { |
682 | 0 | data_col = assert_cast<const ColumnString*>(jsonb_data_column.get()); |
683 | 0 | } |
684 | |
|
685 | 0 | const auto& ldata = data_col->get_chars(); |
686 | 0 | const auto& loffsets = data_col->get_offsets(); |
687 | | |
688 | | // prepare parse path column prepare |
689 | 0 | auto&& [path_column, path_const] = |
690 | 0 | unpack_if_const(block.get_by_position(arguments[1]).column); |
691 | 0 | const ColumnString* path_col = nullptr; |
692 | 0 | const NullMap* path_null_map = nullptr; |
693 | 0 | if (path_column->is_nullable()) { |
694 | 0 | const auto* nullable = assert_cast<const ColumnNullable*>(path_column.get()); |
695 | 0 | path_col = assert_cast<const ColumnString*>(nullable->get_nested_column_ptr().get()); |
696 | 0 | path_null_map = &nullable->get_null_map_data(); |
697 | 0 | } else { |
698 | 0 | path_col = assert_cast<const ColumnString*>(path_column.get()); |
699 | 0 | } |
700 | |
|
701 | 0 | DCHECK(!(jsonb_data_const && path_const)) |
702 | 0 | << "jsonb_data_const and path_const should not be both const"; |
703 | |
|
704 | 0 | auto create_all_null_result = [&]() { |
705 | 0 | auto res = ColumnType::create(); |
706 | 0 | res->insert_default(); |
707 | 0 | auto nullable_column = |
708 | 0 | ColumnNullable::create(std::move(res), ColumnUInt8::create(1, 1)); |
709 | 0 | auto const_column = ColumnConst::create(std::move(nullable_column), input_rows_count); |
710 | 0 | block.get_by_position(result).column = std::move(const_column); |
711 | 0 | return Status::OK(); |
712 | 0 | }; |
713 | |
|
714 | 0 | MutableColumnPtr result_null_map_column; |
715 | 0 | NullMap* result_null_map = nullptr; |
716 | 0 | if (data_null_map || path_null_map) { |
717 | 0 | result_null_map_column = ColumnUInt8::create(input_rows_count, 0); |
718 | 0 | result_null_map = &assert_cast<ColumnUInt8&>(*result_null_map_column).get_data(); |
719 | |
|
720 | 0 | if (data_null_map) { |
721 | 0 | VectorizedUtils::update_null_map(*result_null_map, *data_null_map, |
722 | 0 | jsonb_data_const); |
723 | 0 | } |
724 | |
|
725 | 0 | if (path_null_map) { |
726 | 0 | VectorizedUtils::update_null_map(*result_null_map, *path_null_map, path_const); |
727 | 0 | } |
728 | |
|
729 | 0 | if (!simd::contain_zero(result_null_map->data(), input_rows_count)) { |
730 | 0 | return create_all_null_result(); |
731 | 0 | } |
732 | 0 | } |
733 | | |
734 | 0 | auto res = ColumnType::create(); |
735 | |
|
736 | 0 | bool is_invalid_json_path = false; |
737 | |
|
738 | 0 | const auto& rdata = path_col->get_chars(); |
739 | 0 | const auto& roffsets = path_col->get_offsets(); |
740 | 0 | if (jsonb_data_const) { |
741 | 0 | if (data_null_map && (*data_null_map)[0]) { |
742 | 0 | return create_all_null_result(); |
743 | 0 | } |
744 | 0 | scalar_vector(context, data_col->get_data_at(0), rdata, roffsets, res->get_data(), |
745 | 0 | result_null_map, is_invalid_json_path); |
746 | 0 | } else if (path_const) { |
747 | 0 | if (path_null_map && (*path_null_map)[0]) { |
748 | 0 | return create_all_null_result(); |
749 | 0 | } |
750 | 0 | vector_scalar(context, ldata, loffsets, path_col->get_data_at(0), res->get_data(), |
751 | 0 | result_null_map, is_invalid_json_path); |
752 | 0 | } else { |
753 | 0 | vector_vector(context, ldata, loffsets, rdata, roffsets, res->get_data(), |
754 | 0 | result_null_map, is_invalid_json_path); |
755 | 0 | } |
756 | 0 | if (is_invalid_json_path) { |
757 | 0 | return Status::InvalidArgument( |
758 | 0 | "Json path error: Invalid Json Path for value: {}", |
759 | 0 | std::string_view(reinterpret_cast<const char*>(rdata.data()), rdata.size())); |
760 | 0 | } |
761 | | |
762 | 0 | if (result_null_map) { |
763 | 0 | auto nullabel_col = |
764 | 0 | ColumnNullable::create(std::move(res), std::move(result_null_map_column)); |
765 | 0 | block.get_by_position(result).column = std::move(nullabel_col); |
766 | 0 | } else { |
767 | 0 | block.get_by_position(result).column = std::move(res); |
768 | 0 | } |
769 | 0 | return Status::OK(); |
770 | 0 | } |
771 | | |
772 | | private: |
773 | | static ALWAYS_INLINE void inner_loop_impl(size_t i, Container& res, const char* l_raw_str, |
774 | 0 | size_t l_str_size, JsonbPath& path) { |
775 | | // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory |
776 | 0 | const JsonbDocument* doc = nullptr; |
777 | 0 | auto st = JsonbDocument::checkAndCreateDocument(l_raw_str, l_str_size, &doc); |
778 | 0 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { |
779 | 0 | return; |
780 | 0 | } |
781 | | |
782 | | // value is NOT necessary to be deleted since JsonbValue will not allocate memory |
783 | 0 | auto result = doc->getValue()->findValue(path); |
784 | |
|
785 | 0 | if (result.value) { |
786 | 0 | res[i] = 1; |
787 | 0 | } |
788 | 0 | } |
789 | | static void vector_vector(FunctionContext* context, const ColumnString::Chars& ldata, |
790 | | const ColumnString::Offsets& loffsets, |
791 | | const ColumnString::Chars& rdata, |
792 | | const ColumnString::Offsets& roffsets, Container& res, |
793 | 0 | const NullMap* result_null_map, bool& is_invalid_json_path) { |
794 | 0 | const size_t size = loffsets.size(); |
795 | 0 | res.resize_fill(size, 0); |
796 | |
|
797 | 0 | for (size_t i = 0; i < size; i++) { |
798 | 0 | if (result_null_map && (*result_null_map)[i]) { |
799 | 0 | continue; |
800 | 0 | } |
801 | | |
802 | 0 | const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]); |
803 | 0 | int l_str_size = loffsets[i] - loffsets[i - 1]; |
804 | |
|
805 | 0 | const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]); |
806 | 0 | int r_str_size = roffsets[i] - roffsets[i - 1]; |
807 | |
|
808 | 0 | JsonbPath path; |
809 | 0 | if (!path.seek(r_raw_str, r_str_size)) { |
810 | 0 | is_invalid_json_path = true; |
811 | 0 | return; |
812 | 0 | } |
813 | | |
814 | 0 | inner_loop_impl(i, res, l_raw_str, l_str_size, path); |
815 | 0 | } |
816 | 0 | } |
817 | | static void scalar_vector(FunctionContext* context, const StringRef& ldata, |
818 | | const ColumnString::Chars& rdata, |
819 | | const ColumnString::Offsets& roffsets, Container& res, |
820 | 0 | const NullMap* result_null_map, bool& is_invalid_json_path) { |
821 | 0 | const size_t size = roffsets.size(); |
822 | 0 | res.resize_fill(size, 0); |
823 | |
|
824 | 0 | for (size_t i = 0; i < size; i++) { |
825 | 0 | if (result_null_map && (*result_null_map)[i]) { |
826 | 0 | continue; |
827 | 0 | } |
828 | 0 | const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]); |
829 | 0 | int r_str_size = roffsets[i] - roffsets[i - 1]; |
830 | |
|
831 | 0 | JsonbPath path; |
832 | 0 | if (!path.seek(r_raw_str, r_str_size)) { |
833 | 0 | is_invalid_json_path = true; |
834 | 0 | return; |
835 | 0 | } |
836 | | |
837 | 0 | inner_loop_impl(i, res, ldata.data, ldata.size, path); |
838 | 0 | } |
839 | 0 | } |
840 | | static void vector_scalar(FunctionContext* context, const ColumnString::Chars& ldata, |
841 | | const ColumnString::Offsets& loffsets, const StringRef& rdata, |
842 | | Container& res, const NullMap* result_null_map, |
843 | 0 | bool& is_invalid_json_path) { |
844 | 0 | const size_t size = loffsets.size(); |
845 | 0 | res.resize_fill(size, 0); |
846 | |
|
847 | 0 | JsonbPath path; |
848 | 0 | if (!path.seek(rdata.data, rdata.size)) { |
849 | 0 | is_invalid_json_path = true; |
850 | 0 | return; |
851 | 0 | } |
852 | | |
853 | 0 | for (size_t i = 0; i < size; i++) { |
854 | 0 | if (result_null_map && (*result_null_map)[i]) { |
855 | 0 | continue; |
856 | 0 | } |
857 | 0 | const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]); |
858 | 0 | int l_str_size = loffsets[i] - loffsets[i - 1]; |
859 | |
|
860 | 0 | inner_loop_impl(i, res, l_raw_str, l_str_size, path); |
861 | 0 | } |
862 | 0 | } |
863 | | }; |
864 | | |
865 | | template <typename ValueType> |
866 | | struct JsonbExtractStringImpl { |
867 | | using ReturnType = typename ValueType::ReturnType; |
868 | | using ColumnType = typename ValueType::ColumnType; |
869 | | |
870 | | private: |
871 | | static ALWAYS_INLINE void inner_loop_impl(JsonbWriter* writer, size_t i, |
872 | | ColumnString::Chars& res_data, |
873 | | ColumnString::Offsets& res_offsets, NullMap& null_map, |
874 | | std::unique_ptr<JsonbToJson>& formater, |
875 | 4.59k | const char* l_raw, size_t l_size, JsonbPath& path) { |
876 | | // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory |
877 | 4.59k | const JsonbDocument* doc = nullptr; |
878 | 4.59k | auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc); |
879 | 4.59k | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { |
880 | 0 | StringOP::push_null_string(i, res_data, res_offsets, null_map); |
881 | 0 | return; |
882 | 0 | } |
883 | | |
884 | | // value is NOT necessary to be deleted since JsonbValue will not allocate memory |
885 | 4.59k | auto find_result = doc->getValue()->findValue(path); |
886 | | |
887 | 4.59k | if (UNLIKELY(!find_result.value)) { |
888 | 46 | StringOP::push_null_string(i, res_data, res_offsets, null_map); |
889 | 46 | return; |
890 | 46 | } |
891 | | |
892 | 4.55k | if constexpr (ValueType::only_get_type) { |
893 | 0 | StringOP::push_value_string(std::string_view(find_result.value->typeName()), i, |
894 | 0 | res_data, res_offsets); |
895 | 0 | return; |
896 | 4.55k | } else { |
897 | 4.55k | static_assert(std::is_same_v<DataTypeJsonb, ReturnType>); |
898 | 4.55k | if constexpr (ValueType::no_quotes) { |
899 | 0 | if (find_result.value->isString()) { |
900 | 0 | const auto* str_value = find_result.value->unpack<JsonbStringVal>(); |
901 | 0 | const auto* blob = str_value->getBlob(); |
902 | 0 | if (str_value->length() > 1 && blob[0] == '"' && |
903 | 0 | blob[str_value->length() - 1] == '"') { |
904 | 0 | writer->writeStartString(); |
905 | 0 | writer->writeString(blob + 1, str_value->length() - 2); |
906 | 0 | writer->writeEndString(); |
907 | 0 | StringOP::push_value_string( |
908 | 0 | std::string_view(writer->getOutput()->getBuffer(), |
909 | 0 | writer->getOutput()->getSize()), |
910 | 0 | i, res_data, res_offsets); |
911 | 0 | return; |
912 | 0 | } |
913 | 0 | } |
914 | 0 | } |
915 | 0 | writer->writeValueSimple(find_result.value); |
916 | 4.55k | StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(), |
917 | 4.55k | writer->getOutput()->getSize()), |
918 | 4.55k | i, res_data, res_offsets); |
919 | 4.55k | } |
920 | 4.55k | } Unexecuted instantiation: _ZN5doris22JsonbExtractStringImplINS_13JsonbTypeTypeEE15inner_loop_implEPNS_12JsonbWriterTINS_14JsonbOutStreamEEEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEESC_RSt10unique_ptrINS_11JsonbToJsonESt14default_deleteISG_EEPKcmRNS_9JsonbPathE _ZN5doris22JsonbExtractStringImplINS_13JsonbTypeJsonEE15inner_loop_implEPNS_12JsonbWriterTINS_14JsonbOutStreamEEEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEESC_RSt10unique_ptrINS_11JsonbToJsonESt14default_deleteISG_EEPKcmRNS_9JsonbPathE Line | Count | Source | 875 | 4.59k | const char* l_raw, size_t l_size, JsonbPath& path) { | 876 | | // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory | 877 | 4.59k | const JsonbDocument* doc = nullptr; | 878 | 4.59k | auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc); | 879 | 4.59k | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { | 880 | 0 | StringOP::push_null_string(i, res_data, res_offsets, null_map); | 881 | 0 | return; | 882 | 0 | } | 883 | | | 884 | | // value is NOT necessary to be deleted since JsonbValue will not allocate memory | 885 | 4.59k | auto find_result = doc->getValue()->findValue(path); | 886 | | | 887 | 4.59k | if (UNLIKELY(!find_result.value)) { | 888 | 46 | StringOP::push_null_string(i, res_data, res_offsets, null_map); | 889 | 46 | return; | 890 | 46 | } | 891 | | | 892 | | if constexpr (ValueType::only_get_type) { | 893 | | StringOP::push_value_string(std::string_view(find_result.value->typeName()), i, | 894 | | res_data, res_offsets); | 895 | | return; | 896 | 4.55k | } else { | 897 | 4.55k | static_assert(std::is_same_v<DataTypeJsonb, ReturnType>); | 898 | | if constexpr (ValueType::no_quotes) { | 899 | | if (find_result.value->isString()) { | 900 | | const auto* str_value = 
find_result.value->unpack<JsonbStringVal>(); | 901 | | const auto* blob = str_value->getBlob(); | 902 | | if (str_value->length() > 1 && blob[0] == '"' && | 903 | | blob[str_value->length() - 1] == '"') { | 904 | | writer->writeStartString(); | 905 | | writer->writeString(blob + 1, str_value->length() - 2); | 906 | | writer->writeEndString(); | 907 | | StringOP::push_value_string( | 908 | | std::string_view(writer->getOutput()->getBuffer(), | 909 | | writer->getOutput()->getSize()), | 910 | | i, res_data, res_offsets); | 911 | | return; | 912 | | } | 913 | | } | 914 | | } | 915 | 4.55k | writer->writeValueSimple(find_result.value); | 916 | 4.55k | StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(), | 917 | 4.55k | writer->getOutput()->getSize()), | 918 | 4.55k | i, res_data, res_offsets); | 919 | 4.55k | } | 920 | 4.55k | } |
Unexecuted instantiation: _ZN5doris22JsonbExtractStringImplINS_21JsonbTypeJsonNoQuotesEE15inner_loop_implEPNS_12JsonbWriterTINS_14JsonbOutStreamEEEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEESC_RSt10unique_ptrINS_11JsonbToJsonESt14default_deleteISG_EEPKcmRNS_9JsonbPathE |
921 | | |
922 | | public: |
923 | | // for jsonb_extract_string |
924 | | static Status vector_vector_v2( |
925 | | FunctionContext* context, const ColumnString::Chars& ldata, |
926 | | const ColumnString::Offsets& loffsets, const NullMap* l_null_map, |
927 | | const bool& json_data_const, |
928 | | const std::vector<const ColumnString*>& rdata_columns, // here we can support more paths |
929 | | const std::vector<const NullMap*>& r_null_maps, const std::vector<bool>& path_const, |
930 | 106 | ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets, NullMap& null_map) { |
931 | 106 | const size_t input_rows_count = null_map.size(); |
932 | 106 | res_offsets.resize(input_rows_count); |
933 | | |
934 | 106 | auto writer = std::make_unique<JsonbWriter>(); |
935 | 106 | std::unique_ptr<JsonbToJson> formater; |
936 | | |
937 | | // reuseable json path list, espacially for const path |
938 | 106 | std::vector<JsonbPath> json_path_list; |
939 | 106 | json_path_list.resize(rdata_columns.size()); |
940 | | |
941 | | // lambda function to parse json path for row i and path pi |
942 | 192 | auto parse_json_path = [&](size_t i, size_t pi) -> Status { |
943 | 192 | const auto index = index_check_const(i, path_const[pi]); |
944 | | |
945 | 192 | const ColumnString* path_col = rdata_columns[pi]; |
946 | 192 | const ColumnString::Chars& rdata = path_col->get_chars(); |
947 | 192 | const ColumnString::Offsets& roffsets = path_col->get_offsets(); |
948 | 192 | size_t r_off = roffsets[index - 1]; |
949 | 192 | size_t r_size = roffsets[index] - r_off; |
950 | 192 | const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]); |
951 | | |
952 | 192 | JsonbPath path; |
953 | 192 | if (!path.seek(r_raw, r_size)) { |
954 | 4 | return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}", |
955 | 4 | std::string_view(r_raw, r_size)); |
956 | 4 | } |
957 | | |
958 | 188 | json_path_list[pi] = std::move(path); |
959 | | |
960 | 188 | return Status::OK(); |
961 | 192 | }; Unexecuted instantiation: _ZZN5doris22JsonbExtractStringImplINS_13JsonbTypeTypeEE16vector_vector_v2EPNS_15FunctionContextERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS5_IjLm4096ES8_Lm16ELm15EEEPSA_RKbRKSt6vectorIPKNS_9ColumnStrIjEESaISM_EERKSI_ISF_SaISF_EERKSI_IbSaIbEERS9_RSC_SZ_ENKUlmmE_clEmm _ZZN5doris22JsonbExtractStringImplINS_13JsonbTypeJsonEE16vector_vector_v2EPNS_15FunctionContextERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS5_IjLm4096ES8_Lm16ELm15EEEPSA_RKbRKSt6vectorIPKNS_9ColumnStrIjEESaISM_EERKSI_ISF_SaISF_EERKSI_IbSaIbEERS9_RSC_SZ_ENKUlmmE_clEmm Line | Count | Source | 942 | 192 | auto parse_json_path = [&](size_t i, size_t pi) -> Status { | 943 | 192 | const auto index = index_check_const(i, path_const[pi]); | 944 | | | 945 | 192 | const ColumnString* path_col = rdata_columns[pi]; | 946 | 192 | const ColumnString::Chars& rdata = path_col->get_chars(); | 947 | 192 | const ColumnString::Offsets& roffsets = path_col->get_offsets(); | 948 | 192 | size_t r_off = roffsets[index - 1]; | 949 | 192 | size_t r_size = roffsets[index] - r_off; | 950 | 192 | const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]); | 951 | | | 952 | 192 | JsonbPath path; | 953 | 192 | if (!path.seek(r_raw, r_size)) { | 954 | 4 | return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}", | 955 | 4 | std::string_view(r_raw, r_size)); | 956 | 4 | } | 957 | | | 958 | 188 | json_path_list[pi] = std::move(path); | 959 | | | 960 | 188 | return Status::OK(); | 961 | 192 | }; |
Unexecuted instantiation: _ZZN5doris22JsonbExtractStringImplINS_21JsonbTypeJsonNoQuotesEE16vector_vector_v2EPNS_15FunctionContextERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS5_IjLm4096ES8_Lm16ELm15EEEPSA_RKbRKSt6vectorIPKNS_9ColumnStrIjEESaISM_EERKSI_ISF_SaISF_EERKSI_IbSaIbEERS9_RSC_SZ_ENKUlmmE_clEmm |
962 | | |
963 | 208 | for (size_t pi = 0; pi < rdata_columns.size(); pi++) { |
964 | 106 | if (path_const[pi]) { |
965 | 102 | if (r_null_maps[pi] && (*r_null_maps[pi])[0]) { |
966 | 0 | continue; |
967 | 0 | } |
968 | 102 | RETURN_IF_ERROR(parse_json_path(0, pi)); |
969 | 102 | } |
970 | 106 | } |
971 | | |
972 | 102 | res_data.reserve(ldata.size()); |
973 | 4.70k | for (size_t i = 0; i < input_rows_count; ++i) { |
974 | 4.60k | if (null_map[i]) { |
975 | 0 | continue; |
976 | 0 | } |
977 | | |
978 | 4.60k | const auto data_index = index_check_const(i, json_data_const); |
979 | 4.60k | if (l_null_map && (*l_null_map)[data_index]) { |
980 | 4 | StringOP::push_null_string(i, res_data, res_offsets, null_map); |
981 | 4 | continue; |
982 | 4 | } |
983 | | |
984 | 4.59k | size_t l_off = loffsets[data_index - 1]; |
985 | 4.59k | size_t l_size = loffsets[data_index] - l_off; |
986 | 4.59k | const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]); |
987 | 4.59k | if (rdata_columns.size() == 1) { // just return origin value |
988 | 4.59k | const auto path_index = index_check_const(i, path_const[0]); |
989 | 4.59k | if (r_null_maps[0] && (*r_null_maps[0])[path_index]) { |
990 | 0 | StringOP::push_null_string(i, res_data, res_offsets, null_map); |
991 | 0 | continue; |
992 | 0 | } |
993 | | |
994 | 4.59k | if (!path_const[0]) { |
995 | 90 | RETURN_IF_ERROR(parse_json_path(i, 0)); |
996 | 90 | } |
997 | | |
998 | 4.59k | writer->reset(); |
999 | 4.59k | inner_loop_impl(writer.get(), i, res_data, res_offsets, null_map, formater, l_raw, |
1000 | 4.59k | l_size, json_path_list[0]); |
1001 | 4.59k | } else { // will make array string to user |
1002 | 0 | writer->reset(); |
1003 | 0 | bool has_value = false; |
1004 | | |
1005 | | // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory |
1006 | 0 | const JsonbDocument* doc = nullptr; |
1007 | 0 | auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc); |
1008 | |
|
1009 | 0 | for (size_t pi = 0; pi < rdata_columns.size(); ++pi) { |
1010 | 0 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { |
1011 | 0 | continue; |
1012 | 0 | } |
1013 | | |
1014 | 0 | const auto path_index = index_check_const(i, path_const[pi]); |
1015 | 0 | if (r_null_maps[pi] && (*r_null_maps[pi])[path_index]) { |
1016 | 0 | StringOP::push_null_string(i, res_data, res_offsets, null_map); |
1017 | 0 | break; |
1018 | 0 | } |
1019 | | |
1020 | 0 | if (!path_const[pi]) { |
1021 | 0 | RETURN_IF_ERROR(parse_json_path(i, pi)); |
1022 | 0 | } |
1023 | | |
1024 | 0 | auto find_result = doc->getValue()->findValue(json_path_list[pi]); |
1025 | |
|
1026 | 0 | if (find_result.value) { |
1027 | 0 | if (!has_value) { |
1028 | 0 | has_value = true; |
1029 | 0 | writer->writeStartArray(); |
1030 | 0 | } |
1031 | 0 | if (find_result.value->isArray() && find_result.is_wildcard) { |
1032 | | // To avoid producing a nested array such as [[1, 2, 3], [4, 5, 6]], |
1033 | | // when the value is an array we write every item of that array instead of writing the array itself. |
1034 | | // Finally we get a flattened result such as [1, 2, 3, 4, 5, 6]. |
1035 | 0 | for (const auto& item : *find_result.value->unpack<ArrayVal>()) { |
1036 | 0 | writer->writeValue(&item); |
1037 | 0 | } |
1038 | 0 | } else { |
1039 | 0 | writer->writeValue(find_result.value); |
1040 | 0 | } |
1041 | 0 | } |
1042 | 0 | } |
1043 | 0 | if (has_value) { |
1044 | 0 | writer->writeEndArray(); |
1045 | 0 | StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(), |
1046 | 0 | writer->getOutput()->getSize()), |
1047 | 0 | i, res_data, res_offsets); |
1048 | 0 | } else { |
1049 | 0 | StringOP::push_null_string(i, res_data, res_offsets, null_map); |
1050 | 0 | } |
1051 | 0 | } |
1052 | 4.59k | } //for |
1053 | 102 | return Status::OK(); |
1054 | 102 | } Unexecuted instantiation: _ZN5doris22JsonbExtractStringImplINS_13JsonbTypeTypeEE16vector_vector_v2EPNS_15FunctionContextERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS5_IjLm4096ES8_Lm16ELm15EEEPSA_RKbRKSt6vectorIPKNS_9ColumnStrIjEESaISM_EERKSI_ISF_SaISF_EERKSI_IbSaIbEERS9_RSC_SZ_ _ZN5doris22JsonbExtractStringImplINS_13JsonbTypeJsonEE16vector_vector_v2EPNS_15FunctionContextERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS5_IjLm4096ES8_Lm16ELm15EEEPSA_RKbRKSt6vectorIPKNS_9ColumnStrIjEESaISM_EERKSI_ISF_SaISF_EERKSI_IbSaIbEERS9_RSC_SZ_ Line | Count | Source | 930 | 106 | ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets, NullMap& null_map) { | 931 | 106 | const size_t input_rows_count = null_map.size(); | 932 | 106 | res_offsets.resize(input_rows_count); | 933 | | | 934 | 106 | auto writer = std::make_unique<JsonbWriter>(); | 935 | 106 | std::unique_ptr<JsonbToJson> formater; | 936 | | | 937 | | // reuseable json path list, espacially for const path | 938 | 106 | std::vector<JsonbPath> json_path_list; | 939 | 106 | json_path_list.resize(rdata_columns.size()); | 940 | | | 941 | | // lambda function to parse json path for row i and path pi | 942 | 106 | auto parse_json_path = [&](size_t i, size_t pi) -> Status { | 943 | 106 | const auto index = index_check_const(i, path_const[pi]); | 944 | | | 945 | 106 | const ColumnString* path_col = rdata_columns[pi]; | 946 | 106 | const ColumnString::Chars& rdata = path_col->get_chars(); | 947 | 106 | const ColumnString::Offsets& roffsets = path_col->get_offsets(); | 948 | 106 | size_t r_off = roffsets[index - 1]; | 949 | 106 | size_t r_size = roffsets[index] - r_off; | 950 | 106 | const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]); | 951 | | | 952 | 106 | JsonbPath path; | 953 | 106 | if (!path.seek(r_raw, r_size)) { | 954 | 106 | return Status::InvalidArgument("Json path error: Invalid 
Json Path for value: {}", | 955 | 106 | std::string_view(r_raw, r_size)); | 956 | 106 | } | 957 | | | 958 | 106 | json_path_list[pi] = std::move(path); | 959 | | | 960 | 106 | return Status::OK(); | 961 | 106 | }; | 962 | | | 963 | 208 | for (size_t pi = 0; pi < rdata_columns.size(); pi++) { | 964 | 106 | if (path_const[pi]) { | 965 | 102 | if (r_null_maps[pi] && (*r_null_maps[pi])[0]) { | 966 | 0 | continue; | 967 | 0 | } | 968 | 102 | RETURN_IF_ERROR(parse_json_path(0, pi)); | 969 | 102 | } | 970 | 106 | } | 971 | | | 972 | 102 | res_data.reserve(ldata.size()); | 973 | 4.70k | for (size_t i = 0; i < input_rows_count; ++i) { | 974 | 4.60k | if (null_map[i]) { | 975 | 0 | continue; | 976 | 0 | } | 977 | | | 978 | 4.60k | const auto data_index = index_check_const(i, json_data_const); | 979 | 4.60k | if (l_null_map && (*l_null_map)[data_index]) { | 980 | 4 | StringOP::push_null_string(i, res_data, res_offsets, null_map); | 981 | 4 | continue; | 982 | 4 | } | 983 | | | 984 | 4.59k | size_t l_off = loffsets[data_index - 1]; | 985 | 4.59k | size_t l_size = loffsets[data_index] - l_off; | 986 | 4.59k | const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]); | 987 | 4.59k | if (rdata_columns.size() == 1) { // just return origin value | 988 | 4.59k | const auto path_index = index_check_const(i, path_const[0]); | 989 | 4.59k | if (r_null_maps[0] && (*r_null_maps[0])[path_index]) { | 990 | 0 | StringOP::push_null_string(i, res_data, res_offsets, null_map); | 991 | 0 | continue; | 992 | 0 | } | 993 | | | 994 | 4.59k | if (!path_const[0]) { | 995 | 90 | RETURN_IF_ERROR(parse_json_path(i, 0)); | 996 | 90 | } | 997 | | | 998 | 4.59k | writer->reset(); | 999 | 4.59k | inner_loop_impl(writer.get(), i, res_data, res_offsets, null_map, formater, l_raw, | 1000 | 4.59k | l_size, json_path_list[0]); | 1001 | 4.59k | } else { // will make array string to user | 1002 | 0 | writer->reset(); | 1003 | 0 | bool has_value = false; | 1004 | | | 1005 | | // doc is NOT necessary to be 
deleted since JsonbDocument will not allocate memory | 1006 | 0 | const JsonbDocument* doc = nullptr; | 1007 | 0 | auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc); | 1008 | |
| 1009 | 0 | for (size_t pi = 0; pi < rdata_columns.size(); ++pi) { | 1010 | 0 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { | 1011 | 0 | continue; | 1012 | 0 | } | 1013 | | | 1014 | 0 | const auto path_index = index_check_const(i, path_const[pi]); | 1015 | 0 | if (r_null_maps[pi] && (*r_null_maps[pi])[path_index]) { | 1016 | 0 | StringOP::push_null_string(i, res_data, res_offsets, null_map); | 1017 | 0 | break; | 1018 | 0 | } | 1019 | | | 1020 | 0 | if (!path_const[pi]) { | 1021 | 0 | RETURN_IF_ERROR(parse_json_path(i, pi)); | 1022 | 0 | } | 1023 | | | 1024 | 0 | auto find_result = doc->getValue()->findValue(json_path_list[pi]); | 1025 | |
| 1026 | 0 | if (find_result.value) { | 1027 | 0 | if (!has_value) { | 1028 | 0 | has_value = true; | 1029 | 0 | writer->writeStartArray(); | 1030 | 0 | } | 1031 | 0 | if (find_result.value->isArray() && find_result.is_wildcard) { | 1032 | | // To avoid getting results of nested array like [[1, 2, 3], [4, 5, 6]], | 1033 | | // if value is array, we should write all items in array, instead of write the array itself. | 1034 | | // finaly we will get results like [1, 2, 3, 4, 5, 6] | 1035 | 0 | for (const auto& item : *find_result.value->unpack<ArrayVal>()) { | 1036 | 0 | writer->writeValue(&item); | 1037 | 0 | } | 1038 | 0 | } else { | 1039 | 0 | writer->writeValue(find_result.value); | 1040 | 0 | } | 1041 | 0 | } | 1042 | 0 | } | 1043 | 0 | if (has_value) { | 1044 | 0 | writer->writeEndArray(); | 1045 | 0 | StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(), | 1046 | 0 | writer->getOutput()->getSize()), | 1047 | 0 | i, res_data, res_offsets); | 1048 | 0 | } else { | 1049 | 0 | StringOP::push_null_string(i, res_data, res_offsets, null_map); | 1050 | 0 | } | 1051 | 0 | } | 1052 | 4.59k | } //for | 1053 | 102 | return Status::OK(); | 1054 | 102 | } |
Unexecuted instantiation: _ZN5doris22JsonbExtractStringImplINS_21JsonbTypeJsonNoQuotesEE16vector_vector_v2EPNS_15FunctionContextERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS5_IjLm4096ES8_Lm16ELm15EEEPSA_RKbRKSt6vectorIPKNS_9ColumnStrIjEESaISM_EERKSI_ISF_SaISF_EERKSI_IbSaIbEERS9_RSC_SZ_ |
1055 | | |
1056 | | static Status vector_vector(FunctionContext* context, const ColumnString::Chars& ldata, |
1057 | | const ColumnString::Offsets& loffsets, const NullMap* l_null_map, |
1058 | | const ColumnString::Chars& rdata, |
1059 | | const ColumnString::Offsets& roffsets, const NullMap* r_null_map, |
1060 | | ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets, |
1061 | | NullMap& null_map) { |
1062 | | size_t input_rows_count = loffsets.size(); |
1063 | | res_offsets.resize(input_rows_count); |
1064 | | |
1065 | | std::unique_ptr<JsonbToJson> formater; |
1066 | | |
1067 | | JsonbWriter writer; |
1068 | | for (size_t i = 0; i < input_rows_count; ++i) { |
1069 | | if (l_null_map && (*l_null_map)[i]) { |
1070 | | StringOP::push_null_string(i, res_data, res_offsets, null_map); |
1071 | | continue; |
1072 | | } |
1073 | | |
1074 | | if (r_null_map && (*r_null_map)[i]) { |
1075 | | StringOP::push_null_string(i, res_data, res_offsets, null_map); |
1076 | | continue; |
1077 | | } |
1078 | | |
1079 | | int l_size = loffsets[i] - loffsets[i - 1]; |
1080 | | const char* l_raw = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]); |
1081 | | |
1082 | | int r_size = roffsets[i] - roffsets[i - 1]; |
1083 | | const char* r_raw = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]); |
1084 | | |
1085 | | JsonbPath path; |
1086 | | if (!path.seek(r_raw, r_size)) { |
1087 | | return Status::InvalidArgument( |
1088 | | "Json path error: Invalid Json Path for value: {} at row: {}", |
1089 | | std::string_view(r_raw, r_size), i); |
1090 | | } |
1091 | | |
1092 | | writer.reset(); |
1093 | | inner_loop_impl(&writer, i, res_data, res_offsets, null_map, formater, l_raw, l_size, |
1094 | | path); |
1095 | | } //for |
1096 | | return Status::OK(); |
1097 | | } //function |
1098 | | |
1099 | | static Status vector_scalar(FunctionContext* context, const ColumnString::Chars& ldata, |
1100 | | const ColumnString::Offsets& loffsets, const NullMap* l_null_map, |
1101 | | const StringRef& rdata, ColumnString::Chars& res_data, |
1102 | | ColumnString::Offsets& res_offsets, NullMap& null_map) { |
1103 | | size_t input_rows_count = loffsets.size(); |
1104 | | res_offsets.resize(input_rows_count); |
1105 | | |
1106 | | std::unique_ptr<JsonbToJson> formater; |
1107 | | |
1108 | | JsonbPath path; |
1109 | | if (!path.seek(rdata.data, rdata.size)) { |
1110 | | return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}", |
1111 | | std::string_view(rdata.data, rdata.size)); |
1112 | | } |
1113 | | |
1114 | | JsonbWriter writer; |
1115 | | for (size_t i = 0; i < input_rows_count; ++i) { |
1116 | | if (l_null_map && (*l_null_map)[i]) { |
1117 | | StringOP::push_null_string(i, res_data, res_offsets, null_map); |
1118 | | continue; |
1119 | | } |
1120 | | |
1121 | | int l_size = loffsets[i] - loffsets[i - 1]; |
1122 | | const char* l_raw = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]); |
1123 | | |
1124 | | writer.reset(); |
1125 | | inner_loop_impl(&writer, i, res_data, res_offsets, null_map, formater, l_raw, l_size, |
1126 | | path); |
1127 | | } //for |
1128 | | return Status::OK(); |
1129 | | } //function |
1130 | | |
1131 | | static Status scalar_vector(FunctionContext* context, const StringRef& ldata, |
1132 | | const ColumnString::Chars& rdata, |
1133 | | const ColumnString::Offsets& roffsets, const NullMap* r_null_map, |
1134 | | ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets, |
1135 | | NullMap& null_map) { |
1136 | | size_t input_rows_count = roffsets.size(); |
1137 | | res_offsets.resize(input_rows_count); |
1138 | | |
1139 | | std::unique_ptr<JsonbToJson> formater; |
1140 | | |
1141 | | JsonbWriter writer; |
1142 | | |
1143 | | for (size_t i = 0; i < input_rows_count; ++i) { |
1144 | | if (r_null_map && (*r_null_map)[i]) { |
1145 | | StringOP::push_null_string(i, res_data, res_offsets, null_map); |
1146 | | continue; |
1147 | | } |
1148 | | |
1149 | | int r_size = roffsets[i] - roffsets[i - 1]; |
1150 | | const char* r_raw = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]); |
1151 | | |
1152 | | JsonbPath path; |
1153 | | if (!path.seek(r_raw, r_size)) { |
1154 | | return Status::InvalidArgument( |
1155 | | "Json path error: Invalid Json Path for value: {} at row: {}", |
1156 | | std::string_view(r_raw, r_size), i); |
1157 | | } |
1158 | | |
1159 | | writer.reset(); |
1160 | | inner_loop_impl(&writer, i, res_data, res_offsets, null_map, formater, ldata.data, |
1161 | | ldata.size, path); |
1162 | | } //for |
1163 | | return Status::OK(); |
1164 | | } //function |
1165 | | }; |
1166 | | |
1167 | | struct JsonbExtractIsnull { |
1168 | | static constexpr auto name = "json_extract_isnull"; |
1169 | | static constexpr auto alias = "jsonb_extract_isnull"; |
1170 | | |
1171 | | using ReturnType = DataTypeUInt8; |
1172 | | using ColumnType = ColumnUInt8; |
1173 | | using Container = typename ColumnType::Container; |
1174 | | |
1175 | | private: |
1176 | | static ALWAYS_INLINE void inner_loop_impl(size_t i, Container& res, NullMap& null_map, |
1177 | | const char* l_raw_str, size_t l_str_size, |
1178 | 0 | JsonbPath& path) { |
1179 | 0 | if (null_map[i]) { |
1180 | 0 | res[i] = 0; |
1181 | 0 | return; |
1182 | 0 | } |
1183 | | |
1184 | | // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory |
1185 | 0 | const JsonbDocument* doc = nullptr; |
1186 | 0 | auto st = JsonbDocument::checkAndCreateDocument(l_raw_str, l_str_size, &doc); |
1187 | 0 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { |
1188 | 0 | null_map[i] = 1; |
1189 | 0 | res[i] = 0; |
1190 | 0 | return; |
1191 | 0 | } |
1192 | | |
1193 | | // value is NOT necessary to be deleted since JsonbValue will not allocate memory |
1194 | 0 | auto find_result = doc->getValue()->findValue(path); |
1195 | 0 | const auto* value = find_result.value; |
1196 | |
|
1197 | 0 | if (UNLIKELY(!value)) { |
1198 | 0 | null_map[i] = 1; |
1199 | 0 | res[i] = 0; |
1200 | 0 | return; |
1201 | 0 | } |
1202 | | |
1203 | 0 | res[i] = value->isNull(); |
1204 | 0 | } |
1205 | | |
1206 | | public: |
1207 | | // for jsonb_extract_int/int64/double |
1208 | | static Status vector_vector(FunctionContext* context, const ColumnString::Chars& ldata, |
1209 | | const ColumnString::Offsets& loffsets, const NullMap* l_null_map, |
1210 | | const ColumnString::Chars& rdata, |
1211 | | const ColumnString::Offsets& roffsets, const NullMap* r_null_map, |
1212 | 0 | Container& res, NullMap& null_map) { |
1213 | 0 | size_t size = loffsets.size(); |
1214 | 0 | res.resize(size); |
1215 | |
|
1216 | 0 | for (size_t i = 0; i < loffsets.size(); i++) { |
1217 | 0 | if ((l_null_map && (*l_null_map)[i]) || (r_null_map && (*r_null_map)[i])) { |
1218 | 0 | res[i] = 0; |
1219 | 0 | null_map[i] = 1; |
1220 | 0 | continue; |
1221 | 0 | } |
1222 | | |
1223 | 0 | const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]); |
1224 | 0 | int l_str_size = loffsets[i] - loffsets[i - 1]; |
1225 | |
|
1226 | 0 | const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]); |
1227 | 0 | int r_str_size = roffsets[i] - roffsets[i - 1]; |
1228 | |
|
1229 | 0 | JsonbPath path; |
1230 | 0 | if (!path.seek(r_raw_str, r_str_size)) { |
1231 | 0 | return Status::InvalidArgument( |
1232 | 0 | "Json path error: Invalid Json Path for value: {} at row: {}", |
1233 | 0 | std::string_view(r_raw_str, r_str_size), i); |
1234 | 0 | } |
1235 | | |
1236 | 0 | inner_loop_impl(i, res, null_map, l_raw_str, l_str_size, path); |
1237 | 0 | } //for |
1238 | 0 | return Status::OK(); |
1239 | 0 | } //function |
1240 | | |
1241 | | static Status scalar_vector(FunctionContext* context, const StringRef& ldata, |
1242 | | const ColumnString::Chars& rdata, |
1243 | | const ColumnString::Offsets& roffsets, const NullMap* r_null_map, |
1244 | 0 | Container& res, NullMap& null_map) { |
1245 | 0 | size_t size = roffsets.size(); |
1246 | 0 | res.resize(size); |
1247 | |
|
1248 | 0 | for (size_t i = 0; i < size; i++) { |
1249 | 0 | if (r_null_map && (*r_null_map)[i]) { |
1250 | 0 | res[i] = 0; |
1251 | 0 | null_map[i] = 1; |
1252 | 0 | continue; |
1253 | 0 | } |
1254 | | |
1255 | 0 | const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]); |
1256 | 0 | int r_str_size = roffsets[i] - roffsets[i - 1]; |
1257 | |
|
1258 | 0 | JsonbPath path; |
1259 | 0 | if (!path.seek(r_raw_str, r_str_size)) { |
1260 | 0 | return Status::InvalidArgument( |
1261 | 0 | "Json path error: Invalid Json Path for value: {} at row: {}", |
1262 | 0 | std::string_view(r_raw_str, r_str_size), i); |
1263 | 0 | } |
1264 | | |
1265 | 0 | inner_loop_impl(i, res, null_map, ldata.data, ldata.size, path); |
1266 | 0 | } //for |
1267 | 0 | return Status::OK(); |
1268 | 0 | } //function |
1269 | | |
1270 | | static Status vector_scalar(FunctionContext* context, const ColumnString::Chars& ldata, |
1271 | | const ColumnString::Offsets& loffsets, const NullMap* l_null_map, |
1272 | 0 | const StringRef& rdata, Container& res, NullMap& null_map) { |
1273 | 0 | size_t size = loffsets.size(); |
1274 | 0 | res.resize(size); |
1275 | |
|
1276 | 0 | JsonbPath path; |
1277 | 0 | if (!path.seek(rdata.data, rdata.size)) { |
1278 | 0 | return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}", |
1279 | 0 | std::string_view(rdata.data, rdata.size)); |
1280 | 0 | } |
1281 | | |
1282 | 0 | for (size_t i = 0; i < loffsets.size(); i++) { |
1283 | 0 | if (l_null_map && (*l_null_map)[i]) { |
1284 | 0 | res[i] = 0; |
1285 | 0 | null_map[i] = 1; |
1286 | 0 | continue; |
1287 | 0 | } |
1288 | | |
1289 | 0 | const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]); |
1290 | 0 | int l_str_size = loffsets[i] - loffsets[i - 1]; |
1291 | |
|
1292 | 0 | inner_loop_impl(i, res, null_map, l_raw_str, l_str_size, path); |
1293 | 0 | } //for |
1294 | 0 | return Status::OK(); |
1295 | 0 | } //function |
1296 | | }; |
1297 | | |
1298 | | struct JsonbTypeJson { |
1299 | | using T = std::string; |
1300 | | using ReturnType = DataTypeJsonb; |
1301 | | using ColumnType = ColumnString; |
1302 | | static const bool only_get_type = false; |
1303 | | static const bool no_quotes = false; |
1304 | | }; |
1305 | | |
1306 | | struct JsonbTypeJsonNoQuotes { |
1307 | | using T = std::string; |
1308 | | using ReturnType = DataTypeJsonb; |
1309 | | using ColumnType = ColumnString; |
1310 | | static const bool only_get_type = false; |
1311 | | static const bool no_quotes = true; |
1312 | | }; |
1313 | | |
1314 | | struct JsonbTypeType { |
1315 | | using T = std::string; |
1316 | | using ReturnType = DataTypeString; |
1317 | | using ColumnType = ColumnString; |
1318 | | static const bool only_get_type = true; |
1319 | | static const bool no_quotes = false; |
1320 | | }; |
1321 | | |
1322 | | struct JsonbExtractJsonb : public JsonbExtractStringImpl<JsonbTypeJson> { |
1323 | | static constexpr auto name = "jsonb_extract"; |
1324 | | static constexpr auto alias = "json_extract"; |
1325 | | }; |
1326 | | |
1327 | | struct JsonbExtractJsonbNoQuotes : public JsonbExtractStringImpl<JsonbTypeJsonNoQuotes> { |
1328 | | static constexpr auto name = "jsonb_extract_no_quotes"; |
1329 | | static constexpr auto alias = "json_extract_no_quotes"; |
1330 | | }; |
1331 | | |
1332 | | struct JsonbTypeImpl : public JsonbExtractStringImpl<JsonbTypeType> { |
1333 | | static constexpr auto name = "json_type"; |
1334 | | static constexpr auto alias = "jsonb_type"; |
1335 | | }; |
1336 | | |
1337 | | using FunctionJsonbExists = FunctionJsonbExtractPath; |
1338 | | using FunctionJsonbType = FunctionJsonbExtract<JsonbTypeImpl>; |
1339 | | |
1340 | | using FunctionJsonbExtractIsnull = FunctionJsonbExtract<JsonbExtractIsnull>; |
1341 | | using FunctionJsonbExtractJsonb = FunctionJsonbExtract<JsonbExtractJsonb>; |
1342 | | using FunctionJsonbExtractJsonbNoQuotes = FunctionJsonbExtract<JsonbExtractJsonbNoQuotes>; |
1343 | | |
1344 | | template <typename Impl> |
1345 | | class FunctionJsonbLength : public IFunction { |
1346 | | public: |
1347 | | static constexpr auto name = "json_length"; |
1348 | 1 | String get_name() const override { return name; } |
1349 | 10 | static FunctionPtr create() { return std::make_shared<FunctionJsonbLength<Impl>>(); } |
1350 | | |
1351 | 2 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
1352 | 2 | return make_nullable(std::make_shared<DataTypeInt32>()); |
1353 | 2 | } |
1354 | 9 | DataTypes get_variadic_argument_types_impl() const override { |
1355 | 9 | return Impl::get_variadic_argument_types(); |
1356 | 9 | } |
1357 | 2 | size_t get_number_of_arguments() const override { |
1358 | 2 | return get_variadic_argument_types_impl().size(); |
1359 | 2 | } |
1360 | | |
1361 | 4 | bool use_default_implementation_for_nulls() const override { return false; } |
1362 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
1363 | 2 | uint32_t result, size_t input_rows_count) const override { |
1364 | 2 | return Impl::execute_impl(context, block, arguments, result, input_rows_count); |
1365 | 2 | } |
1366 | | }; |
1367 | | |
1368 | | struct JsonbLengthUtil { |
1369 | | static Status jsonb_length_execute(FunctionContext* context, Block& block, |
1370 | | const ColumnNumbers& arguments, uint32_t result, |
1371 | 2 | size_t input_rows_count) { |
1372 | 2 | DCHECK_GE(arguments.size(), 2); |
1373 | 2 | ColumnPtr jsonb_data_column; |
1374 | 2 | bool jsonb_data_const = false; |
1375 | | // prepare jsonb data column |
1376 | 2 | std::tie(jsonb_data_column, jsonb_data_const) = |
1377 | 2 | unpack_if_const(block.get_by_position(arguments[0]).column); |
1378 | 2 | ColumnPtr path_column; |
1379 | 2 | bool is_const = false; |
1380 | 2 | std::tie(path_column, is_const) = |
1381 | 2 | unpack_if_const(block.get_by_position(arguments[1]).column); |
1382 | | |
1383 | 2 | auto null_map = ColumnUInt8::create(input_rows_count, 0); |
1384 | 2 | auto return_type = block.get_data_type(result); |
1385 | 2 | MutableColumnPtr res = return_type->create_column(); |
1386 | | |
1387 | 2 | JsonbPath path; |
1388 | 2 | if (is_const) { |
1389 | 1 | if (path_column->is_null_at(0)) { |
1390 | 2 | for (size_t i = 0; i < input_rows_count; ++i) { |
1391 | 1 | null_map->get_data()[i] = 1; |
1392 | 1 | res->insert_data(nullptr, 0); |
1393 | 1 | } |
1394 | | |
1395 | 1 | block.replace_by_position( |
1396 | 1 | result, ColumnNullable::create(std::move(res), std::move(null_map))); |
1397 | 1 | return Status::OK(); |
1398 | 1 | } |
1399 | | |
1400 | 0 | auto path_value = path_column->get_data_at(0); |
1401 | 0 | if (!path.seek(path_value.data, path_value.size)) { |
1402 | 0 | return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}", |
1403 | 0 | std::string_view(path_value.data, path_value.size)); |
1404 | 0 | } |
1405 | 0 | } |
1406 | | |
1407 | 5 | for (size_t i = 0; i < input_rows_count; ++i) { |
1408 | 4 | if (jsonb_data_column->is_null_at(i) || path_column->is_null_at(i) || |
1409 | 4 | (jsonb_data_column->get_data_at(i).size == 0)) { |
1410 | 1 | null_map->get_data()[i] = 1; |
1411 | 1 | res->insert_data(nullptr, 0); |
1412 | 1 | continue; |
1413 | 1 | } |
1414 | 3 | if (!is_const) { |
1415 | 3 | auto path_value = path_column->get_data_at(i); |
1416 | 3 | path.clean(); |
1417 | 3 | if (!path.seek(path_value.data, path_value.size)) { |
1418 | 0 | return Status::InvalidArgument( |
1419 | 0 | "Json path error: Invalid Json Path for value: {}", |
1420 | 0 | std::string_view(reinterpret_cast<const char*>(path_value.data), |
1421 | 0 | path_value.size)); |
1422 | 0 | } |
1423 | 3 | } |
1424 | 3 | auto jsonb_value = jsonb_data_column->get_data_at(i); |
1425 | | // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory |
1426 | 3 | const JsonbDocument* doc = nullptr; |
1427 | 3 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(jsonb_value.data, |
1428 | 3 | jsonb_value.size, &doc)); |
1429 | 3 | auto find_result = doc->getValue()->findValue(path); |
1430 | 3 | const auto* value = find_result.value; |
1431 | 3 | if (UNLIKELY(!value)) { |
1432 | 0 | null_map->get_data()[i] = 1; |
1433 | 0 | res->insert_data(nullptr, 0); |
1434 | 0 | continue; |
1435 | 0 | } |
1436 | 3 | auto length = value->numElements(); |
1437 | 3 | res->insert_data(const_cast<const char*>((char*)&length), 0); |
1438 | 3 | } |
1439 | 1 | block.replace_by_position(result, |
1440 | 1 | ColumnNullable::create(std::move(res), std::move(null_map))); |
1441 | 1 | return Status::OK(); |
1442 | 1 | } |
1443 | | }; |
1444 | | |
1445 | | struct JsonbLengthAndPathImpl { |
1446 | 9 | static DataTypes get_variadic_argument_types() { |
1447 | 9 | return {std::make_shared<DataTypeJsonb>(), std::make_shared<DataTypeString>()}; |
1448 | 9 | } |
1449 | | |
1450 | | static Status execute_impl(FunctionContext* context, Block& block, |
1451 | | const ColumnNumbers& arguments, uint32_t result, |
1452 | 2 | size_t input_rows_count) { |
1453 | 2 | return JsonbLengthUtil::jsonb_length_execute(context, block, arguments, result, |
1454 | 2 | input_rows_count); |
1455 | 2 | } |
1456 | | }; |
1457 | | |
1458 | | template <typename Impl> |
1459 | | class FunctionJsonbContains : public IFunction { |
1460 | | public: |
1461 | | static constexpr auto name = "json_contains"; |
1462 | 1 | String get_name() const override { return name; } |
1463 | 10 | static FunctionPtr create() { return std::make_shared<FunctionJsonbContains<Impl>>(); } |
1464 | | |
1465 | 2 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
1466 | 2 | return make_nullable(std::make_shared<DataTypeUInt8>()); |
1467 | 2 | } |
1468 | 9 | DataTypes get_variadic_argument_types_impl() const override { |
1469 | 9 | return Impl::get_variadic_argument_types(); |
1470 | 9 | } |
1471 | 2 | size_t get_number_of_arguments() const override { |
1472 | 2 | return get_variadic_argument_types_impl().size(); |
1473 | 2 | } |
1474 | | |
1475 | 4 | bool use_default_implementation_for_nulls() const override { return false; } |
1476 | | |
1477 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
1478 | 2 | uint32_t result, size_t input_rows_count) const override { |
1479 | 2 | return Impl::execute_impl(context, block, arguments, result, input_rows_count); |
1480 | 2 | } |
1481 | | }; |
1482 | | |
1483 | | struct JsonbContainsUtil { |
1484 | | static Status jsonb_contains_execute(FunctionContext* context, Block& block, |
1485 | | const ColumnNumbers& arguments, uint32_t result, |
1486 | 2 | size_t input_rows_count) { |
1487 | 2 | DCHECK_GE(arguments.size(), 3); |
1488 | | |
1489 | 2 | auto jsonb_data1_column = block.get_by_position(arguments[0]).column; |
1490 | 2 | auto jsonb_data2_column = block.get_by_position(arguments[1]).column; |
1491 | | |
1492 | 2 | ColumnPtr path_column; |
1493 | 2 | bool is_const = false; |
1494 | 2 | std::tie(path_column, is_const) = |
1495 | 2 | unpack_if_const(block.get_by_position(arguments[2]).column); |
1496 | | |
1497 | 2 | auto null_map = ColumnUInt8::create(input_rows_count, 0); |
1498 | 2 | auto return_type = block.get_data_type(result); |
1499 | 2 | MutableColumnPtr res = return_type->create_column(); |
1500 | | |
1501 | 2 | JsonbPath path; |
1502 | 2 | if (is_const) { |
1503 | 1 | if (path_column->is_null_at(0)) { |
1504 | 2 | for (size_t i = 0; i < input_rows_count; ++i) { |
1505 | 1 | null_map->get_data()[i] = 1; |
1506 | 1 | res->insert_data(nullptr, 0); |
1507 | 1 | } |
1508 | | |
1509 | 1 | block.replace_by_position( |
1510 | 1 | result, ColumnNullable::create(std::move(res), std::move(null_map))); |
1511 | 1 | return Status::OK(); |
1512 | 1 | } |
1513 | | |
1514 | 0 | auto path_value = path_column->get_data_at(0); |
1515 | 0 | if (!path.seek(path_value.data, path_value.size)) { |
1516 | 0 | return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}", |
1517 | 0 | std::string_view(path_value.data, path_value.size)); |
1518 | 0 | } |
1519 | 0 | } |
1520 | | |
1521 | 5 | for (size_t i = 0; i < input_rows_count; ++i) { |
1522 | 4 | if (jsonb_data1_column->is_null_at(i) || jsonb_data2_column->is_null_at(i) || |
1523 | 4 | path_column->is_null_at(i)) { |
1524 | 1 | null_map->get_data()[i] = 1; |
1525 | 1 | res->insert_data(nullptr, 0); |
1526 | 1 | continue; |
1527 | 1 | } |
1528 | | |
1529 | 3 | if (!is_const) { |
1530 | 3 | auto path_value = path_column->get_data_at(i); |
1531 | 3 | path.clean(); |
1532 | 3 | if (!path.seek(path_value.data, path_value.size)) { |
1533 | 0 | return Status::InvalidArgument( |
1534 | 0 | "Json path error: Invalid Json Path for value: {}", |
1535 | 0 | std::string_view(path_value.data, path_value.size)); |
1536 | 0 | } |
1537 | 3 | } |
1538 | | |
1539 | 3 | auto jsonb_value1 = jsonb_data1_column->get_data_at(i); |
1540 | 3 | auto jsonb_value2 = jsonb_data2_column->get_data_at(i); |
1541 | | |
1542 | 3 | if (jsonb_value1.size == 0 || jsonb_value2.size == 0) { |
1543 | 1 | null_map->get_data()[i] = 1; |
1544 | 1 | res->insert_data(nullptr, 0); |
1545 | 1 | continue; |
1546 | 1 | } |
1547 | | // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory |
1548 | 2 | const JsonbDocument* doc1 = nullptr; |
1549 | 2 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(jsonb_value1.data, |
1550 | 2 | jsonb_value1.size, &doc1)); |
1551 | 2 | const JsonbDocument* doc2 = nullptr; |
1552 | 2 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(jsonb_value2.data, |
1553 | 2 | jsonb_value2.size, &doc2)); |
1554 | | |
1555 | 2 | auto find_result = doc1->getValue()->findValue(path); |
1556 | 2 | const auto* value1 = find_result.value; |
1557 | 2 | const JsonbValue* value2 = doc2->getValue(); |
1558 | 2 | if (!value1 || !value2) { |
1559 | 0 | null_map->get_data()[i] = 1; |
1560 | 0 | res->insert_data(nullptr, 0); |
1561 | 0 | continue; |
1562 | 0 | } |
1563 | 2 | auto contains_value = value1->contains(value2); |
1564 | 2 | res->insert_data(const_cast<const char*>((char*)&contains_value), 0); |
1565 | 2 | } |
1566 | | |
1567 | 1 | block.replace_by_position(result, |
1568 | 1 | ColumnNullable::create(std::move(res), std::move(null_map))); |
1569 | 1 | return Status::OK(); |
1570 | 1 | } |
1571 | | }; |
1572 | | |
1573 | | template <bool ignore_null> |
1574 | | class FunctionJsonbArray : public IFunction { |
1575 | | public: |
1576 | | static constexpr auto name = "json_array"; |
1577 | | static constexpr auto alias = "jsonb_array"; |
1578 | | |
1579 | 18 | static FunctionPtr create() { return std::make_shared<FunctionJsonbArray>(); }_ZN5doris18FunctionJsonbArrayILb0EE6createEv Line | Count | Source | 1579 | 9 | static FunctionPtr create() { return std::make_shared<FunctionJsonbArray>(); } |
_ZN5doris18FunctionJsonbArrayILb1EE6createEv Line | Count | Source | 1579 | 9 | static FunctionPtr create() { return std::make_shared<FunctionJsonbArray>(); } |
|
1580 | | |
1581 | 0 | String get_name() const override { return name; }Unexecuted instantiation: _ZNK5doris18FunctionJsonbArrayILb0EE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris18FunctionJsonbArrayILb1EE8get_nameB5cxx11Ev |
1582 | | |
1583 | 0 | size_t get_number_of_arguments() const override { return 0; }Unexecuted instantiation: _ZNK5doris18FunctionJsonbArrayILb0EE23get_number_of_argumentsEv Unexecuted instantiation: _ZNK5doris18FunctionJsonbArrayILb1EE23get_number_of_argumentsEv |
1584 | 4 | bool is_variadic() const override { return true; }_ZNK5doris18FunctionJsonbArrayILb0EE11is_variadicEv Line | Count | Source | 1584 | 2 | bool is_variadic() const override { return true; } |
_ZNK5doris18FunctionJsonbArrayILb1EE11is_variadicEv Line | Count | Source | 1584 | 2 | bool is_variadic() const override { return true; } |
|
1585 | | |
1586 | 4 | bool use_default_implementation_for_nulls() const override { return false; }_ZNK5doris18FunctionJsonbArrayILb0EE36use_default_implementation_for_nullsEv Line | Count | Source | 1586 | 2 | bool use_default_implementation_for_nulls() const override { return false; } |
_ZNK5doris18FunctionJsonbArrayILb1EE36use_default_implementation_for_nullsEv Line | Count | Source | 1586 | 2 | bool use_default_implementation_for_nulls() const override { return false; } |
|
1587 | | |
1588 | 2 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
1589 | 2 | return std::make_shared<DataTypeJsonb>(); |
1590 | 2 | } _ZNK5doris18FunctionJsonbArrayILb0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS6_EE Line | Count | Source | 1588 | 1 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 1589 | 1 | return std::make_shared<DataTypeJsonb>(); | 1590 | 1 | } |
_ZNK5doris18FunctionJsonbArrayILb1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS6_EE Line | Count | Source | 1588 | 1 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 1589 | 1 | return std::make_shared<DataTypeJsonb>(); | 1590 | 1 | } |
|
1591 | | |
1592 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
1593 | 2 | uint32_t result, size_t input_rows_count) const override { |
1594 | 2 | auto return_data_type = std::make_shared<DataTypeJsonb>(); |
1595 | 2 | auto column = return_data_type->create_column(); |
1596 | 2 | column->reserve(input_rows_count); |
1597 | | |
1598 | 2 | JsonbWriter writer; |
1599 | 23 | for (size_t i = 0; i < input_rows_count; ++i) { |
1600 | 20 | writer.writeStartArray(); |
1601 | 20 | for (auto argument : arguments) { |
1602 | 20 | auto&& [arg_column, is_const] = |
1603 | 20 | unpack_if_const(block.get_by_position(argument).column); |
1604 | 20 | if (arg_column->is_nullable()) { |
1605 | 20 | const auto& nullable_column = |
1606 | 20 | assert_cast<const ColumnNullable&, TypeCheckOnRelease::DISABLE>( |
1607 | 20 | *arg_column); |
1608 | 20 | const auto& null_map = nullable_column.get_null_map_data(); |
1609 | 20 | const auto& nested_column = nullable_column.get_nested_column(); |
1610 | 20 | const auto& jsonb_column = |
1611 | 20 | assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>( |
1612 | 20 | nested_column); |
1613 | | |
1614 | 20 | auto index = index_check_const(i, is_const); |
1615 | 20 | if (null_map[index]) { |
1616 | 2 | if constexpr (ignore_null) { |
1617 | 1 | continue; |
1618 | 1 | } else { |
1619 | 1 | writer.writeNull(); |
1620 | 1 | } |
1621 | 18 | } else { |
1622 | 18 | auto jsonb_binary = jsonb_column.get_data_at(index); |
1623 | 18 | const JsonbDocument* doc = nullptr; |
1624 | 18 | auto st = JsonbDocument::checkAndCreateDocument(jsonb_binary.data, |
1625 | 18 | jsonb_binary.size, &doc); |
1626 | 18 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { |
1627 | 0 | if constexpr (ignore_null) { |
1628 | 0 | continue; |
1629 | 0 | } else { |
1630 | 0 | writer.writeNull(); |
1631 | 0 | } |
1632 | 18 | } else { |
1633 | 18 | writer.writeValue(doc->getValue()); |
1634 | 18 | } |
1635 | 18 | } |
1636 | 20 | } else { |
1637 | 0 | const auto& jsonb_column = |
1638 | 0 | assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>( |
1639 | 0 | *arg_column); |
1640 | |
|
1641 | 0 | auto index = index_check_const(i, is_const); |
1642 | 0 | auto jsonb_binary = jsonb_column.get_data_at(index); |
1643 | 0 | const JsonbDocument* doc = nullptr; |
1644 | 0 | auto st = JsonbDocument::checkAndCreateDocument(jsonb_binary.data, |
1645 | 0 | jsonb_binary.size, &doc); |
1646 | 0 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { |
1647 | 0 | if constexpr (ignore_null) { |
1648 | 0 | continue; |
1649 | 0 | } else { |
1650 | 0 | writer.writeNull(); |
1651 | 0 | } |
1652 | 0 | } else { |
1653 | 0 | writer.writeValue(doc->getValue()); |
1654 | 0 | } |
1655 | 0 | } |
1656 | 20 | } |
1657 | 11 | writer.writeEndArray(); |
1658 | 11 | column->insert_data(writer.getOutput()->getBuffer(), writer.getOutput()->getSize()); |
1659 | 11 | writer.reset(); |
1660 | 11 | } |
1661 | | |
1662 | 2 | block.get_by_position(result).column = std::move(column); |
1663 | 2 | return Status::OK(); |
1664 | 2 | } _ZNK5doris18FunctionJsonbArrayILb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 1593 | 1 | uint32_t result, size_t input_rows_count) const override { | 1594 | 1 | auto return_data_type = std::make_shared<DataTypeJsonb>(); | 1595 | 1 | auto column = return_data_type->create_column(); | 1596 | 1 | column->reserve(input_rows_count); | 1597 | | | 1598 | 1 | JsonbWriter writer; | 1599 | 11 | for (size_t i = 0; i < input_rows_count; ++i) { | 1600 | 10 | writer.writeStartArray(); | 1601 | 10 | for (auto argument : arguments) { | 1602 | 10 | auto&& [arg_column, is_const] = | 1603 | 10 | unpack_if_const(block.get_by_position(argument).column); | 1604 | 10 | if (arg_column->is_nullable()) { | 1605 | 10 | const auto& nullable_column = | 1606 | 10 | assert_cast<const ColumnNullable&, TypeCheckOnRelease::DISABLE>( | 1607 | 10 | *arg_column); | 1608 | 10 | const auto& null_map = nullable_column.get_null_map_data(); | 1609 | 10 | const auto& nested_column = nullable_column.get_nested_column(); | 1610 | 10 | const auto& jsonb_column = | 1611 | 10 | assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>( | 1612 | 10 | nested_column); | 1613 | | | 1614 | 10 | auto index = index_check_const(i, is_const); | 1615 | 10 | if (null_map[index]) { | 1616 | | if constexpr (ignore_null) { | 1617 | | continue; | 1618 | 1 | } else { | 1619 | 1 | writer.writeNull(); | 1620 | 1 | } | 1621 | 9 | } else { | 1622 | 9 | auto jsonb_binary = jsonb_column.get_data_at(index); | 1623 | 9 | const JsonbDocument* doc = nullptr; | 1624 | 9 | auto st = JsonbDocument::checkAndCreateDocument(jsonb_binary.data, | 1625 | 9 | jsonb_binary.size, &doc); | 1626 | 9 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { | 1627 | | if constexpr (ignore_null) { | 1628 | | continue; | 1629 | 0 | } else { | 1630 | 0 | writer.writeNull(); | 1631 | 0 | } | 1632 | 9 | } else { | 1633 | 9 | writer.writeValue(doc->getValue()); | 1634 | 9 | } | 1635 | 9 | 
} | 1636 | 10 | } else { | 1637 | 0 | const auto& jsonb_column = | 1638 | 0 | assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>( | 1639 | 0 | *arg_column); | 1640 | |
| 1641 | 0 | auto index = index_check_const(i, is_const); | 1642 | 0 | auto jsonb_binary = jsonb_column.get_data_at(index); | 1643 | 0 | const JsonbDocument* doc = nullptr; | 1644 | 0 | auto st = JsonbDocument::checkAndCreateDocument(jsonb_binary.data, | 1645 | 0 | jsonb_binary.size, &doc); | 1646 | 0 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { | 1647 | | if constexpr (ignore_null) { | 1648 | | continue; | 1649 | 0 | } else { | 1650 | 0 | writer.writeNull(); | 1651 | 0 | } | 1652 | 0 | } else { | 1653 | 0 | writer.writeValue(doc->getValue()); | 1654 | 0 | } | 1655 | 0 | } | 1656 | 10 | } | 1657 | 10 | writer.writeEndArray(); | 1658 | 10 | column->insert_data(writer.getOutput()->getBuffer(), writer.getOutput()->getSize()); | 1659 | 10 | writer.reset(); | 1660 | 10 | } | 1661 | | | 1662 | 1 | block.get_by_position(result).column = std::move(column); | 1663 | 1 | return Status::OK(); | 1664 | 1 | } |
_ZNK5doris18FunctionJsonbArrayILb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 1593 | 1 | uint32_t result, size_t input_rows_count) const override { | 1594 | 1 | auto return_data_type = std::make_shared<DataTypeJsonb>(); | 1595 | 1 | auto column = return_data_type->create_column(); | 1596 | 1 | column->reserve(input_rows_count); | 1597 | | | 1598 | 1 | JsonbWriter writer; | 1599 | 12 | for (size_t i = 0; i < input_rows_count; ++i) { | 1600 | 10 | writer.writeStartArray(); | 1601 | 10 | for (auto argument : arguments) { | 1602 | 10 | auto&& [arg_column, is_const] = | 1603 | 10 | unpack_if_const(block.get_by_position(argument).column); | 1604 | 10 | if (arg_column->is_nullable()) { | 1605 | 10 | const auto& nullable_column = | 1606 | 10 | assert_cast<const ColumnNullable&, TypeCheckOnRelease::DISABLE>( | 1607 | 10 | *arg_column); | 1608 | 10 | const auto& null_map = nullable_column.get_null_map_data(); | 1609 | 10 | const auto& nested_column = nullable_column.get_nested_column(); | 1610 | 10 | const auto& jsonb_column = | 1611 | 10 | assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>( | 1612 | 10 | nested_column); | 1613 | | | 1614 | 10 | auto index = index_check_const(i, is_const); | 1615 | 10 | if (null_map[index]) { | 1616 | 1 | if constexpr (ignore_null) { | 1617 | 1 | continue; | 1618 | | } else { | 1619 | | writer.writeNull(); | 1620 | | } | 1621 | 9 | } else { | 1622 | 9 | auto jsonb_binary = jsonb_column.get_data_at(index); | 1623 | 9 | const JsonbDocument* doc = nullptr; | 1624 | 9 | auto st = JsonbDocument::checkAndCreateDocument(jsonb_binary.data, | 1625 | 9 | jsonb_binary.size, &doc); | 1626 | 9 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { | 1627 | 0 | if constexpr (ignore_null) { | 1628 | 0 | continue; | 1629 | | } else { | 1630 | | writer.writeNull(); | 1631 | | } | 1632 | 9 | } else { | 1633 | 9 | writer.writeValue(doc->getValue()); | 1634 | 9 | } | 1635 | 9 | } | 1636 | 10 | } 
else { | 1637 | 0 | const auto& jsonb_column = | 1638 | 0 | assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>( | 1639 | 0 | *arg_column); | 1640 | |
| 1641 | 0 | auto index = index_check_const(i, is_const); | 1642 | 0 | auto jsonb_binary = jsonb_column.get_data_at(index); | 1643 | 0 | const JsonbDocument* doc = nullptr; | 1644 | 0 | auto st = JsonbDocument::checkAndCreateDocument(jsonb_binary.data, | 1645 | 0 | jsonb_binary.size, &doc); | 1646 | 0 | if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] { | 1647 | 0 | if constexpr (ignore_null) { | 1648 | 0 | continue; | 1649 | | } else { | 1650 | | writer.writeNull(); | 1651 | | } | 1652 | 0 | } else { | 1653 | 0 | writer.writeValue(doc->getValue()); | 1654 | 0 | } | 1655 | 0 | } | 1656 | 10 | } | 1657 | 11 | writer.writeEndArray(); | 1658 | 11 | column->insert_data(writer.getOutput()->getBuffer(), writer.getOutput()->getSize()); | 1659 | 11 | writer.reset(); | 1660 | 11 | } | 1661 | | | 1662 | 2 | block.get_by_position(result).column = std::move(column); | 1663 | 2 | return Status::OK(); | 1664 | 1 | } |
|
1665 | | }; |
1666 | | |
1667 | | class FunctionJsonbObject : public IFunction { |
1668 | | public: |
1669 | | static constexpr auto name = "json_object"; |
1670 | | static constexpr auto alias = "jsonb_object"; |
1671 | | |
1672 | 8 | static FunctionPtr create() { return std::make_shared<FunctionJsonbObject>(); } |
1673 | | |
1674 | 0 | String get_name() const override { return name; } |
1675 | | |
1676 | 0 | size_t get_number_of_arguments() const override { return 0; } |
1677 | 1 | bool is_variadic() const override { return true; } |
1678 | | |
1679 | 0 | bool use_default_implementation_for_nulls() const override { return false; } |
1680 | | |
1681 | 0 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
1682 | 0 | return std::make_shared<DataTypeJsonb>(); |
1683 | 0 | } |
1684 | | |
1685 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
1686 | 0 | uint32_t result, size_t input_rows_count) const override { |
1687 | 0 | if (arguments.size() % 2 != 0) { |
1688 | 0 | return Status::InvalidArgument( |
1689 | 0 | "JSON object must have an even number of arguments, but got: {}", |
1690 | 0 | arguments.size()); |
1691 | 0 | } |
1692 | | |
1693 | 0 | auto return_data_type = std::make_shared<DataTypeJsonb>(); |
1694 | |
|
1695 | 0 | auto write_key = [](JsonbWriter& writer, const ColumnString& key_col, const bool is_const, |
1696 | 0 | const NullMap* null_map, const size_t arg_index, const size_t row_idx) { |
1697 | 0 | auto index = index_check_const(row_idx, is_const); |
1698 | 0 | if (null_map && (*null_map)[index]) { |
1699 | 0 | return Status::InvalidArgument( |
1700 | 0 | "JSON documents may not contain NULL member name(argument " |
1701 | 0 | "index: " |
1702 | 0 | "{}, row index: {})", |
1703 | 0 | row_idx, arg_index); |
1704 | 0 | } |
1705 | | |
1706 | 0 | auto key_string = key_col.get_data_at(index); |
1707 | 0 | if (key_string.size > 255) { |
1708 | 0 | return Status::InvalidArgument( |
1709 | 0 | "JSON object keys(argument index: {}) must be less than 256 " |
1710 | 0 | "bytes, but got size: {}", |
1711 | 0 | arg_index, key_string.size); |
1712 | 0 | } |
1713 | 0 | writer.writeKey(key_string.data, static_cast<uint8_t>(key_string.size)); |
1714 | 0 | return Status::OK(); |
1715 | 0 | }; |
1716 | |
|
1717 | 0 | auto write_value = [](JsonbWriter& writer, const ColumnString& value_col, |
1718 | 0 | const bool is_const, const NullMap* null_map, const size_t arg_index, |
1719 | 0 | const size_t row_idx) { |
1720 | 0 | auto index = index_check_const(row_idx, is_const); |
1721 | 0 | if (null_map && (*null_map)[index]) { |
1722 | 0 | writer.writeNull(); |
1723 | 0 | return Status::OK(); |
1724 | 0 | } |
1725 | | |
1726 | 0 | auto value_string = value_col.get_data_at(index); |
1727 | 0 | const JsonbDocument* doc = nullptr; |
1728 | 0 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(value_string.data, |
1729 | 0 | value_string.size, &doc)); |
1730 | 0 | writer.writeValue(doc->getValue()); |
1731 | 0 | return Status::OK(); |
1732 | 0 | }; |
1733 | |
|
1734 | 0 | for (size_t arg_idx = 0; arg_idx != arguments.size(); arg_idx += 2) { |
1735 | 0 | auto key_argument = arguments[arg_idx]; |
1736 | 0 | auto value_argument = arguments[arg_idx + 1]; |
1737 | |
|
1738 | 0 | auto& key_data_type = block.get_by_position(key_argument).type; |
1739 | 0 | auto& value_data_type = block.get_by_position(value_argument).type; |
1740 | 0 | if (!is_string_type(key_data_type->get_primitive_type())) { |
1741 | 0 | return Status::InvalidArgument( |
1742 | 0 | "JSON object key(argument index: {}) must be String, but got type: " |
1743 | 0 | "{}(primitive type: {})", |
1744 | 0 | arg_idx, key_data_type->get_name(), |
1745 | 0 | static_cast<int>(key_data_type->get_primitive_type())); |
1746 | 0 | } |
1747 | | |
1748 | 0 | if (value_data_type->get_primitive_type() != PrimitiveType::TYPE_JSONB) { |
1749 | 0 | return Status::InvalidArgument( |
1750 | 0 | "JSON object value(argument index: {}) must be JSON, but got type: {}", |
1751 | 0 | arg_idx, value_data_type->get_name()); |
1752 | 0 | } |
1753 | 0 | } |
1754 | | |
1755 | 0 | auto column = return_data_type->create_column(); |
1756 | 0 | column->reserve(input_rows_count); |
1757 | |
|
1758 | 0 | JsonbWriter writer; |
1759 | 0 | for (size_t i = 0; i != input_rows_count; ++i) { |
1760 | 0 | writer.writeStartObject(); |
1761 | 0 | for (size_t arg_idx = 0; arg_idx != arguments.size(); arg_idx += 2) { |
1762 | 0 | auto key_argument = arguments[arg_idx]; |
1763 | 0 | auto value_argument = arguments[arg_idx + 1]; |
1764 | 0 | auto&& [key_column, key_const] = |
1765 | 0 | unpack_if_const(block.get_by_position(key_argument).column); |
1766 | 0 | auto&& [value_column, value_const] = |
1767 | 0 | unpack_if_const(block.get_by_position(value_argument).column); |
1768 | |
|
1769 | 0 | if (key_column->is_nullable()) { |
1770 | 0 | const auto& nullable_column = |
1771 | 0 | assert_cast<const ColumnNullable&, TypeCheckOnRelease::DISABLE>( |
1772 | 0 | *key_column); |
1773 | 0 | const auto& null_map = nullable_column.get_null_map_data(); |
1774 | 0 | const auto& nested_column = nullable_column.get_nested_column(); |
1775 | 0 | const auto& key_arg_column = |
1776 | 0 | assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>( |
1777 | 0 | nested_column); |
1778 | |
|
1779 | 0 | RETURN_IF_ERROR( |
1780 | 0 | write_key(writer, key_arg_column, key_const, &null_map, arg_idx, i)); |
1781 | 0 | } else { |
1782 | 0 | const auto& key_arg_column = |
1783 | 0 | assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>( |
1784 | 0 | *key_column); |
1785 | 0 | RETURN_IF_ERROR( |
1786 | 0 | write_key(writer, key_arg_column, key_const, nullptr, arg_idx, i)); |
1787 | 0 | } |
1788 | | |
1789 | 0 | if (value_column->is_nullable()) { |
1790 | 0 | const auto& nullable_column = |
1791 | 0 | assert_cast<const ColumnNullable&, TypeCheckOnRelease::DISABLE>( |
1792 | 0 | *value_column); |
1793 | 0 | const auto& null_map = nullable_column.get_null_map_data(); |
1794 | 0 | const auto& nested_column = nullable_column.get_nested_column(); |
1795 | 0 | const auto& value_arg_column = |
1796 | 0 | assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>( |
1797 | 0 | nested_column); |
1798 | |
|
1799 | 0 | RETURN_IF_ERROR(write_value(writer, value_arg_column, value_const, &null_map, |
1800 | 0 | arg_idx + 1, i)); |
1801 | 0 | } else { |
1802 | 0 | const auto& value_arg_column = |
1803 | 0 | assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>( |
1804 | 0 | *value_column); |
1805 | 0 | RETURN_IF_ERROR(write_value(writer, value_arg_column, value_const, nullptr, |
1806 | 0 | arg_idx + 1, i)); |
1807 | 0 | } |
1808 | 0 | } |
1809 | | |
1810 | 0 | writer.writeEndObject(); |
1811 | 0 | column->insert_data(writer.getOutput()->getBuffer(), writer.getOutput()->getSize()); |
1812 | 0 | writer.reset(); |
1813 | 0 | } |
1814 | | |
1815 | 0 | block.get_by_position(result).column = std::move(column); |
1816 | 0 | return Status::OK(); |
1817 | 0 | } |
1818 | | }; |
1819 | | |
1820 | | enum class JsonbModifyType { Insert, Set, Replace }; |
1821 | | |
1822 | | template <JsonbModifyType modify_type> |
1823 | | struct JsonbModifyName { |
1824 | | static constexpr auto name = "jsonb_modify"; |
1825 | | static constexpr auto alias = "json_modify"; |
1826 | | }; |
1827 | | |
1828 | | template <> |
1829 | | struct JsonbModifyName<JsonbModifyType::Insert> { |
1830 | | static constexpr auto name = "jsonb_insert"; |
1831 | | static constexpr auto alias = "json_insert"; |
1832 | | }; |
1833 | | template <> |
1834 | | struct JsonbModifyName<JsonbModifyType::Set> { |
1835 | | static constexpr auto name = "jsonb_set"; |
1836 | | static constexpr auto alias = "json_set"; |
1837 | | }; |
1838 | | template <> |
1839 | | struct JsonbModifyName<JsonbModifyType::Replace> { |
1840 | | static constexpr auto name = "jsonb_replace"; |
1841 | | static constexpr auto alias = "json_replace"; |
1842 | | }; |
1843 | | |
1844 | | template <JsonbModifyType modify_type> |
1845 | | class FunctionJsonbModify : public IFunction { |
1846 | | public: |
1847 | | static constexpr auto name = JsonbModifyName<modify_type>::name; |
1848 | | static constexpr auto alias = JsonbModifyName<modify_type>::alias; |
1849 | | |
1850 | 24 | static FunctionPtr create() { return std::make_shared<FunctionJsonbModify<modify_type>>(); }_ZN5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE6createEv Line | Count | Source | 1850 | 8 | static FunctionPtr create() { return std::make_shared<FunctionJsonbModify<modify_type>>(); } |
_ZN5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE6createEv Line | Count | Source | 1850 | 8 | static FunctionPtr create() { return std::make_shared<FunctionJsonbModify<modify_type>>(); } |
_ZN5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE6createEv Line | Count | Source | 1850 | 8 | static FunctionPtr create() { return std::make_shared<FunctionJsonbModify<modify_type>>(); } |
|
1851 | | |
1852 | 0 | String get_name() const override { return name; }Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE8get_nameB5cxx11Ev |
1853 | | |
1854 | 0 | size_t get_number_of_arguments() const override { return 0; }Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE23get_number_of_argumentsEv Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE23get_number_of_argumentsEv Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE23get_number_of_argumentsEv |
1855 | 3 | bool is_variadic() const override { return true; }_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE11is_variadicEv Line | Count | Source | 1855 | 1 | bool is_variadic() const override { return true; } |
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE11is_variadicEv Line | Count | Source | 1855 | 1 | bool is_variadic() const override { return true; } |
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE11is_variadicEv Line | Count | Source | 1855 | 1 | bool is_variadic() const override { return true; } |
|
1856 | | |
1857 | 0 | bool use_default_implementation_for_nulls() const override { return false; }Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE36use_default_implementation_for_nullsEv Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE36use_default_implementation_for_nullsEv Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE36use_default_implementation_for_nullsEv |
1858 | | |
1859 | 0 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
1860 | 0 | return make_nullable(std::make_shared<DataTypeJsonb>()); |
1861 | 0 | } Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE |
1862 | | |
1863 | | Status create_all_null_result(const DataTypePtr& return_data_type, Block& block, |
1864 | 0 | uint32_t result, size_t input_rows_count) const { |
1865 | 0 | auto result_column = return_data_type->create_column(); |
1866 | 0 | result_column->insert_default(); |
1867 | 0 | auto const_column = ColumnConst::create(std::move(result_column), input_rows_count); |
1868 | 0 | block.get_by_position(result).column = std::move(const_column); |
1869 | 0 | return Status::OK(); |
1870 | 0 | } Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE22create_all_null_resultERKSt10shared_ptrIKNS_9IDataTypeEERNS_5BlockEjm Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE22create_all_null_resultERKSt10shared_ptrIKNS_9IDataTypeEERNS_5BlockEjm Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE22create_all_null_resultERKSt10shared_ptrIKNS_9IDataTypeEERNS_5BlockEjm |
1871 | | |
1872 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
1873 | 0 | uint32_t result, size_t input_rows_count) const override { |
1874 | 0 | if (arguments.size() % 2 != 1 || arguments.size() < 3) { |
1875 | 0 | return Status::InvalidArgument( |
1876 | 0 | "Function {} must have an odd number of arguments and more than 2 arguments, " |
1877 | 0 | "but got: {}", |
1878 | 0 | name, arguments.size()); |
1879 | 0 | } |
1880 | | |
1881 | 0 | const size_t keys_count = (arguments.size() - 1) / 2; |
1882 | |
|
1883 | 0 | auto return_data_type = make_nullable(std::make_shared<DataTypeJsonb>()); |
1884 | |
|
1885 | 0 | auto result_column = return_data_type->create_column(); |
1886 | 0 | auto& result_nullable_col = assert_cast<ColumnNullable&>(*result_column); |
1887 | 0 | auto& null_map = result_nullable_col.get_null_map_data(); |
1888 | 0 | auto& res_string_column = |
1889 | 0 | assert_cast<ColumnString&>(result_nullable_col.get_nested_column()); |
1890 | 0 | auto& res_chars = res_string_column.get_chars(); |
1891 | 0 | auto& res_offsets = res_string_column.get_offsets(); |
1892 | |
|
1893 | 0 | null_map.resize_fill(input_rows_count, 0); |
1894 | 0 | res_offsets.resize(input_rows_count); |
1895 | 0 | auto&& [json_data_arg_column, json_data_const] = |
1896 | 0 | unpack_if_const(block.get_by_position(arguments[0]).column); |
1897 | |
|
1898 | 0 | if (json_data_const) { |
1899 | 0 | if (json_data_arg_column->is_null_at(0)) { |
1900 | 0 | return create_all_null_result(return_data_type, block, result, input_rows_count); |
1901 | 0 | } |
1902 | 0 | } |
1903 | | |
1904 | 0 | std::vector<const ColumnString*> json_path_columns(keys_count); |
1905 | 0 | std::vector<bool> json_path_constant(keys_count); |
1906 | 0 | std::vector<const NullMap*> json_path_null_maps(keys_count, nullptr); |
1907 | |
|
1908 | 0 | std::vector<const ColumnString*> json_value_columns(keys_count); |
1909 | 0 | std::vector<bool> json_value_constant(keys_count); |
1910 | 0 | std::vector<const NullMap*> json_value_null_maps(keys_count, nullptr); |
1911 | |
|
1912 | 0 | const NullMap* json_data_null_map = nullptr; |
1913 | 0 | const ColumnString* json_data_column; |
1914 | 0 | if (json_data_arg_column->is_nullable()) { |
1915 | 0 | const auto& nullable_column = assert_cast<const ColumnNullable&>(*json_data_arg_column); |
1916 | 0 | json_data_null_map = &nullable_column.get_null_map_data(); |
1917 | 0 | const auto& nested_column = nullable_column.get_nested_column(); |
1918 | 0 | json_data_column = assert_cast<const ColumnString*>(&nested_column); |
1919 | 0 | } else { |
1920 | 0 | json_data_column = assert_cast<const ColumnString*>(json_data_arg_column.get()); |
1921 | 0 | } |
1922 | |
|
1923 | 0 | for (size_t i = 1; i < arguments.size(); i += 2) { |
1924 | 0 | auto&& [path_column, path_const] = |
1925 | 0 | unpack_if_const(block.get_by_position(arguments[i]).column); |
1926 | 0 | auto&& [value_column, value_const] = |
1927 | 0 | unpack_if_const(block.get_by_position(arguments[i + 1]).column); |
1928 | |
|
1929 | 0 | if (path_const) { |
1930 | 0 | if (path_column->is_null_at(0)) { |
1931 | 0 | return create_all_null_result(return_data_type, block, result, |
1932 | 0 | input_rows_count); |
1933 | 0 | } |
1934 | 0 | } |
1935 | | |
1936 | 0 | json_path_constant[i / 2] = path_const; |
1937 | 0 | if (path_column->is_nullable()) { |
1938 | 0 | const auto& nullable_column = assert_cast<const ColumnNullable&>(*path_column); |
1939 | 0 | json_path_null_maps[i / 2] = &nullable_column.get_null_map_data(); |
1940 | 0 | const auto& nested_column = nullable_column.get_nested_column(); |
1941 | 0 | json_path_columns[i / 2] = assert_cast<const ColumnString*>(&nested_column); |
1942 | 0 | } else { |
1943 | 0 | json_path_columns[i / 2] = assert_cast<const ColumnString*>(path_column.get()); |
1944 | 0 | } |
1945 | |
|
1946 | 0 | json_value_constant[i / 2] = value_const; |
1947 | 0 | if (value_column->is_nullable()) { |
1948 | 0 | const auto& nullable_column = assert_cast<const ColumnNullable&>(*value_column); |
1949 | 0 | json_value_null_maps[i / 2] = &nullable_column.get_null_map_data(); |
1950 | 0 | const auto& nested_column = nullable_column.get_nested_column(); |
1951 | 0 | json_value_columns[i / 2] = assert_cast<const ColumnString*>(&nested_column); |
1952 | 0 | } else { |
1953 | 0 | json_value_columns[i / 2] = assert_cast<const ColumnString*>(value_column.get()); |
1954 | 0 | } |
1955 | 0 | } |
1956 | | |
1957 | 0 | DorisVector<const JsonbDocument*> json_documents(input_rows_count); |
1958 | 0 | if (json_data_const) { |
1959 | 0 | auto json_data_string = json_data_column->get_data_at(0); |
1960 | 0 | const JsonbDocument* doc = nullptr; |
1961 | 0 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(json_data_string.data, |
1962 | 0 | json_data_string.size, &doc)); |
1963 | 0 | if (!doc || !doc->getValue()) [[unlikely]] { |
1964 | 0 | return create_all_null_result(return_data_type, block, result, input_rows_count); |
1965 | 0 | } |
1966 | 0 | for (size_t i = 0; i != input_rows_count; ++i) { |
1967 | 0 | json_documents[i] = doc; |
1968 | 0 | } |
1969 | 0 | } else { |
1970 | 0 | for (size_t i = 0; i != input_rows_count; ++i) { |
1971 | 0 | if (json_data_null_map && (*json_data_null_map)[i]) { |
1972 | 0 | null_map[i] = 1; |
1973 | 0 | json_documents[i] = nullptr; |
1974 | 0 | continue; |
1975 | 0 | } |
1976 | | |
1977 | 0 | auto json_data_string = json_data_column->get_data_at(i); |
1978 | 0 | const JsonbDocument* doc = nullptr; |
1979 | 0 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(json_data_string.data, |
1980 | 0 | json_data_string.size, &doc)); |
1981 | 0 | if (!doc || !doc->getValue()) [[unlikely]] { |
1982 | 0 | null_map[i] = 1; |
1983 | 0 | continue; |
1984 | 0 | } |
1985 | 0 | json_documents[i] = doc; |
1986 | 0 | } |
1987 | 0 | } |
1988 | | |
1989 | 0 | DorisVector<DorisVector<JsonbPath>> json_paths(keys_count); |
1990 | 0 | DorisVector<DorisVector<const JsonbValue*>> json_values(keys_count); |
1991 | |
|
1992 | 0 | RETURN_IF_ERROR(parse_paths_and_values(json_paths, json_values, arguments, input_rows_count, |
1993 | 0 | json_path_columns, json_path_constant, |
1994 | 0 | json_path_null_maps, json_value_columns, |
1995 | 0 | json_value_constant, json_value_null_maps)); |
1996 | | |
1997 | 0 | JsonbWriter writer; |
1998 | 0 | struct DocumentBuffer { |
1999 | 0 | DorisUniqueBufferPtr<char> ptr; |
2000 | 0 | size_t size = 0; |
2001 | 0 | size_t capacity = 0; |
2002 | 0 | }; |
2003 | |
|
2004 | 0 | DocumentBuffer tmp_buffer; |
2005 | |
|
2006 | 0 | for (size_t row_idx = 0; row_idx != input_rows_count; ++row_idx) { |
2007 | 0 | for (size_t i = 1; i < arguments.size(); i += 2) { |
2008 | 0 | const size_t index = i / 2; |
2009 | 0 | auto& json_path = json_paths[index]; |
2010 | 0 | auto& json_value = json_values[index]; |
2011 | |
|
2012 | 0 | const auto path_index = index_check_const(row_idx, json_path_constant[index]); |
2013 | 0 | const auto value_index = index_check_const(row_idx, json_value_constant[index]); |
2014 | |
|
2015 | 0 | if (null_map[row_idx]) { |
2016 | 0 | continue; |
2017 | 0 | } |
2018 | | |
2019 | 0 | if (json_documents[row_idx] == nullptr) { |
2020 | 0 | null_map[row_idx] = 1; |
2021 | 0 | continue; |
2022 | 0 | } |
2023 | | |
2024 | 0 | if (json_path_null_maps[index] && (*json_path_null_maps[index])[path_index]) { |
2025 | 0 | null_map[row_idx] = 1; |
2026 | 0 | continue; |
2027 | 0 | } |
2028 | | |
2029 | 0 | auto find_result = |
2030 | 0 | json_documents[row_idx]->getValue()->findValue(json_path[path_index]); |
2031 | |
|
2032 | 0 | if (find_result.is_wildcard) { |
2033 | 0 | return Status::InvalidArgument( |
2034 | 0 | " In this situation, path expressions may not contain the * and ** " |
2035 | 0 | "tokens or an array range, argument index: {}, row index: {}", |
2036 | 0 | i, row_idx); |
2037 | 0 | } |
2038 | | |
2039 | 0 | if constexpr (modify_type == JsonbModifyType::Insert) { |
2040 | 0 | if (find_result.value) { |
2041 | 0 | continue; |
2042 | 0 | } |
2043 | 0 | } else if constexpr (modify_type == JsonbModifyType::Replace) { |
2044 | 0 | if (!find_result.value) { |
2045 | 0 | continue; |
2046 | 0 | } |
2047 | 0 | } |
2048 | | |
2049 | 0 | std::vector<const JsonbValue*> parents; |
2050 | |
|
2051 | 0 | bool replace = false; |
2052 | 0 | parents.emplace_back(json_documents[row_idx]->getValue()); |
2053 | 0 | if (find_result.value) { |
2054 | | // find target path, replace it with the new value. |
2055 | 0 | replace = true; |
2056 | 0 | if (!build_parents_by_path(json_documents[row_idx]->getValue(), |
2057 | 0 | json_path[path_index], parents)) { |
2058 | 0 | DCHECK(false); |
2059 | 0 | continue; |
2060 | 0 | } |
2061 | 0 | } else { |
2062 | | // does not find target path, insert the new value. |
2063 | 0 | JsonbPath new_path; |
2064 | 0 | for (size_t j = 0; j < json_path[path_index].get_leg_vector_size() - 1; ++j) { |
2065 | 0 | auto* current_leg = json_path[path_index].get_leg_from_leg_vector(j); |
2066 | 0 | std::unique_ptr<leg_info> leg = std::make_unique<leg_info>( |
2067 | 0 | current_leg->leg_ptr, current_leg->leg_len, |
2068 | 0 | current_leg->array_index, current_leg->type); |
2069 | 0 | new_path.add_leg_to_leg_vector(std::move(leg)); |
2070 | 0 | } |
2071 | |
|
2072 | 0 | if (!build_parents_by_path(json_documents[row_idx]->getValue(), new_path, |
2073 | 0 | parents)) { |
2074 | 0 | continue; |
2075 | 0 | } |
2076 | 0 | } |
2077 | | |
2078 | 0 | const auto legs_count = json_path[path_index].get_leg_vector_size(); |
2079 | 0 | leg_info* last_leg = |
2080 | 0 | legs_count > 0 |
2081 | 0 | ? json_path[path_index].get_leg_from_leg_vector(legs_count - 1) |
2082 | 0 | : nullptr; |
2083 | 0 | RETURN_IF_ERROR(write_json_value(json_documents[row_idx]->getValue(), parents, 0, |
2084 | 0 | json_value[value_index], replace, last_leg, |
2085 | 0 | writer)); |
2086 | | |
2087 | 0 | auto* writer_output = writer.getOutput(); |
2088 | 0 | if (writer_output->getSize() > tmp_buffer.capacity) { |
2089 | 0 | tmp_buffer.capacity = |
2090 | 0 | ((size_t(writer_output->getSize()) + 1024 - 1) / 1024) * 1024; |
2091 | 0 | tmp_buffer.ptr = make_unique_buffer<char>(tmp_buffer.capacity); |
2092 | 0 | DCHECK_LE(writer_output->getSize(), tmp_buffer.capacity); |
2093 | 0 | } |
2094 | |
|
2095 | 0 | memcpy(tmp_buffer.ptr.get(), writer_output->getBuffer(), writer_output->getSize()); |
2096 | 0 | tmp_buffer.size = writer_output->getSize(); |
2097 | |
|
2098 | 0 | writer.reset(); |
2099 | |
|
2100 | 0 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument( |
2101 | 0 | tmp_buffer.ptr.get(), tmp_buffer.size, &json_documents[row_idx])); |
2102 | 0 | } |
2103 | | |
2104 | 0 | if (!null_map[row_idx]) { |
2105 | 0 | const auto* jsonb_document = json_documents[row_idx]; |
2106 | 0 | const auto size = jsonb_document->numPackedBytes(); |
2107 | 0 | res_chars.insert(reinterpret_cast<const char*>(jsonb_document), |
2108 | 0 | reinterpret_cast<const char*>(jsonb_document) + size); |
2109 | 0 | } |
2110 | |
|
2111 | 0 | res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size()); |
2112 | |
|
2113 | 0 | if (!null_map[row_idx]) { |
2114 | 0 | auto* ptr = res_chars.data() + res_offsets[row_idx - 1]; |
2115 | 0 | auto size = res_offsets[row_idx] - res_offsets[row_idx - 1]; |
2116 | 0 | const JsonbDocument* doc = nullptr; |
2117 | 0 | THROW_IF_ERROR(JsonbDocument::checkAndCreateDocument( |
2118 | 0 | reinterpret_cast<const char*>(ptr), size, &doc)); |
2119 | 0 | } |
2120 | 0 | } |
2121 | | |
2122 | 0 | block.get_by_position(result).column = std::move(result_column); |
2123 | 0 | return Status::OK(); |
2124 | 0 | } Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm |
2125 | | |
2126 | | bool build_parents_by_path(const JsonbValue* root, const JsonbPath& path, |
2127 | 0 | std::vector<const JsonbValue*>& parents) const { |
2128 | 0 | const size_t index = parents.size() - 1; |
2129 | 0 | if (index == path.get_leg_vector_size()) { |
2130 | 0 | return true; |
2131 | 0 | } |
2132 | | |
2133 | 0 | JsonbPath current; |
2134 | 0 | auto* current_leg = path.get_leg_from_leg_vector(index); |
2135 | 0 | std::unique_ptr<leg_info> leg = |
2136 | 0 | std::make_unique<leg_info>(current_leg->leg_ptr, current_leg->leg_len, |
2137 | 0 | current_leg->array_index, current_leg->type); |
2138 | 0 | current.add_leg_to_leg_vector(std::move(leg)); |
2139 | |
|
2140 | 0 | auto find_result = root->findValue(current); |
2141 | 0 | if (!find_result.value) { |
2142 | 0 | std::string path_string; |
2143 | 0 | current.to_string(&path_string); |
2144 | 0 | return false; |
2145 | 0 | } else if (find_result.value == root) { |
2146 | 0 | return true; |
2147 | 0 | } else { |
2148 | 0 | parents.emplace_back(find_result.value); |
2149 | 0 | } |
2150 | | |
2151 | 0 | return build_parents_by_path(find_result.value, path, parents); |
2152 | 0 | } Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE21build_parents_by_pathEPKNS_10JsonbValueERKNS_9JsonbPathERSt6vectorIS5_SaIS5_EE Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE21build_parents_by_pathEPKNS_10JsonbValueERKNS_9JsonbPathERSt6vectorIS5_SaIS5_EE Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE21build_parents_by_pathEPKNS_10JsonbValueERKNS_9JsonbPathERSt6vectorIS5_SaIS5_EE |
2153 | | |
2154 | | Status write_json_value(const JsonbValue* root, const std::vector<const JsonbValue*>& parents, |
2155 | | const size_t parent_index, const JsonbValue* value, const bool replace, |
2156 | 0 | const leg_info* last_leg, JsonbWriter& writer) const { |
2157 | 0 | if (parent_index >= parents.size()) { |
2158 | 0 | return Status::InvalidArgument( |
2159 | 0 | "JsonbModify: parent_index {} is out of bounds for parents size {}", |
2160 | 0 | parent_index, parents.size()); |
2161 | 0 | } |
2162 | | |
2163 | 0 | if (parents[parent_index] != root) { |
2164 | 0 | return Status::InvalidArgument( |
2165 | 0 | "JsonbModify: parent value does not match root value, parent_index: {}, " |
2166 | 0 | "parents size: {}", |
2167 | 0 | parent_index, parents.size()); |
2168 | 0 | } |
2169 | | |
2170 | 0 | if (parent_index == parents.size() - 1 && replace) { |
2171 | | // We are at the last parent, write the value directly |
2172 | 0 | if (value == nullptr) { |
2173 | 0 | writer.writeNull(); |
2174 | 0 | } else { |
2175 | 0 | writer.writeValue(value); |
2176 | 0 | } |
2177 | 0 | return Status::OK(); |
2178 | 0 | } |
2179 | | |
2180 | 0 | bool value_written = false; |
2181 | 0 | bool is_last_parent = (parent_index == parents.size() - 1); |
2182 | 0 | const auto* next_parent = is_last_parent ? nullptr : parents[parent_index + 1]; |
2183 | 0 | if (root->isArray()) { |
2184 | 0 | writer.writeStartArray(); |
2185 | 0 | const auto* array_val = root->unpack<ArrayVal>(); |
2186 | 0 | for (int i = 0; i != array_val->numElem(); ++i) { |
2187 | 0 | auto* it = array_val->get(i); |
2188 | |
|
2189 | 0 | if (is_last_parent && last_leg->array_index == i) { |
2190 | 0 | value_written = true; |
2191 | 0 | writer.writeValue(value); |
2192 | 0 | } else if (it == next_parent) { |
2193 | 0 | value_written = true; |
2194 | 0 | RETURN_IF_ERROR(write_json_value(it, parents, parent_index + 1, value, replace, |
2195 | 0 | last_leg, writer)); |
2196 | 0 | } else { |
2197 | 0 | writer.writeValue(it); |
2198 | 0 | } |
2199 | 0 | } |
2200 | 0 | if (is_last_parent && !value_written) { |
2201 | 0 | value_written = true; |
2202 | 0 | writer.writeValue(value); |
2203 | 0 | } |
2204 | |
|
2205 | 0 | writer.writeEndArray(); |
2206 | |
|
2207 | 0 | } else { |
2208 | | /** |
2209 | | Because even for a non-array object, `$[0]` can still point to that object: |
2210 | | ``` |
2211 | | select json_extract('{"key": "value"}', '$[0]'); |
2212 | | +------------------------------------------+ |
2213 | | | json_extract('{"key": "value"}', '$[0]') | |
2214 | | +------------------------------------------+ |
2215 | | | {"key": "value"} | |
2216 | | +------------------------------------------+ |
2217 | | ``` |
2218 | | So when inserting an element into `$[1]`, even if '$' does not represent an array, |
2219 | | it should be converted to an array before insertion: |
2220 | | ``` |
2221 | | select json_insert('123','$[1]', null); |
2222 | | +---------------------------------+ |
2223 | | | json_insert('123','$[1]', null) | |
2224 | | +---------------------------------+ |
2225 | | | [123, null] | |
2226 | | +---------------------------------+ |
2227 | | ``` |
2228 | | */ |
2229 | 0 | if (is_last_parent && last_leg && last_leg->type == ARRAY_CODE) { |
2230 | 0 | writer.writeStartArray(); |
2231 | 0 | writer.writeValue(root); |
2232 | 0 | writer.writeValue(value); |
2233 | 0 | writer.writeEndArray(); |
2234 | 0 | return Status::OK(); |
2235 | 0 | } else if (root->isObject()) { |
2236 | 0 | writer.writeStartObject(); |
2237 | 0 | const auto* object_val = root->unpack<ObjectVal>(); |
2238 | 0 | for (const auto& it : *object_val) { |
2239 | 0 | writer.writeKey(it.getKeyStr(), it.klen()); |
2240 | 0 | if (it.value() == next_parent) { |
2241 | 0 | value_written = true; |
2242 | 0 | RETURN_IF_ERROR(write_json_value(it.value(), parents, parent_index + 1, |
2243 | 0 | value, replace, last_leg, writer)); |
2244 | 0 | } else { |
2245 | 0 | writer.writeValue(it.value()); |
2246 | 0 | } |
2247 | 0 | } |
2248 | | |
2249 | 0 | if (is_last_parent && !value_written) { |
2250 | 0 | value_written = true; |
2251 | 0 | writer.writeStartObject(); |
2252 | 0 | writer.writeKey(last_leg->leg_ptr, static_cast<uint8_t>(last_leg->leg_len)); |
2253 | 0 | writer.writeValue(value); |
2254 | 0 | writer.writeEndObject(); |
2255 | 0 | } |
2256 | 0 | writer.writeEndObject(); |
2257 | |
|
2258 | 0 | } else { |
2259 | 0 | return Status::InvalidArgument("Cannot insert value into this type"); |
2260 | 0 | } |
2261 | 0 | } |
2262 | | |
2263 | 0 | if (!value_written) { |
2264 | 0 | return Status::InvalidArgument( |
2265 | 0 | "JsonbModify: value not written, parent_index: {}, parents size: {}", |
2266 | 0 | parent_index, parents.size()); |
2267 | 0 | } |
2268 | | |
2269 | 0 | return Status::OK(); |
2270 | 0 | } Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE16write_json_valueEPKNS_10JsonbValueERKSt6vectorIS5_SaIS5_EEmS5_bPKNS_8leg_infoERNS_12JsonbWriterTINS_14JsonbOutStreamEEE Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE16write_json_valueEPKNS_10JsonbValueERKSt6vectorIS5_SaIS5_EEmS5_bPKNS_8leg_infoERNS_12JsonbWriterTINS_14JsonbOutStreamEEE Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE16write_json_valueEPKNS_10JsonbValueERKSt6vectorIS5_SaIS5_EEmS5_bPKNS_8leg_infoERNS_12JsonbWriterTINS_14JsonbOutStreamEEE |
2271 | | |
2272 | | Status parse_paths_and_values(DorisVector<DorisVector<JsonbPath>>& json_paths, |
2273 | | DorisVector<DorisVector<const JsonbValue*>>& json_values, |
2274 | | const ColumnNumbers& arguments, const size_t input_rows_count, |
2275 | | const std::vector<const ColumnString*>& json_path_columns, |
2276 | | const std::vector<bool>& json_path_constant, |
2277 | | const std::vector<const NullMap*>& json_path_null_maps, |
2278 | | const std::vector<const ColumnString*>& json_value_columns, |
2279 | | const std::vector<bool>& json_value_constant, |
2280 | 0 | const std::vector<const NullMap*>& json_value_null_maps) const { |
2281 | 0 | for (size_t i = 1; i < arguments.size(); i += 2) { |
2282 | 0 | const size_t index = i / 2; |
2283 | 0 | const auto* json_path_column = json_path_columns[index]; |
2284 | 0 | const auto* value_column = json_value_columns[index]; |
2285 | |
|
2286 | 0 | json_paths[index].resize(json_path_constant[index] ? 1 : input_rows_count); |
2287 | 0 | json_values[index].resize(json_value_constant[index] ? 1 : input_rows_count, nullptr); |
2288 | |
|
2289 | 0 | for (size_t row_idx = 0; row_idx != json_paths[index].size(); ++row_idx) { |
2290 | 0 | if (json_path_null_maps[index] && (*json_path_null_maps[index])[row_idx]) { |
2291 | 0 | continue; |
2292 | 0 | } |
2293 | | |
2294 | 0 | auto path_string = json_path_column->get_data_at(row_idx); |
2295 | 0 | if (!json_paths[index][row_idx].seek(path_string.data, path_string.size)) { |
2296 | 0 | return Status::InvalidArgument( |
2297 | 0 | "Json path error: Invalid Json Path for value: {}, " |
2298 | 0 | "argument " |
2299 | 0 | "index: {}, row index: {}", |
2300 | 0 | std::string_view(path_string.data, path_string.size), i, row_idx); |
2301 | 0 | } |
2302 | | |
2303 | 0 | if (json_paths[index][row_idx].is_wildcard()) { |
2304 | 0 | return Status::InvalidArgument( |
2305 | 0 | "In this situation, path expressions may not contain the * and ** " |
2306 | 0 | "tokens, argument index: {}, row index: {}", |
2307 | 0 | i, row_idx); |
2308 | 0 | } |
2309 | 0 | } |
2310 | | |
2311 | 0 | for (size_t row_idx = 0; row_idx != json_values[index].size(); ++row_idx) { |
2312 | 0 | if (json_value_null_maps[index] && (*json_value_null_maps[index])[row_idx]) { |
2313 | 0 | continue; |
2314 | 0 | } |
2315 | | |
2316 | 0 | auto value_string = value_column->get_data_at(row_idx); |
2317 | 0 | const JsonbDocument* doc = nullptr; |
2318 | 0 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(value_string.data, |
2319 | 0 | value_string.size, &doc)); |
2320 | 0 | if (doc) { |
2321 | 0 | json_values[index][row_idx] = doc->getValue(); |
2322 | 0 | } |
2323 | 0 | } |
2324 | 0 | } |
2325 | | |
2326 | 0 | return Status::OK(); |
2327 | 0 | } Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE22parse_paths_and_valuesERSt6vectorIS3_INS_9JsonbPathENS_18CustomStdAllocatorIS4_NS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEEEEENS5_ISA_S8_EEERS3_IS3_IPKNS_10JsonbValueENS5_ISG_S8_EEENS5_ISI_S8_EEERKS3_IjSaIjEEmRKS3_IPKNS_9ColumnStrIjEESaIST_EERKS3_IbSaIbEERKS3_IPKNS_8PODArrayIhLm4096ENS6_ILb0ELb0ELb0ES7_Lb0EEELm16ELm15EEESaIS16_EESX_S11_S1A_ Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE22parse_paths_and_valuesERSt6vectorIS3_INS_9JsonbPathENS_18CustomStdAllocatorIS4_NS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEEEEENS5_ISA_S8_EEERS3_IS3_IPKNS_10JsonbValueENS5_ISG_S8_EEENS5_ISI_S8_EEERKS3_IjSaIjEEmRKS3_IPKNS_9ColumnStrIjEESaIST_EERKS3_IbSaIbEERKS3_IPKNS_8PODArrayIhLm4096ENS6_ILb0ELb0ELb0ES7_Lb0EEELm16ELm15EEESaIS16_EESX_S11_S1A_ Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE22parse_paths_and_valuesERSt6vectorIS3_INS_9JsonbPathENS_18CustomStdAllocatorIS4_NS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEEEEENS5_ISA_S8_EEERS3_IS3_IPKNS_10JsonbValueENS5_ISG_S8_EEENS5_ISI_S8_EEERKS3_IjSaIjEEmRKS3_IPKNS_9ColumnStrIjEESaIST_EERKS3_IbSaIbEERKS3_IPKNS_8PODArrayIhLm4096ENS6_ILb0ELb0ELb0ES7_Lb0EEELm16ELm15EEESaIS16_EESX_S11_S1A_ |
2328 | | }; |
2329 | | |
2330 | | struct JsonbContainsAndPathImpl { |
2331 | 9 | static DataTypes get_variadic_argument_types() { |
2332 | 9 | return {std::make_shared<DataTypeJsonb>(), std::make_shared<DataTypeJsonb>(), |
2333 | 9 | std::make_shared<DataTypeString>()}; |
2334 | 9 | } |
2335 | | |
2336 | | static Status execute_impl(FunctionContext* context, Block& block, |
2337 | | const ColumnNumbers& arguments, uint32_t result, |
2338 | 2 | size_t input_rows_count) { |
2339 | 2 | return JsonbContainsUtil::jsonb_contains_execute(context, block, arguments, result, |
2340 | 2 | input_rows_count); |
2341 | 2 | } |
2342 | | }; |
2343 | | |
2344 | | class FunctionJsonSearch : public IFunction { |
2345 | | private: |
2346 | | using OneFun = std::function<Status(size_t, bool*)>; |
2347 | 0 | static Status always_one(size_t i, bool* res) { |
2348 | 0 | *res = true; |
2349 | 0 | return Status::OK(); |
2350 | 0 | } |
2351 | 0 | static Status always_all(size_t i, bool* res) { |
2352 | 0 | *res = false; |
2353 | 0 | return Status::OK(); |
2354 | 0 | } |
2355 | | |
2356 | | using CheckNullFun = std::function<bool(size_t)>; |
2357 | 0 | static bool always_not_null(size_t) { return false; } |
2358 | | |
2359 | | using GetJsonStringRefFun = std::function<StringRef(size_t)>; |
2360 | | |
2361 | 0 | Status matched(const std::string_view& str, LikeState* state, unsigned char* res) const { |
2362 | 0 | StringRef pattern; // not used |
2363 | 0 | StringRef value_val(str.data(), str.size()); |
2364 | 0 | return (state->scalar_function)(&state->search_state, value_val, pattern, res); |
2365 | 0 | } |
2366 | | |
2367 | | /** |
2368 | | * Recursive search for matching string, if found, the result will be added to a vector |
2369 | | * @param element json element |
2370 | | * @param one_match |
2371 | | * @param search_str |
2372 | | * @param cur_path |
2373 | | * @param matches The path that has already been matched |
2374 | | * @return true if matched else false |
2375 | | */ |
2376 | | bool find_matches(const JsonbValue* element, const bool& one_match, LikeState* state, |
2377 | 0 | JsonbPath* cur_path, std::unordered_set<std::string>* matches) const { |
2378 | 0 | if (element->isString()) { |
2379 | 0 | const auto* json_string = element->unpack<JsonbStringVal>(); |
2380 | 0 | const std::string_view element_str(json_string->getBlob(), json_string->length()); |
2381 | 0 | unsigned char res; |
2382 | 0 | RETURN_IF_ERROR(matched(element_str, state, &res)); |
2383 | 0 | if (res) { |
2384 | 0 | std::string str; |
2385 | 0 | auto valid = cur_path->to_string(&str); |
2386 | 0 | if (!valid) { |
2387 | 0 | return false; |
2388 | 0 | } |
2389 | 0 | return matches->insert(str).second; |
2390 | 0 | } else { |
2391 | 0 | return false; |
2392 | 0 | } |
2393 | 0 | } else if (element->isObject()) { |
2394 | 0 | const auto* object = element->unpack<ObjectVal>(); |
2395 | 0 | bool find = false; |
2396 | 0 | for (const auto& item : *object) { |
2397 | 0 | Slice key(item.getKeyStr(), item.klen()); |
2398 | 0 | const auto* child_element = item.value(); |
2399 | | // construct an object member path leg. |
2400 | 0 | auto leg = std::make_unique<leg_info>(key.data, key.size, 0, MEMBER_CODE); |
2401 | 0 | cur_path->add_leg_to_leg_vector(std::move(leg)); |
2402 | 0 | find |= find_matches(child_element, one_match, state, cur_path, matches); |
2403 | 0 | cur_path->pop_leg_from_leg_vector(); |
2404 | 0 | if (one_match && find) { |
2405 | 0 | return true; |
2406 | 0 | } |
2407 | 0 | } |
2408 | 0 | return find; |
2409 | 0 | } else if (element->isArray()) { |
2410 | 0 | const auto* array = element->unpack<ArrayVal>(); |
2411 | 0 | bool find = false; |
2412 | 0 | for (int i = 0; i < array->numElem(); ++i) { |
2413 | 0 | auto leg = std::make_unique<leg_info>(nullptr, 0, i, ARRAY_CODE); |
2414 | 0 | cur_path->add_leg_to_leg_vector(std::move(leg)); |
2415 | 0 | const auto* child_element = array->get(i); |
2416 | | // construct an array cell path leg. |
2417 | 0 | find |= find_matches(child_element, one_match, state, cur_path, matches); |
2418 | 0 | cur_path->pop_leg_from_leg_vector(); |
2419 | 0 | if (one_match && find) { |
2420 | 0 | return true; |
2421 | 0 | } |
2422 | 0 | } |
2423 | 0 | return find; |
2424 | 0 | } else { |
2425 | 0 | return false; |
2426 | 0 | } |
2427 | 0 | } |
2428 | | |
2429 | | void make_result_str(JsonbWriter& writer, std::unordered_set<std::string>& matches, |
2430 | 0 | ColumnString* result_col) const { |
2431 | 0 | if (matches.size() == 1) { |
2432 | 0 | for (const auto& str_ref : matches) { |
2433 | 0 | writer.writeStartString(); |
2434 | 0 | writer.writeString(str_ref); |
2435 | 0 | writer.writeEndString(); |
2436 | 0 | } |
2437 | 0 | } else { |
2438 | 0 | writer.writeStartArray(); |
2439 | 0 | for (const auto& str_ref : matches) { |
2440 | 0 | writer.writeStartString(); |
2441 | 0 | writer.writeString(str_ref); |
2442 | 0 | writer.writeEndString(); |
2443 | 0 | } |
2444 | 0 | writer.writeEndArray(); |
2445 | 0 | } |
2446 | |
|
2447 | 0 | result_col->insert_data(writer.getOutput()->getBuffer(), |
2448 | 0 | (size_t)writer.getOutput()->getSize()); |
2449 | 0 | } |
2450 | | |
2451 | | template <bool search_is_const> |
2452 | | Status execute_vector(Block& block, size_t input_rows_count, CheckNullFun json_null_check, |
2453 | | GetJsonStringRefFun col_json_string, CheckNullFun one_null_check, |
2454 | | OneFun one_check, CheckNullFun search_null_check, |
2455 | | const ColumnString* col_search_string, FunctionContext* context, |
2456 | 0 | size_t result) const { |
2457 | 0 | auto result_col = ColumnString::create(); |
2458 | 0 | auto null_map = ColumnUInt8::create(input_rows_count, 0); |
2459 | |
|
2460 | 0 | std::shared_ptr<LikeState> state_ptr; |
2461 | 0 | LikeState* state = nullptr; |
2462 | 0 | if (search_is_const) { |
2463 | 0 | state = reinterpret_cast<LikeState*>( |
2464 | 0 | context->get_function_state(FunctionContext::THREAD_LOCAL)); |
2465 | 0 | } |
2466 | |
|
2467 | 0 | bool is_one = false; |
2468 | |
|
2469 | 0 | JsonbWriter writer; |
2470 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { |
2471 | | // an error occurs if the json_doc argument is not a valid json document. |
2472 | 0 | if (json_null_check(i)) { |
2473 | 0 | null_map->get_data()[i] = 1; |
2474 | 0 | result_col->insert_data("", 0); |
2475 | 0 | continue; |
2476 | 0 | } |
2477 | 0 | const auto& json_doc_str = col_json_string(i); |
2478 | 0 | const JsonbDocument* json_doc = nullptr; |
2479 | 0 | auto st = JsonbDocument::checkAndCreateDocument(json_doc_str.data, json_doc_str.size, |
2480 | 0 | &json_doc); |
2481 | 0 | if (!st.ok()) { |
2482 | 0 | return Status::InvalidArgument( |
2483 | 0 | "the json_doc argument at row {} is not a valid json document: {}", i, |
2484 | 0 | st.to_string()); |
2485 | 0 | } |
2486 | | |
2487 | 0 | if (!one_null_check(i)) { |
2488 | 0 | RETURN_IF_ERROR(one_check(i, &is_one)); |
2489 | 0 | } |
2490 | | |
2491 | 0 | if (one_null_check(i) || search_null_check(i)) { |
2492 | 0 | null_map->get_data()[i] = 1; |
2493 | 0 | result_col->insert_data("", 0); |
2494 | 0 | continue; |
2495 | 0 | } |
2496 | | |
2497 | | // an error occurs if any path argument is not a valid path expression. |
2498 | 0 | std::string root_path_str = "$"; |
2499 | 0 | JsonbPath root_path; |
2500 | 0 | root_path.seek(root_path_str.c_str(), root_path_str.size()); |
2501 | 0 | std::vector<JsonbPath*> paths; |
2502 | 0 | paths.push_back(&root_path); |
2503 | |
|
2504 | 0 | if (!search_is_const) { |
2505 | 0 | state_ptr = std::make_shared<LikeState>(); |
2506 | 0 | state_ptr->is_like_pattern = true; |
2507 | 0 | const auto& search_str = col_search_string->get_data_at(i); |
2508 | 0 | RETURN_IF_ERROR(FunctionLike::construct_like_const_state(context, search_str, |
2509 | 0 | state_ptr, false)); |
2510 | 0 | state = state_ptr.get(); |
2511 | 0 | } |
2512 | | |
2513 | | // maintain a hashset to deduplicate matches. |
2514 | 0 | std::unordered_set<std::string> matches; |
2515 | 0 | for (const auto& item : paths) { |
2516 | 0 | auto* cur_path = item; |
2517 | 0 | auto find = find_matches(json_doc->getValue(), is_one, state, cur_path, &matches); |
2518 | 0 | if (is_one && find) { |
2519 | 0 | break; |
2520 | 0 | } |
2521 | 0 | } |
2522 | 0 | if (matches.empty()) { |
2523 | | // returns NULL if the search_str is not found in the document. |
2524 | 0 | null_map->get_data()[i] = 1; |
2525 | 0 | result_col->insert_data("", 0); |
2526 | 0 | continue; |
2527 | 0 | } |
2528 | | |
2529 | 0 | writer.reset(); |
2530 | 0 | make_result_str(writer, matches, result_col.get()); |
2531 | 0 | } |
2532 | 0 | auto result_col_nullable = |
2533 | 0 | ColumnNullable::create(std::move(result_col), std::move(null_map)); |
2534 | 0 | block.replace_by_position(result, std::move(result_col_nullable)); |
2535 | 0 | return Status::OK(); |
2536 | 0 | } Unexecuted instantiation: _ZNK5doris18FunctionJsonSearch14execute_vectorILb1EEENS_6StatusERNS_5BlockEmSt8functionIFbmEES5_IFNS_9StringRefEmEES7_S5_IFS2_mPbEES7_PKNS_9ColumnStrIjEEPNS_15FunctionContextEm Unexecuted instantiation: _ZNK5doris18FunctionJsonSearch14execute_vectorILb0EEENS_6StatusERNS_5BlockEmSt8functionIFbmEES5_IFNS_9StringRefEmEES7_S5_IFS2_mPbEES7_PKNS_9ColumnStrIjEEPNS_15FunctionContextEm |
2537 | | |
2538 | | static constexpr auto one = "one"; |
2539 | | static constexpr auto all = "all"; |
2540 | | |
2541 | | public: |
2542 | | static constexpr auto name = "json_search"; |
2543 | 8 | static FunctionPtr create() { return std::make_shared<FunctionJsonSearch>(); } |
2544 | | |
2545 | 1 | String get_name() const override { return name; } |
2546 | 1 | bool is_variadic() const override { return false; } |
2547 | 0 | size_t get_number_of_arguments() const override { return 3; } |
2548 | | |
2549 | 0 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
2550 | 0 | return make_nullable(std::make_shared<DataTypeJsonb>()); |
2551 | 0 | } |
2552 | | |
2553 | 0 | bool use_default_implementation_for_nulls() const override { return false; } |
2554 | | |
2555 | 0 | Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { |
2556 | 0 | if (scope != FunctionContext::THREAD_LOCAL) { |
2557 | 0 | return Status::OK(); |
2558 | 0 | } |
2559 | 0 | if (context->is_col_constant(2)) { |
2560 | 0 | std::shared_ptr<LikeState> state = std::make_shared<LikeState>(); |
2561 | 0 | state->is_like_pattern = true; |
2562 | 0 | const auto pattern_col = context->get_constant_col(2)->column_ptr; |
2563 | 0 | const auto& pattern = pattern_col->get_data_at(0); |
2564 | 0 | RETURN_IF_ERROR( |
2565 | 0 | FunctionLike::construct_like_const_state(context, pattern, state, false)); |
2566 | 0 | context->set_function_state(scope, state); |
2567 | 0 | } |
2568 | 0 | return Status::OK(); |
2569 | 0 | } |
2570 | | |
2571 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
2572 | 0 | uint32_t result, size_t input_rows_count) const override { |
2573 | | // the json_doc, one_or_all, and search_str must be given. |
2574 | | // and we require the positions are static. |
2575 | 0 | if (arguments.size() < 3) { |
2576 | 0 | return Status::InvalidArgument("too few arguments for function {}", name); |
2577 | 0 | } |
2578 | 0 | if (arguments.size() > 3) { |
2579 | 0 | return Status::NotSupported("escape and path params are not support now"); |
2580 | 0 | } |
2581 | | |
2582 | 0 | CheckNullFun json_null_check = always_not_null; |
2583 | 0 | GetJsonStringRefFun get_json_fun; |
2584 | | // prepare jsonb data column |
2585 | 0 | auto&& [col_json, json_is_const] = |
2586 | 0 | unpack_if_const(block.get_by_position(arguments[0]).column); |
2587 | 0 | const auto* col_json_string = check_and_get_column<ColumnString>(col_json.get()); |
2588 | 0 | if (const auto* nullable = check_and_get_column<ColumnNullable>(col_json.get())) { |
2589 | 0 | col_json_string = |
2590 | 0 | check_and_get_column<ColumnString>(nullable->get_nested_column_ptr().get()); |
2591 | 0 | } |
2592 | |
|
2593 | 0 | if (!col_json_string) { |
2594 | 0 | return Status::RuntimeError("Illegal arg json {} should be ColumnString", |
2595 | 0 | col_json->get_name()); |
2596 | 0 | } |
2597 | | |
2598 | 0 | auto create_all_null_result = [&]() { |
2599 | 0 | auto res_str = ColumnString::create(); |
2600 | 0 | res_str->insert_default(); |
2601 | 0 | auto res = ColumnNullable::create(std::move(res_str), ColumnUInt8::create(1, 1)); |
2602 | 0 | if (input_rows_count > 1) { |
2603 | 0 | block.get_by_position(result).column = |
2604 | 0 | ColumnConst::create(std::move(res), input_rows_count); |
2605 | 0 | } else { |
2606 | 0 | block.get_by_position(result).column = std::move(res); |
2607 | 0 | } |
2608 | 0 | return Status::OK(); |
2609 | 0 | }; |
2610 | |
|
2611 | 0 | if (json_is_const) { |
2612 | 0 | if (col_json->is_null_at(0)) { |
2613 | 0 | return create_all_null_result(); |
2614 | 0 | } else { |
2615 | 0 | const auto& json_str = col_json_string->get_data_at(0); |
2616 | 0 | get_json_fun = [json_str](size_t i) { return json_str; }; |
2617 | 0 | } |
2618 | 0 | } else { |
2619 | 0 | json_null_check = [col_json](size_t i) { return col_json->is_null_at(i); }; |
2620 | 0 | get_json_fun = [col_json_string](size_t i) { return col_json_string->get_data_at(i); }; |
2621 | 0 | } |
2622 | | |
2623 | | // one_or_all |
2624 | 0 | CheckNullFun one_null_check = always_not_null; |
2625 | 0 | OneFun one_check = always_one; |
2626 | 0 | auto&& [col_one, one_is_const] = |
2627 | 0 | unpack_if_const(block.get_by_position(arguments[1]).column); |
2628 | 0 | one_is_const |= input_rows_count == 1; |
2629 | 0 | const auto* col_one_string = check_and_get_column<ColumnString>(col_one.get()); |
2630 | 0 | if (const auto* nullable = check_and_get_column<ColumnNullable>(col_one.get())) { |
2631 | 0 | col_one_string = check_and_get_column<ColumnString>(*nullable->get_nested_column_ptr()); |
2632 | 0 | } |
2633 | 0 | if (!col_one_string) { |
2634 | 0 | return Status::RuntimeError("Illegal arg one {} should be ColumnString", |
2635 | 0 | col_one->get_name()); |
2636 | 0 | } |
2637 | 0 | if (one_is_const) { |
2638 | 0 | if (col_one->is_null_at(0)) { |
2639 | 0 | return create_all_null_result(); |
2640 | 0 | } else { |
2641 | 0 | const auto& one_or_all = col_one_string->get_data_at(0); |
2642 | 0 | std::string one_or_all_str = one_or_all.to_string(); |
2643 | 0 | if (strcasecmp(one_or_all_str.c_str(), all) == 0) { |
2644 | 0 | one_check = always_all; |
2645 | 0 | } else if (strcasecmp(one_or_all_str.c_str(), one) == 0) { |
2646 | | // nothing |
2647 | 0 | } else { |
2648 | | // an error occurs if the one_or_all argument is not 'one' nor 'all'. |
2649 | 0 | return Status::InvalidArgument( |
2650 | 0 | "the one_or_all argument {} is not 'one' not 'all'", one_or_all_str); |
2651 | 0 | } |
2652 | 0 | } |
2653 | 0 | } else { |
2654 | 0 | one_null_check = [col_one](size_t i) { return col_one->is_null_at(i); }; |
2655 | 0 | one_check = [col_one_string](size_t i, bool* is_one) { |
2656 | 0 | const auto& one_or_all = col_one_string->get_data_at(i); |
2657 | 0 | std::string one_or_all_str = one_or_all.to_string(); |
2658 | 0 | if (strcasecmp(one_or_all_str.c_str(), all) == 0) { |
2659 | 0 | *is_one = false; |
2660 | 0 | } else if (strcasecmp(one_or_all_str.c_str(), one) == 0) { |
2661 | 0 | *is_one = true; |
2662 | 0 | } else { |
2663 | | // an error occurs if the one_or_all argument is not 'one' nor 'all'. |
2664 | 0 | return Status::InvalidArgument( |
2665 | 0 | "the one_or_all argument {} is not 'one' not 'all'", one_or_all_str); |
2666 | 0 | } |
2667 | 0 | return Status::OK(); |
2668 | 0 | }; |
2669 | 0 | } |
2670 | | |
2671 | | // search_str |
2672 | 0 | auto&& [col_search, search_is_const] = |
2673 | 0 | unpack_if_const(block.get_by_position(arguments[2]).column); |
2674 | |
|
2675 | 0 | const auto* col_search_string = check_and_get_column<ColumnString>(col_search.get()); |
2676 | 0 | if (const auto* nullable = check_and_get_column<ColumnNullable>(col_search.get())) { |
2677 | 0 | col_search_string = |
2678 | 0 | check_and_get_column<ColumnString>(*nullable->get_nested_column_ptr()); |
2679 | 0 | } |
2680 | 0 | if (!col_search_string) { |
2681 | 0 | return Status::RuntimeError("Illegal arg pattern {} should be ColumnString", |
2682 | 0 | col_search->get_name()); |
2683 | 0 | } |
2684 | 0 | if (search_is_const) { |
2685 | 0 | CheckNullFun search_null_check = always_not_null; |
2686 | 0 | if (col_search->is_null_at(0)) { |
2687 | 0 | return create_all_null_result(); |
2688 | 0 | } |
2689 | 0 | RETURN_IF_ERROR(execute_vector<true>( |
2690 | 0 | block, input_rows_count, json_null_check, get_json_fun, one_null_check, |
2691 | 0 | one_check, search_null_check, col_search_string, context, result)); |
2692 | 0 | } else { |
2693 | 0 | CheckNullFun search_null_check = [col_search](size_t i) { |
2694 | 0 | return col_search->is_null_at(i); |
2695 | 0 | }; |
2696 | 0 | RETURN_IF_ERROR(execute_vector<false>( |
2697 | 0 | block, input_rows_count, json_null_check, get_json_fun, one_null_check, |
2698 | 0 | one_check, search_null_check, col_search_string, context, result)); |
2699 | 0 | } |
2700 | 0 | return Status::OK(); |
2701 | 0 | } |
2702 | | }; |
2703 | | |
/// Scratch byte buffer used to keep a serialized JSONB document alive between
/// successive rewrite steps.
struct DocumentBuffer {
    /// Owned storage; stays null until the first allocation.
    std::unique_ptr<char[]> ptr;
    /// Number of valid bytes currently stored behind `ptr`.
    size_t size {0};
    /// Number of bytes allocated behind `ptr`.
    size_t capacity {0};
};
2709 | | |
2710 | | class FunctionJsonbRemove : public IFunction { |
2711 | | public: |
2712 | | static constexpr auto name = "jsonb_remove"; |
2713 | | static constexpr auto alias = "json_remove"; |
2714 | | |
2715 | 8 | static FunctionPtr create() { return std::make_shared<FunctionJsonbRemove>(); } |
2716 | | |
2717 | 0 | String get_name() const override { return name; } |
2718 | | |
2719 | 0 | size_t get_number_of_arguments() const override { return 0; } |
2720 | 1 | bool is_variadic() const override { return true; } |
2721 | | |
2722 | 0 | bool use_default_implementation_for_nulls() const override { return false; } |
2723 | | |
2724 | 0 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
2725 | 0 | return make_nullable(std::make_shared<DataTypeJsonb>()); |
2726 | 0 | } |
2727 | | |
2728 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
2729 | 0 | uint32_t result, size_t input_rows_count) const override { |
2730 | 0 | DCHECK_GE(arguments.size(), 2); |
2731 | | |
2732 | | // Check if arguments count is valid (json_doc + at least one path) |
2733 | 0 | if (arguments.size() < 2) { |
2734 | 0 | return Status::InvalidArgument("json_remove requires at least 2 arguments"); |
2735 | 0 | } |
2736 | | |
2737 | 0 | auto return_data_type = make_nullable(std::make_shared<DataTypeJsonb>()); |
2738 | 0 | auto result_column = return_data_type->create_column(); |
2739 | 0 | auto& nullable_column = assert_cast<ColumnNullable&>(*result_column); |
2740 | 0 | auto& res_chars = |
2741 | 0 | assert_cast<ColumnString&>(nullable_column.get_nested_column()).get_chars(); |
2742 | 0 | auto& res_offsets = |
2743 | 0 | assert_cast<ColumnString&>(nullable_column.get_nested_column()).get_offsets(); |
2744 | 0 | auto& null_map = nullable_column.get_null_map_data(); |
2745 | |
|
2746 | 0 | res_chars.reserve(input_rows_count * 64); |
2747 | 0 | res_offsets.resize(input_rows_count); |
2748 | 0 | null_map.resize_fill(input_rows_count, 0); |
2749 | | |
2750 | | // Get JSON document column |
2751 | 0 | auto [json_column, json_const] = |
2752 | 0 | unpack_if_const(block.get_by_position(arguments[0]).column); |
2753 | 0 | const auto* json_nullable = check_and_get_column<ColumnNullable>(json_column.get()); |
2754 | 0 | const ColumnString* json_data_column = nullptr; |
2755 | 0 | const NullMap* json_null_map = nullptr; |
2756 | |
|
2757 | 0 | if (json_nullable) { |
2758 | 0 | json_null_map = &json_nullable->get_null_map_data(); |
2759 | 0 | json_data_column = |
2760 | 0 | check_and_get_column<ColumnString>(&json_nullable->get_nested_column()); |
2761 | 0 | } else { |
2762 | 0 | json_data_column = check_and_get_column<ColumnString>(json_column.get()); |
2763 | 0 | } |
2764 | |
|
2765 | 0 | if (!json_data_column) { |
2766 | 0 | return Status::InvalidArgument("First argument must be a JSON document"); |
2767 | 0 | } |
2768 | | |
2769 | | // Parse paths |
2770 | 0 | std::vector<const ColumnString*> path_columns; |
2771 | 0 | std::vector<const NullMap*> path_null_maps; |
2772 | 0 | std::vector<bool> path_constants; |
2773 | |
|
2774 | 0 | for (size_t i = 1; i < arguments.size(); ++i) { |
2775 | 0 | auto [path_column, path_const] = |
2776 | 0 | unpack_if_const(block.get_by_position(arguments[i]).column); |
2777 | 0 | const auto* path_nullable = check_and_get_column<ColumnNullable>(path_column.get()); |
2778 | |
|
2779 | 0 | if (path_nullable) { |
2780 | 0 | path_null_maps.push_back(&path_nullable->get_null_map_data()); |
2781 | 0 | path_columns.push_back( |
2782 | 0 | check_and_get_column<ColumnString>(&path_nullable->get_nested_column())); |
2783 | 0 | } else { |
2784 | 0 | path_null_maps.push_back(nullptr); |
2785 | 0 | path_columns.push_back(check_and_get_column<ColumnString>(path_column.get())); |
2786 | 0 | } |
2787 | |
|
2788 | 0 | if (!path_columns.back()) { |
2789 | 0 | return Status::InvalidArgument( |
2790 | 0 | fmt::format("Argument {} must be a string path", i + 1)); |
2791 | 0 | } |
2792 | | |
2793 | 0 | path_constants.push_back(path_const); |
2794 | 0 | } |
2795 | | |
2796 | | // Reusable JsonbWriter for performance |
2797 | 0 | JsonbWriter writer; |
2798 | |
|
2799 | 0 | for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) { |
2800 | 0 | size_t json_idx = index_check_const(row_idx, json_const); |
2801 | | |
2802 | | // Check if JSON document is null |
2803 | 0 | if (json_null_map && (*json_null_map)[json_idx]) { |
2804 | 0 | null_map[row_idx] = 1; |
2805 | 0 | res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size()); |
2806 | 0 | continue; |
2807 | 0 | } |
2808 | | |
2809 | | // Parse JSON document |
2810 | 0 | const auto& json_data = json_data_column->get_data_at(json_idx); |
2811 | 0 | const JsonbDocument* json_doc = nullptr; |
2812 | 0 | Status parse_status = JsonbDocument::checkAndCreateDocument(json_data.data, |
2813 | 0 | json_data.size, &json_doc); |
2814 | |
|
2815 | 0 | if (!parse_status.ok() || !json_doc) { |
2816 | 0 | null_map[row_idx] = 1; |
2817 | 0 | res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size()); |
2818 | 0 | continue; |
2819 | 0 | } |
2820 | | |
2821 | | // Check if any path is null |
2822 | 0 | bool has_null_path = false; |
2823 | 0 | for (size_t path_idx = 0; path_idx < path_columns.size(); ++path_idx) { |
2824 | 0 | size_t idx = index_check_const(row_idx, path_constants[path_idx]); |
2825 | 0 | if (path_null_maps[path_idx] && (*path_null_maps[path_idx])[idx]) { |
2826 | 0 | has_null_path = true; |
2827 | 0 | break; |
2828 | 0 | } |
2829 | 0 | } |
2830 | |
|
2831 | 0 | if (has_null_path) { |
2832 | 0 | null_map[row_idx] = 1; |
2833 | 0 | res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size()); |
2834 | 0 | continue; |
2835 | 0 | } |
2836 | | |
2837 | 0 | std::vector<JsonbPath> paths; |
2838 | 0 | std::vector<bool> path_constants_vec; |
2839 | |
|
2840 | 0 | for (size_t path_idx = 0; path_idx < path_columns.size(); ++path_idx) { |
2841 | 0 | size_t idx = index_check_const(row_idx, path_constants[path_idx]); |
2842 | 0 | const auto& path_data = path_columns[path_idx]->get_data_at(idx); |
2843 | |
|
2844 | 0 | JsonbPath path; |
2845 | 0 | if (!path.seek(path_data.data, path_data.size)) { |
2846 | 0 | return Status::InvalidArgument( |
2847 | 0 | "Json path error: Invalid Json Path for value: {} at row: {}", |
2848 | 0 | std::string_view(path_data.data, path_data.size), row_idx); |
2849 | 0 | } |
2850 | | |
2851 | 0 | if (path.is_wildcard() || path.is_supper_wildcard()) { |
2852 | 0 | return Status::InvalidArgument( |
2853 | 0 | "In this situation, path expressions may not contain the * and ** " |
2854 | 0 | "tokens or an array range, argument index: {}, row index: {}", |
2855 | 0 | path_idx + 1, row_idx); |
2856 | 0 | } |
2857 | | |
2858 | 0 | paths.push_back(std::move(path)); |
2859 | 0 | path_constants_vec.push_back(path_constants[path_idx]); |
2860 | 0 | } |
2861 | | |
2862 | 0 | const JsonbValue* current_value = json_doc->getValue(); |
2863 | |
|
2864 | 0 | DocumentBuffer tmp_buffer; |
2865 | |
|
2866 | 0 | for (size_t path_idx = 0; path_idx < paths.size(); ++path_idx) { |
2867 | 0 | writer.reset(); |
2868 | |
|
2869 | 0 | auto find_result = current_value->findValue(paths[path_idx]); |
2870 | |
|
2871 | 0 | if (find_result.is_wildcard) { |
2872 | 0 | continue; |
2873 | 0 | } |
2874 | | |
2875 | 0 | if (find_result.value) { |
2876 | 0 | RETURN_IF_ERROR(clone_without_path(current_value, paths[path_idx], writer)); |
2877 | | |
2878 | 0 | auto* writer_output = writer.getOutput(); |
2879 | 0 | if (writer_output->getSize() > tmp_buffer.capacity) { |
2880 | 0 | tmp_buffer.capacity = |
2881 | 0 | ((size_t(writer_output->getSize()) + 1024 - 1) / 1024) * 1024; |
2882 | 0 | tmp_buffer.ptr = std::make_unique<char[]>(tmp_buffer.capacity); |
2883 | 0 | DCHECK_LE(writer_output->getSize(), tmp_buffer.capacity); |
2884 | 0 | } |
2885 | |
|
2886 | 0 | memcpy(tmp_buffer.ptr.get(), writer_output->getBuffer(), |
2887 | 0 | writer_output->getSize()); |
2888 | 0 | tmp_buffer.size = writer_output->getSize(); |
2889 | |
|
2890 | 0 | const JsonbDocument* new_doc = nullptr; |
2891 | 0 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument( |
2892 | 0 | tmp_buffer.ptr.get(), tmp_buffer.size, &new_doc)); |
2893 | | |
2894 | 0 | current_value = new_doc->getValue(); |
2895 | 0 | } |
2896 | 0 | } |
2897 | | |
2898 | 0 | const JsonbDocument* modified_doc = nullptr; |
2899 | 0 | if (current_value != json_doc->getValue()) { |
2900 | 0 | RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument( |
2901 | 0 | tmp_buffer.ptr.get(), tmp_buffer.size, &modified_doc)); |
2902 | 0 | } else { |
2903 | 0 | modified_doc = json_doc; |
2904 | 0 | } |
2905 | | |
2906 | | // Write the final result |
2907 | 0 | const auto size = modified_doc->numPackedBytes(); |
2908 | 0 | res_chars.insert(reinterpret_cast<const char*>(modified_doc), |
2909 | 0 | reinterpret_cast<const char*>(modified_doc) + size); |
2910 | 0 | res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size()); |
2911 | 0 | } |
2912 | | |
2913 | 0 | block.get_by_position(result).column = std::move(result_column); |
2914 | 0 | return Status::OK(); |
2915 | 0 | } |
2916 | | |
2917 | | private: |
2918 | | Status clone_without_path(const JsonbValue* root, const JsonbPath& path, |
2919 | 0 | JsonbWriter& writer) const { |
2920 | | // Start writing at the root level |
2921 | 0 | if (root->isObject()) { |
2922 | 0 | writer.writeStartObject(); |
2923 | 0 | RETURN_IF_ERROR(clone_object_without_path(root, path, 0, writer)); |
2924 | 0 | writer.writeEndObject(); |
2925 | 0 | } else if (root->isArray()) { |
2926 | 0 | writer.writeStartArray(); |
2927 | 0 | RETURN_IF_ERROR(clone_array_without_path(root, path, 0, writer)); |
2928 | 0 | writer.writeEndArray(); |
2929 | 0 | } else { |
2930 | | // Primitive value - can't remove anything from it |
2931 | 0 | writer.writeValue(root); |
2932 | 0 | } |
2933 | 0 | return Status::OK(); |
2934 | 0 | } |
2935 | | |
2936 | | Status clone_object_without_path(const JsonbValue* obj_value, const JsonbPath& path, |
2937 | 0 | size_t depth, JsonbWriter& writer) const { |
2938 | 0 | const auto* obj = obj_value->unpack<ObjectVal>(); |
2939 | |
|
2940 | 0 | for (const auto& kv : *obj) { |
2941 | 0 | std::string key(kv.getKeyStr(), kv.klen()); |
2942 | |
|
2943 | 0 | if (depth < path.get_leg_vector_size()) { |
2944 | 0 | const auto* leg = path.get_leg_from_leg_vector(depth); |
2945 | 0 | if (leg->type == MEMBER_CODE) { |
2946 | 0 | std::string target_key(leg->leg_ptr, leg->leg_len); |
2947 | |
|
2948 | 0 | if (key == target_key) { |
2949 | 0 | if (depth == path.get_leg_vector_size() - 1) { |
2950 | 0 | continue; |
2951 | 0 | } else { |
2952 | 0 | writer.writeKey(kv.getKeyStr(), kv.klen()); |
2953 | 0 | if (kv.value()->isObject()) { |
2954 | 0 | writer.writeStartObject(); |
2955 | 0 | RETURN_IF_ERROR(clone_object_without_path(kv.value(), path, |
2956 | 0 | depth + 1, writer)); |
2957 | 0 | writer.writeEndObject(); |
2958 | 0 | } else if (kv.value()->isArray()) { |
2959 | 0 | writer.writeStartArray(); |
2960 | 0 | RETURN_IF_ERROR(clone_array_without_path(kv.value(), path, |
2961 | 0 | depth + 1, writer)); |
2962 | 0 | writer.writeEndArray(); |
2963 | 0 | } else { |
2964 | 0 | writer.writeValue(kv.value()); |
2965 | 0 | } |
2966 | 0 | } |
2967 | 0 | } else { |
2968 | 0 | writer.writeKey(kv.getKeyStr(), kv.klen()); |
2969 | 0 | writer.writeValue(kv.value()); |
2970 | 0 | } |
2971 | 0 | } else { |
2972 | 0 | writer.writeKey(kv.getKeyStr(), kv.klen()); |
2973 | 0 | writer.writeValue(kv.value()); |
2974 | 0 | } |
2975 | 0 | } else { |
2976 | 0 | writer.writeKey(kv.getKeyStr(), kv.klen()); |
2977 | 0 | writer.writeValue(kv.value()); |
2978 | 0 | } |
2979 | 0 | } |
2980 | | |
2981 | 0 | return Status::OK(); |
2982 | 0 | } |
2983 | | |
2984 | | Status clone_array_without_path(const JsonbValue* arr_value, const JsonbPath& path, |
2985 | 0 | size_t depth, JsonbWriter& writer) const { |
2986 | 0 | const auto* arr = arr_value->unpack<ArrayVal>(); |
2987 | |
|
2988 | 0 | int index = 0; |
2989 | 0 | for (const auto& element : *arr) { |
2990 | 0 | if (depth < path.get_leg_vector_size()) { |
2991 | 0 | const auto* leg = path.get_leg_from_leg_vector(depth); |
2992 | 0 | if (leg->type == ARRAY_CODE) { |
2993 | 0 | int target_index = leg->array_index; |
2994 | |
|
2995 | 0 | if (index == target_index) { |
2996 | 0 | if (depth == path.get_leg_vector_size() - 1) { |
2997 | | // This is the target element to remove - skip it |
2998 | 0 | } else { |
2999 | 0 | if (element.isObject()) { |
3000 | 0 | writer.writeStartObject(); |
3001 | 0 | RETURN_IF_ERROR(clone_object_without_path(&element, path, depth + 1, |
3002 | 0 | writer)); |
3003 | 0 | writer.writeEndObject(); |
3004 | 0 | } else if (element.isArray()) { |
3005 | 0 | writer.writeStartArray(); |
3006 | 0 | RETURN_IF_ERROR(clone_array_without_path(&element, path, depth + 1, |
3007 | 0 | writer)); |
3008 | 0 | writer.writeEndArray(); |
3009 | 0 | } else { |
3010 | 0 | writer.writeValue(&element); |
3011 | 0 | } |
3012 | 0 | } |
3013 | 0 | } else { |
3014 | 0 | writer.writeValue(&element); |
3015 | 0 | } |
3016 | 0 | } else { |
3017 | 0 | writer.writeValue(&element); |
3018 | 0 | } |
3019 | 0 | } else { |
3020 | 0 | writer.writeValue(&element); |
3021 | 0 | } |
3022 | 0 | index++; |
3023 | 0 | } |
3024 | | |
3025 | 0 | return Status::OK(); |
3026 | 0 | } |
3027 | | }; |
3028 | | |
3029 | | class FunctionStripNullValue : public IFunction { |
3030 | | public: |
3031 | | static constexpr auto name = "strip_null_value"; |
3032 | 8 | static FunctionPtr create() { return std::make_shared<FunctionStripNullValue>(); } |
3033 | | |
3034 | 1 | String get_name() const override { return name; } |
3035 | 1 | bool is_variadic() const override { return false; } |
3036 | 0 | size_t get_number_of_arguments() const override { return 1; } |
3037 | | |
3038 | 0 | bool use_default_implementation_for_nulls() const override { return false; } |
3039 | | |
3040 | 0 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
3041 | 0 | return make_nullable(std::make_shared<DataTypeJsonb>()); |
3042 | 0 | } |
3043 | | |
3044 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
3045 | 0 | uint32_t result, size_t input_rows_count) const override { |
3046 | 0 | const auto& arg_column = block.get_by_position(arguments[0]).column; |
3047 | 0 | const ColumnString* json_column = nullptr; |
3048 | 0 | const NullMap* json_null_map = nullptr; |
3049 | 0 | if (arg_column->is_nullable()) { |
3050 | 0 | const auto& nullable_col = assert_cast<const ColumnNullable&>(*arg_column); |
3051 | 0 | json_column = assert_cast<const ColumnString*>(&nullable_col.get_nested_column()); |
3052 | 0 | json_null_map = &nullable_col.get_null_map_data(); |
3053 | 0 | } else { |
3054 | 0 | json_column = assert_cast<const ColumnString*>(arg_column.get()); |
3055 | 0 | } |
3056 | |
|
3057 | 0 | auto return_data_type = make_nullable(std::make_shared<DataTypeJsonb>()); |
3058 | 0 | auto result_column = return_data_type->create_column(); |
3059 | |
|
3060 | 0 | auto& result_nullmap = assert_cast<ColumnNullable&>(*result_column).get_null_map_data(); |
3061 | 0 | auto& result_data_col = assert_cast<ColumnString&>( |
3062 | 0 | assert_cast<ColumnNullable&>(*result_column).get_nested_column()); |
3063 | |
|
3064 | 0 | result_nullmap.resize_fill(input_rows_count, 0); |
3065 | 0 | for (size_t i = 0; i != input_rows_count; ++i) { |
3066 | 0 | if (json_null_map && (*json_null_map)[i]) { |
3067 | 0 | result_nullmap[i] = 1; |
3068 | 0 | result_data_col.insert_default(); |
3069 | 0 | continue; |
3070 | 0 | } |
3071 | 0 | const JsonbDocument* json_doc = nullptr; |
3072 | 0 | const auto& json_str = json_column->get_data_at(i); |
3073 | 0 | RETURN_IF_ERROR( |
3074 | 0 | JsonbDocument::checkAndCreateDocument(json_str.data, json_str.size, &json_doc)); |
3075 | 0 | if (json_doc) [[likely]] { |
3076 | 0 | if (json_doc->getValue()->isNull()) { |
3077 | 0 | result_nullmap[i] = 1; |
3078 | 0 | result_data_col.insert_default(); |
3079 | 0 | } else { |
3080 | 0 | result_nullmap[i] = 0; |
3081 | 0 | result_data_col.insert_data(json_str.data, json_str.size); |
3082 | 0 | } |
3083 | 0 | } else { |
3084 | 0 | result_nullmap[i] = 1; |
3085 | 0 | result_data_col.insert_default(); |
3086 | 0 | } |
3087 | 0 | } |
3088 | | |
3089 | 0 | block.get_by_position(result).column = std::move(result_column); |
3090 | 0 | return Status::OK(); |
3091 | 0 | } |
3092 | | }; |
3093 | | |
3094 | 7 | void register_function_jsonb(SimpleFunctionFactory& factory) { |
3095 | 7 | factory.register_function<FunctionJsonbParse>(FunctionJsonbParse::name); |
3096 | 7 | factory.register_alias(FunctionJsonbParse::name, FunctionJsonbParse::alias); |
3097 | 7 | factory.register_function<FunctionJsonbParseErrorNull>("json_parse_error_to_null"); |
3098 | 7 | factory.register_alias("json_parse_error_to_null", "jsonb_parse_error_to_null"); |
3099 | 7 | factory.register_function<FunctionJsonbParseErrorValue>("json_parse_error_to_value"); |
3100 | 7 | factory.register_alias("json_parse_error_to_value", "jsonb_parse_error_to_value"); |
3101 | | |
3102 | 7 | factory.register_function<FunctionJsonbExists>(); |
3103 | 7 | factory.register_alias(FunctionJsonbExists::name, FunctionJsonbExists::alias); |
3104 | 7 | factory.register_function<FunctionJsonbType>(); |
3105 | 7 | factory.register_alias(FunctionJsonbType::name, FunctionJsonbType::alias); |
3106 | | |
3107 | 7 | factory.register_function<FunctionJsonbKeys>(); |
3108 | 7 | factory.register_alias(FunctionJsonbKeys::name, FunctionJsonbKeys::alias); |
3109 | | |
3110 | 7 | factory.register_function<FunctionJsonbExtractIsnull>(); |
3111 | 7 | factory.register_alias(FunctionJsonbExtractIsnull::name, FunctionJsonbExtractIsnull::alias); |
3112 | | |
3113 | 7 | factory.register_function<FunctionJsonbExtractJsonb>(); |
3114 | 7 | factory.register_alias(FunctionJsonbExtractJsonb::name, FunctionJsonbExtractJsonb::alias); |
3115 | 7 | factory.register_function<FunctionJsonbExtractJsonbNoQuotes>(); |
3116 | 7 | factory.register_alias(FunctionJsonbExtractJsonbNoQuotes::name, |
3117 | 7 | FunctionJsonbExtractJsonbNoQuotes::alias); |
3118 | | |
3119 | 7 | factory.register_function<FunctionJsonbLength<JsonbLengthAndPathImpl>>(); |
3120 | 7 | factory.register_function<FunctionJsonbContains<JsonbContainsAndPathImpl>>(); |
3121 | | |
3122 | 7 | factory.register_function<FunctionJsonSearch>(); |
3123 | | |
3124 | 7 | factory.register_function<FunctionJsonbArray<false>>(); |
3125 | 7 | factory.register_alias(FunctionJsonbArray<false>::name, FunctionJsonbArray<false>::alias); |
3126 | | |
3127 | 7 | factory.register_function<FunctionJsonbArray<true>>("json_array_ignore_null"); |
3128 | 7 | factory.register_alias("json_array_ignore_null", "jsonb_array_ignore_null"); |
3129 | | |
3130 | 7 | factory.register_function<FunctionJsonbObject>(); |
3131 | 7 | factory.register_alias(FunctionJsonbObject::name, FunctionJsonbObject::alias); |
3132 | | |
3133 | 7 | factory.register_function<FunctionJsonbModify<JsonbModifyType::Insert>>(); |
3134 | 7 | factory.register_alias(FunctionJsonbModify<JsonbModifyType::Insert>::name, |
3135 | 7 | FunctionJsonbModify<JsonbModifyType::Insert>::alias); |
3136 | 7 | factory.register_function<FunctionJsonbModify<JsonbModifyType::Set>>(); |
3137 | 7 | factory.register_alias(FunctionJsonbModify<JsonbModifyType::Set>::name, |
3138 | 7 | FunctionJsonbModify<JsonbModifyType::Set>::alias); |
3139 | 7 | factory.register_function<FunctionJsonbModify<JsonbModifyType::Replace>>(); |
3140 | 7 | factory.register_alias(FunctionJsonbModify<JsonbModifyType::Replace>::name, |
3141 | 7 | FunctionJsonbModify<JsonbModifyType::Replace>::alias); |
3142 | | |
3143 | 7 | factory.register_function<FunctionJsonbRemove>(); |
3144 | 7 | factory.register_alias(FunctionJsonbRemove::name, FunctionJsonbRemove::alias); |
3145 | | |
3146 | 7 | factory.register_function<FunctionStripNullValue>(); |
3147 | 7 | } |
3148 | | |
3149 | | } // namespace doris |