Coverage Report

Created: 2026-03-12 17:06

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/function_jsonb.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include <glog/logging.h>
19
20
#include <algorithm>
21
#include <cstdlib>
22
#include <memory>
23
#include <string>
24
#include <string_view>
25
#include <tuple>
26
#include <type_traits>
27
#include <utility>
28
#include <variant>
29
30
#include "common/compiler_util.h" // IWYU pragma: keep
31
#include "common/status.h"
32
#include "core/assert_cast.h"
33
#include "core/block/block.h"
34
#include "core/block/column_numbers.h"
35
#include "core/block/column_with_type_and_name.h"
36
#include "core/column/column.h"
37
#include "core/column/column_array.h"
38
#include "core/column/column_const.h"
39
#include "core/column/column_nullable.h"
40
#include "core/column/column_string.h"
41
#include "core/column/column_vector.h"
42
#include "core/custom_allocator.h"
43
#include "core/data_type/data_type.h"
44
#include "core/data_type/data_type_array.h"
45
#include "core/data_type/data_type_jsonb.h"
46
#include "core/data_type/data_type_nullable.h"
47
#include "core/data_type/data_type_string.h"
48
#include "core/data_type/define_primitive_type.h"
49
#include "core/data_type/primitive_type.h"
50
#include "core/string_ref.h"
51
#include "core/types.h"
52
#include "core/value/jsonb_value.h"
53
#include "exec/common/stringop_substring.h"
54
#include "exec/common/template_helpers.hpp"
55
#include "exec/common/util.hpp"
56
#include "exprs/aggregate/aggregate_function.h"
57
#include "exprs/function/function.h"
58
#include "exprs/function/like.h"
59
#include "exprs/function/simple_function_factory.h"
60
#include "exprs/function_context.h"
61
#include "util/jsonb_document.h"
62
#include "util/jsonb_stream.h"
63
#include "util/jsonb_utils.h"
64
#include "util/jsonb_writer.h"
65
#include "util/simd/bits.h"
66
67
namespace doris {
68
#include "common/compile_check_begin.h"
69
70
/// Controls how the nullability of a json-parse function's result is decided.
/// (The identifier's spelling is kept as-is; other code may reference it.)
enum class NullalbeMode {
    /// The result type is always wrapped as nullable.
    NULLABLE = 0,
    /// The result type is nullable only if at least one argument type is nullable.
    FOLLOW_INPUT = 1,
};
71
72
/// Selects what a json-parse function does with a row whose text is not valid JSON.
enum class JsonbParseErrorMode {
    /// Abort the whole call with an InvalidArgument status.
    FAIL = 0,
    /// Emit NULL for the offending row.
    RETURN_NULL = 1,
    /// Emit the caller-supplied default value for the offending row.
    RETURN_VALUE = 2,
};
73
74
// func(string,string) -> json
75
template <NullalbeMode nullable_mode, JsonbParseErrorMode parse_error_handle_mode>
76
class FunctionJsonbParseBase : public IFunction {
77
private:
78
    struct FunctionJsonbParseState {
79
        StringRef default_value;
80
        JsonBinaryValue default_value_parser;
81
        bool has_const_default_value = false;
82
        bool default_is_null = false;
83
    };
84
85
public:
86
    /// Base function name; get_name() appends an error-mode suffix to it.
    static constexpr const char* name = "json_parse";
    /// Alternative name for the same function.
    static constexpr const char* alias = "jsonb_parse";
88
87
    /// Factory: builds a new instance of this template instantiation.
    static FunctionPtr create() {
        auto instance = std::make_shared<FunctionJsonbParseBase>();
        return instance;
    }
_ZN5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE6createEv
Line
Count
Source
88
27
    static FunctionPtr create() { return std::make_shared<FunctionJsonbParseBase>(); }
_ZN5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE6createEv
Line
Count
Source
88
39
    static FunctionPtr create() { return std::make_shared<FunctionJsonbParseBase>(); }
_ZN5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE6createEv
Line
Count
Source
88
21
    static FunctionPtr create() { return std::make_shared<FunctionJsonbParseBase>(); }
89
90
4
    /// Returns the function name for this instantiation: the base `name`
    /// followed by a suffix that depends on the error mode
    /// ("" for FAIL, "_error_to_null", or "_error_to_value").
    String get_name() const override {
        String full_name = name;
        if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_NULL) {
            full_name += "_error_to_null";
        } else if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) {
            full_name += "_error_to_value";
        }
        // FAIL (and anything else) keeps the bare base name.
        return full_name;
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE8get_nameB5cxx11Ev
Line
Count
Source
90
1
    String get_name() const override {
91
1
        String error_mode;
92
1
        switch (parse_error_handle_mode) {
93
1
        case JsonbParseErrorMode::FAIL:
94
1
            break;
95
0
        case JsonbParseErrorMode::RETURN_NULL:
96
0
            error_mode = "_error_to_null";
97
0
            break;
98
0
        case JsonbParseErrorMode::RETURN_VALUE:
99
0
            error_mode = "_error_to_value";
100
0
            break;
101
1
        }
102
103
1
        return name + error_mode;
104
1
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE8get_nameB5cxx11Ev
Line
Count
Source
90
1
    String get_name() const override {
91
1
        String error_mode;
92
1
        switch (parse_error_handle_mode) {
93
0
        case JsonbParseErrorMode::FAIL:
94
0
            break;
95
1
        case JsonbParseErrorMode::RETURN_NULL:
96
1
            error_mode = "_error_to_null";
97
1
            break;
98
0
        case JsonbParseErrorMode::RETURN_VALUE:
99
0
            error_mode = "_error_to_value";
100
0
            break;
101
1
        }
102
103
1
        return name + error_mode;
104
1
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE8get_nameB5cxx11Ev
Line
Count
Source
90
2
    String get_name() const override {
91
2
        String error_mode;
92
2
        switch (parse_error_handle_mode) {
93
0
        case JsonbParseErrorMode::FAIL:
94
0
            break;
95
0
        case JsonbParseErrorMode::RETURN_NULL:
96
0
            error_mode = "_error_to_null";
97
0
            break;
98
2
        case JsonbParseErrorMode::RETURN_VALUE:
99
2
            error_mode = "_error_to_value";
100
2
            break;
101
2
        }
102
103
2
        return name + error_mode;
104
2
    }
105
106
64
    bool is_variadic() const override {
107
64
        return parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE;
108
64
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE11is_variadicEv
Line
Count
Source
106
19
    bool is_variadic() const override {
107
19
        return parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE;
108
19
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE11is_variadicEv
Line
Count
Source
106
31
    bool is_variadic() const override {
107
31
        return parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE;
108
31
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE11is_variadicEv
Line
Count
Source
106
14
    bool is_variadic() const override {
107
14
        return parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE;
108
14
    }
109
110
49
    size_t get_number_of_arguments() const override {
111
49
        switch (parse_error_handle_mode) {
112
18
        case JsonbParseErrorMode::FAIL:
113
18
            return 1;
114
30
        case JsonbParseErrorMode::RETURN_NULL:
115
30
            return 1;
116
1
        case JsonbParseErrorMode::RETURN_VALUE:
117
1
            return 0;
118
49
        }
119
49
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE23get_number_of_argumentsEv
Line
Count
Source
110
18
    size_t get_number_of_arguments() const override {
111
18
        switch (parse_error_handle_mode) {
112
18
        case JsonbParseErrorMode::FAIL:
113
18
            return 1;
114
0
        case JsonbParseErrorMode::RETURN_NULL:
115
0
            return 1;
116
0
        case JsonbParseErrorMode::RETURN_VALUE:
117
0
            return 0;
118
18
        }
119
18
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE23get_number_of_argumentsEv
Line
Count
Source
110
30
    size_t get_number_of_arguments() const override {
111
30
        switch (parse_error_handle_mode) {
112
0
        case JsonbParseErrorMode::FAIL:
113
0
            return 1;
114
30
        case JsonbParseErrorMode::RETURN_NULL:
115
30
            return 1;
116
0
        case JsonbParseErrorMode::RETURN_VALUE:
117
0
            return 0;
118
30
        }
119
30
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE23get_number_of_argumentsEv
Line
Count
Source
110
1
    size_t get_number_of_arguments() const override {
111
1
        switch (parse_error_handle_mode) {
112
0
        case JsonbParseErrorMode::FAIL:
113
0
            return 1;
114
0
        case JsonbParseErrorMode::RETURN_NULL:
115
0
            return 1;
116
1
        case JsonbParseErrorMode::RETURN_VALUE:
117
1
            return 0;
118
1
        }
119
1
    }
120
121
60
    /// Computes the result type: JSONB, wrapped as nullable when required.
    ///
    /// @param arguments types of the call's arguments.
    /// @return Nullable(JSONB) if the nullable mode is NULLABLE, or if it is
    ///         FOLLOW_INPUT and any argument type is nullable; plain JSONB otherwise.
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
        bool is_nullable = false;
        switch (nullable_mode) {
        case NullalbeMode::NULLABLE:
            is_nullable = true;
            break;
        case NullalbeMode::FOLLOW_INPUT:
            // Iterate by const reference: copying each shared pointer would
            // cost an atomic refcount increment/decrement per argument.
            is_nullable = std::any_of(arguments.begin(), arguments.end(),
                                      [](const auto& arg) { return arg->is_nullable(); });
            break;
        }

        if (is_nullable) {
            return make_nullable(std::make_shared<DataTypeJsonb>());
        }
        return std::make_shared<DataTypeJsonb>();
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE
Line
Count
Source
121
18
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
122
18
        bool is_nullable = false;
123
18
        switch (nullable_mode) {
124
0
        case NullalbeMode::NULLABLE:
125
0
            is_nullable = true;
126
0
            break;
127
18
        case NullalbeMode::FOLLOW_INPUT: {
128
18
            for (auto arg : arguments) {
129
18
                is_nullable |= arg->is_nullable();
130
18
            }
131
18
            break;
132
0
        }
133
18
        }
134
135
18
        return is_nullable ? make_nullable(std::make_shared<DataTypeJsonb>())
136
18
                           : std::make_shared<DataTypeJsonb>();
137
18
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE
Line
Count
Source
121
30
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
122
30
        bool is_nullable = false;
123
30
        switch (nullable_mode) {
124
30
        case NullalbeMode::NULLABLE:
125
30
            is_nullable = true;
126
30
            break;
127
0
        case NullalbeMode::FOLLOW_INPUT: {
128
0
            for (auto arg : arguments) {
129
0
                is_nullable |= arg->is_nullable();
130
0
            }
131
0
            break;
132
0
        }
133
30
        }
134
135
30
        return is_nullable ? make_nullable(std::make_shared<DataTypeJsonb>())
136
30
                           : std::make_shared<DataTypeJsonb>();
137
30
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE
Line
Count
Source
121
12
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
122
12
        bool is_nullable = false;
123
12
        switch (nullable_mode) {
124
0
        case NullalbeMode::NULLABLE:
125
0
            is_nullable = true;
126
0
            break;
127
12
        case NullalbeMode::FOLLOW_INPUT: {
128
23
            for (auto arg : arguments) {
129
23
                is_nullable |= arg->is_nullable();
130
23
            }
131
12
            break;
132
0
        }
133
12
        }
134
135
12
        return is_nullable ? make_nullable(std::make_shared<DataTypeJsonb>())
136
12
                           : std::make_shared<DataTypeJsonb>();
137
12
    }
138
139
135
    /// Opts out of the default NULL handling: execute_impl() builds the
    /// result null map itself.
    bool use_default_implementation_for_nulls() const override {
        return false;
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE36use_default_implementation_for_nullsEv
Line
Count
Source
139
44
    bool use_default_implementation_for_nulls() const override { return false; }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE36use_default_implementation_for_nullsEv
Line
Count
Source
139
64
    bool use_default_implementation_for_nulls() const override { return false; }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE36use_default_implementation_for_nullsEv
Line
Count
Source
139
27
    bool use_default_implementation_for_nulls() const override { return false; }
140
141
318
    /// Prepares the function state.
    ///
    /// For FRAGMENT_LOCAL scope a fresh FunctionJsonbParseState is registered on
    /// the context. In RETURN_VALUE mode the argument count is validated (for
    /// every scope) and, for FRAGMENT_LOCAL scope, the default value is cached
    /// when it is known up front:
    ///   - one argument: the default is the encoded form of "{}";
    ///   - two arguments with a constant second argument: the default is that
    ///     constant, or a "default is NULL" flag if the constant is NULL.
    ///
    /// @return InvalidArgument if RETURN_VALUE mode is called with an argument
    ///         count other than 1 or 2; any error from encoding "{}"; OK otherwise.
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
        const bool fragment_scope = scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL;
        if (fragment_scope) {
            context->set_function_state(FunctionContext::FRAGMENT_LOCAL,
                                        std::make_shared<FunctionJsonbParseState>());
        }

        if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) {
            const auto arg_count = context->get_num_args();
            if (arg_count != 1 && arg_count != 2) {
                return Status::InvalidArgument(
                        "{} function should have 1 or 2 arguments, "
                        "but got {}",
                        get_name(), arg_count);
            }

            // The default value is only cached once per fragment.
            if (!fragment_scope) {
                return Status::OK();
            }

            auto* parse_state = reinterpret_cast<FunctionJsonbParseState*>(
                    context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
            if (parse_state == nullptr) {
                return Status::OK();
            }

            if (arg_count == 1) {
                // No explicit default: fall back to an empty JSON object.
                RETURN_IF_ERROR(
                        parse_state->default_value_parser.from_json_string(std::string("{}")));
                parse_state->default_value = StringRef(parse_state->default_value_parser.value(),
                                                       parse_state->default_value_parser.size());
                parse_state->has_const_default_value = true;
                return Status::OK();
            }

            // Two arguments: only a constant default can be cached here;
            // a per-row default is resolved in execute_impl().
            if (!context->is_col_constant(1)) {
                return Status::OK();
            }

            const auto const_default_col = context->get_constant_col(1)->column_ptr;
            if (const_default_col->is_null_at(0)) {
                parse_state->default_is_null = true;
            } else {
                parse_state->default_value = const_default_col->get_data_at(0);
                parse_state->has_const_default_value = true;
            }
        }
        return Status::OK();
    }
_ZN5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE4openEPNS_15FunctionContextENS4_18FunctionStateScopeE
Line
Count
Source
141
73
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
142
73
        if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) {
143
18
            std::shared_ptr<FunctionJsonbParseState> state =
144
18
                    std::make_shared<FunctionJsonbParseState>();
145
18
            context->set_function_state(FunctionContext::FRAGMENT_LOCAL, state);
146
18
        }
147
        if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) {
148
            if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) {
149
                auto* state = reinterpret_cast<FunctionJsonbParseState*>(
150
                        context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
151
                if (state) {
152
                    if (context->get_num_args() == 2) {
153
                        if (context->is_col_constant(1)) {
154
                            const auto default_value_col = context->get_constant_col(1)->column_ptr;
155
                            if (default_value_col->is_null_at(0)) {
156
                                state->default_is_null = true;
157
                            } else {
158
                                const auto& default_value = default_value_col->get_data_at(0);
159
160
                                state->default_value = default_value;
161
                                state->has_const_default_value = true;
162
                            }
163
                        }
164
                    } else if (context->get_num_args() == 1) {
165
                        RETURN_IF_ERROR(
166
                                state->default_value_parser.from_json_string(std::string("{}")));
167
                        state->default_value = StringRef(state->default_value_parser.value(),
168
                                                         state->default_value_parser.size());
169
                        state->has_const_default_value = true;
170
                    }
171
                }
172
            }
173
174
            if (context->get_num_args() != 1 && context->get_num_args() != 2) {
175
                return Status::InvalidArgument(
176
                        "{} function should have 1 or 2 arguments, "
177
                        "but got {}",
178
                        get_name(), context->get_num_args());
179
            }
180
        }
181
73
        return Status::OK();
182
73
    }
_ZN5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE4openEPNS_15FunctionContextENS4_18FunctionStateScopeE
Line
Count
Source
141
113
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
142
113
        if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) {
143
30
            std::shared_ptr<FunctionJsonbParseState> state =
144
30
                    std::make_shared<FunctionJsonbParseState>();
145
30
            context->set_function_state(FunctionContext::FRAGMENT_LOCAL, state);
146
30
        }
147
        if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) {
148
            if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) {
149
                auto* state = reinterpret_cast<FunctionJsonbParseState*>(
150
                        context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
151
                if (state) {
152
                    if (context->get_num_args() == 2) {
153
                        if (context->is_col_constant(1)) {
154
                            const auto default_value_col = context->get_constant_col(1)->column_ptr;
155
                            if (default_value_col->is_null_at(0)) {
156
                                state->default_is_null = true;
157
                            } else {
158
                                const auto& default_value = default_value_col->get_data_at(0);
159
160
                                state->default_value = default_value;
161
                                state->has_const_default_value = true;
162
                            }
163
                        }
164
                    } else if (context->get_num_args() == 1) {
165
                        RETURN_IF_ERROR(
166
                                state->default_value_parser.from_json_string(std::string("{}")));
167
                        state->default_value = StringRef(state->default_value_parser.value(),
168
                                                         state->default_value_parser.size());
169
                        state->has_const_default_value = true;
170
                    }
171
                }
172
            }
173
174
            if (context->get_num_args() != 1 && context->get_num_args() != 2) {
175
                return Status::InvalidArgument(
176
                        "{} function should have 1 or 2 arguments, "
177
                        "but got {}",
178
                        get_name(), context->get_num_args());
179
            }
180
        }
181
113
        return Status::OK();
182
113
    }
_ZN5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE4openEPNS_15FunctionContextENS4_18FunctionStateScopeE
Line
Count
Source
141
132
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
142
132
        if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) {
143
11
            std::shared_ptr<FunctionJsonbParseState> state =
144
11
                    std::make_shared<FunctionJsonbParseState>();
145
11
            context->set_function_state(FunctionContext::FRAGMENT_LOCAL, state);
146
11
        }
147
132
        if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) {
148
132
            if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) {
149
11
                auto* state = reinterpret_cast<FunctionJsonbParseState*>(
150
11
                        context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
151
11
                if (state) {
152
11
                    if (context->get_num_args() == 2) {
153
8
                        if (context->is_col_constant(1)) {
154
2
                            const auto default_value_col = context->get_constant_col(1)->column_ptr;
155
2
                            if (default_value_col->is_null_at(0)) {
156
1
                                state->default_is_null = true;
157
1
                            } else {
158
1
                                const auto& default_value = default_value_col->get_data_at(0);
159
160
1
                                state->default_value = default_value;
161
1
                                state->has_const_default_value = true;
162
1
                            }
163
2
                        }
164
8
                    } else if (context->get_num_args() == 1) {
165
2
                        RETURN_IF_ERROR(
166
2
                                state->default_value_parser.from_json_string(std::string("{}")));
167
2
                        state->default_value = StringRef(state->default_value_parser.value(),
168
2
                                                         state->default_value_parser.size());
169
2
                        state->has_const_default_value = true;
170
2
                    }
171
11
                }
172
11
            }
173
174
132
            if (context->get_num_args() != 1 && context->get_num_args() != 2) {
175
1
                return Status::InvalidArgument(
176
1
                        "{} function should have 1 or 2 arguments, "
177
1
                        "but got {}",
178
1
                        get_name(), context->get_num_args());
179
1
            }
180
132
        }
181
131
        return Status::OK();
182
132
    }
183
184
    /// Parses each input string as JSON and writes the JSONB encoding to the
    /// result column.
    ///
    /// Rows whose input is NULL produce NULL. Rows whose text fails to parse are
    /// handled according to the error mode:
    ///   - FAIL: the call returns InvalidArgument naming the row;
    ///   - RETURN_NULL: the row becomes NULL;
    ///   - RETURN_VALUE: the row takes the default value (constant, per-row, or
    ///     the implicit "{}"), or NULL when that default is NULL.
    ///
    /// @param context          function context; its FRAGMENT_LOCAL state may
    ///                         carry a default value cached by open().
    /// @param block            block holding the argument and result columns.
    /// @param arguments        positions of the argument columns in `block`.
    /// @param result           position of the result column in `block`.
    /// @param input_rows_count number of rows to produce.
    /// @return OK on success; InvalidArgument on a parse failure in FAIL mode or
    ///         when the second argument of RETURN_VALUE mode is not JSONB; any
    ///         error from validating a constant default document.
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                        uint32_t result, size_t input_rows_count) const override {
        auto&& [col_from, col_from_is_const] =
                unpack_if_const(block.get_by_position(arguments[0]).column);

        // A constant NULL input yields a constant NULL result for all rows.
        if (col_from_is_const && col_from->is_null_at(0)) {
            auto col_str = ColumnString::create();
            col_str->insert_default();
            auto const_null_map = ColumnUInt8::create(1, 1);
            auto nullable_col =
                    ColumnNullable::create(std::move(col_str), std::move(const_null_map));
            block.get_by_position(result).column =
                    ColumnConst::create(std::move(nullable_col), input_rows_count);
            return Status::OK();
        }

        // Decide whether the result is nullable, mirroring get_return_type_impl().
        bool is_nullable = false;
        switch (nullable_mode) {
        case NullalbeMode::NULLABLE: {
            is_nullable = true;
            break;
        }
        case NullalbeMode::FOLLOW_INPUT: {
            for (auto arg : arguments) {
                is_nullable |= block.get_by_position(arg).type->is_nullable();
            }
            break;
        }
        }

        // Allocate the null map once: one entry per row when the result is
        // nullable, empty otherwise.
        auto null_map = ColumnUInt8::create(is_nullable ? input_rows_count : 0, 0);

        const ColumnString* col_from_string = nullptr;
        if (col_from->is_nullable()) {
            const auto& nullable_col = assert_cast<const ColumnNullable&>(*col_from);

            // NOTE(review): if col_from was unpacked from a constant column its
            // null map presumably has a single entry while null_map has one per
            // row -- confirm update_null_map copes with that.
            VectorizedUtils::update_null_map(null_map->get_data(),
                                             nullable_col.get_null_map_data());
            col_from_string =
                    assert_cast<const ColumnString*>(nullable_col.get_nested_column_ptr().get());
        } else {
            col_from_string = assert_cast<const ColumnString*>(col_from.get());
        }

        StringRef constant_default_value;
        bool default_value_const = false;
        bool default_value_null_const = false;
        ColumnPtr default_value_col;
        // Kept at function scope: constant_default_value may point into its buffer.
        JsonBinaryValue default_jsonb_value_parser;
        const ColumnString* default_value_str_col = nullptr;
        const NullMap* default_value_nullmap = nullptr;
        if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) {
            // Validate the default's type for every two-argument call. Previously
            // this ran only when open() had not cached the default, so a constant
            // non-JSONB default skipped the check and its raw bytes were emitted
            // as if they were JSONB.
            if (arguments.size() > 1 &&
                block.get_by_position(arguments[1]).type->get_primitive_type() !=
                        PrimitiveType::TYPE_JSONB) {
                return Status::InvalidArgument(
                        "{} second argument should be jsonb type, but got {}", get_name(),
                        block.get_by_position(arguments[1]).type->get_name());
            }

            auto* state = reinterpret_cast<FunctionJsonbParseState*>(
                    context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
            if (state && state->has_const_default_value) {
                // Default was resolved once in open().
                constant_default_value = state->default_value;
                default_value_null_const = state->default_is_null;
                default_value_const = true;
            } else if (arguments.size() > 1) {
                std::tie(default_value_col, default_value_const) =
                        unpack_if_const(block.get_by_position(arguments[1]).column);
                if (default_value_const) {
                    const JsonbDocument* default_value_doc = nullptr;
                    if (default_value_col->is_null_at(0)) {
                        default_value_null_const = true;
                    } else {
                        // Make sure the constant is a well-formed JSONB document
                        // before handing it out as a result.
                        auto data = default_value_col->get_data_at(0);
                        RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(data.data, data.size,
                                                                              &default_value_doc));
                        constant_default_value = data;
                    }
                } else {
                    // Per-row default: remember the string column and, if
                    // present, its null map.
                    if (default_value_col->is_nullable()) {
                        const auto& nullable_col =
                                assert_cast<const ColumnNullable&>(*default_value_col);
                        default_value_str_col = assert_cast<const ColumnString*>(
                                nullable_col.get_nested_column_ptr().get());
                        default_value_nullmap = &(nullable_col.get_null_map_data());
                    } else {
                        default_value_str_col =
                                assert_cast<const ColumnString*>(default_value_col.get());
                    }
                }
            } else if (arguments.size() == 1) {
                // parse default value '{}' should always success.
                RETURN_IF_ERROR(default_jsonb_value_parser.from_json_string(std::string("{}")));
                default_value_const = true;
                constant_default_value.data = default_jsonb_value_parser.value();
                constant_default_value.size = default_jsonb_value_parser.size();
            }
        }

        auto col_to = ColumnString::create();

        col_to->reserve(input_rows_count);

        auto& null_map_data = null_map->get_data();

        // parser can be reused for performance
        JsonBinaryValue jsonb_value;

        for (size_t i = 0; i < input_rows_count; ++i) {
            // NULL input row: emit a placeholder; the null map already marks it.
            if (is_nullable && null_map_data[i]) {
                col_to->insert_default();
                continue;
            }

            auto index = index_check_const(i, col_from_is_const);
            const auto& val = col_from_string->get_data_at(index);
            auto st = jsonb_value.from_json_string(val.data, val.size);
            if (st.ok()) {
                // insert jsonb format data
                col_to->insert_data(jsonb_value.value(), jsonb_value.size());
            } else {
                if constexpr (parse_error_handle_mode == JsonbParseErrorMode::FAIL) {
                    return Status::InvalidArgument(
                            "Parse json document failed at row {}, error: {}", i, st.to_string());
                } else if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_NULL) {
                    null_map_data[i] = 1;
                    col_to->insert_default();
                } else {
                    if (default_value_const) {
                        if (default_value_null_const) {
                            null_map_data[i] = 1;
                            col_to->insert_default();
                        } else {
                            col_to->insert_data(constant_default_value.data,
                                                constant_default_value.size);
                        }
                    } else {
                        // Per-row default, which may itself be NULL.
                        if (default_value_nullmap && (*default_value_nullmap)[i]) {
                            null_map_data[i] = 1;
                            col_to->insert_default();
                            continue;
                        }
                        auto value = default_value_str_col->get_data_at(i);
                        col_to->insert_data(value.data, value.size);
                    }
                }
            }
        }

        if (is_nullable) {
            block.replace_by_position(
                    result, ColumnNullable::create(std::move(col_to), std::move(null_map)));
        } else {
            block.replace_by_position(result, std::move(col_to));
        }

        return Status::OK();
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
185
26
                        uint32_t result, size_t input_rows_count) const override {
186
26
        auto&& [col_from, col_from_is_const] =
187
26
                unpack_if_const(block.get_by_position(arguments[0]).column);
188
189
26
        if (col_from_is_const && col_from->is_null_at(0)) {
190
0
            auto col_str = ColumnString::create();
191
0
            col_str->insert_default();
192
0
            auto null_map = ColumnUInt8::create(1, 1);
193
0
            auto nullable_col = ColumnNullable::create(std::move(col_str), std::move(null_map));
194
0
            block.get_by_position(result).column =
195
0
                    ColumnConst::create(std::move(nullable_col), input_rows_count);
196
0
            return Status::OK();
197
0
        }
198
199
26
        auto null_map = ColumnUInt8::create(0, 0);
200
26
        bool is_nullable = false;
201
202
26
        switch (nullable_mode) {
203
0
        case NullalbeMode::NULLABLE: {
204
0
            is_nullable = true;
205
0
            break;
206
0
        }
207
26
        case NullalbeMode::FOLLOW_INPUT: {
208
26
            for (auto arg : arguments) {
209
26
                is_nullable |= block.get_by_position(arg).type->is_nullable();
210
26
            }
211
26
            break;
212
0
        }
213
26
        }
214
215
26
        if (is_nullable) {
216
17
            null_map = ColumnUInt8::create(input_rows_count, 0);
217
17
        }
218
219
26
        const ColumnString* col_from_string = nullptr;
220
26
        if (col_from->is_nullable()) {
221
17
            const auto& nullable_col = assert_cast<const ColumnNullable&>(*col_from);
222
223
17
            VectorizedUtils::update_null_map(null_map->get_data(),
224
17
                                             nullable_col.get_null_map_data());
225
17
            col_from_string =
226
17
                    assert_cast<const ColumnString*>(nullable_col.get_nested_column_ptr().get());
227
17
        } else {
228
9
            col_from_string = assert_cast<const ColumnString*>(col_from.get());
229
9
        }
230
231
26
        StringRef constant_default_value;
232
26
        bool default_value_const = false;
233
26
        bool default_value_null_const = false;
234
26
        ColumnPtr default_value_col;
235
26
        JsonBinaryValue default_jsonb_value_parser;
236
26
        const ColumnString* default_value_str_col = nullptr;
237
26
        const NullMap* default_value_nullmap = nullptr;
238
        if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) {
239
            auto* state = reinterpret_cast<FunctionJsonbParseState*>(
240
                    context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
241
            if (state && state->has_const_default_value) {
242
                constant_default_value = state->default_value;
243
                default_value_null_const = state->default_is_null;
244
                default_value_const = true;
245
            } else if (arguments.size() > 1) {
246
                if (block.get_by_position(arguments[1]).type->get_primitive_type() !=
247
                    PrimitiveType::TYPE_JSONB) {
248
                    return Status::InvalidArgument(
249
                            "{} second argument should be jsonb type, but got {}", get_name(),
250
                            block.get_by_position(arguments[1]).type->get_name());
251
                }
252
                std::tie(default_value_col, default_value_const) =
253
                        unpack_if_const(block.get_by_position(arguments[1]).column);
254
                if (default_value_const) {
255
                    const JsonbDocument* default_value_doc = nullptr;
256
                    if (default_value_col->is_null_at(0)) {
257
                        default_value_null_const = true;
258
                    } else {
259
                        auto data = default_value_col->get_data_at(0);
260
                        RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(data.data, data.size,
261
                                                                              &default_value_doc));
262
                        constant_default_value = data;
263
                    }
264
                } else {
265
                    if (default_value_col->is_nullable()) {
266
                        const auto& nullable_col =
267
                                assert_cast<const ColumnNullable&>(*default_value_col);
268
                        default_value_str_col = assert_cast<const ColumnString*>(
269
                                nullable_col.get_nested_column_ptr().get());
270
                        default_value_nullmap = &(nullable_col.get_null_map_data());
271
                    } else {
272
                        default_value_str_col =
273
                                assert_cast<const ColumnString*>(default_value_col.get());
274
                    }
275
                }
276
            } else if (arguments.size() == 1) {
277
                // parse default value '{}' should always success.
278
                RETURN_IF_ERROR(default_jsonb_value_parser.from_json_string(std::string("{}")));
279
                default_value_const = true;
280
                constant_default_value.data = default_jsonb_value_parser.value();
281
                constant_default_value.size = default_jsonb_value_parser.size();
282
            }
283
        }
284
285
26
        auto col_to = ColumnString::create();
286
287
26
        col_to->reserve(input_rows_count);
288
289
26
        auto& null_map_data = null_map->get_data();
290
291
        // parser can be reused for performance
292
26
        JsonBinaryValue jsonb_value;
293
294
68
        for (size_t i = 0; i < input_rows_count; ++i) {
295
42
            if (is_nullable && null_map_data[i]) {
296
1
                col_to->insert_default();
297
1
                continue;
298
1
            }
299
300
41
            auto index = index_check_const(i, col_from_is_const);
301
41
            const auto& val = col_from_string->get_data_at(index);
302
41
            auto st = jsonb_value.from_json_string(val.data, val.size);
303
41
            if (st.ok()) {
304
                // insert jsonb format data
305
35
                col_to->insert_data(jsonb_value.value(), jsonb_value.size());
306
35
            } else {
307
6
                if constexpr (parse_error_handle_mode == JsonbParseErrorMode::FAIL) {
308
6
                    return Status::InvalidArgument(
309
6
                            "Parse json document failed at row {}, error: {}", i, st.to_string());
310
                } else if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_NULL) {
311
                    null_map_data[i] = 1;
312
                    col_to->insert_default();
313
                } else {
314
                    if (default_value_const) {
315
                        if (default_value_null_const) {
316
                            null_map_data[i] = 1;
317
                            col_to->insert_default();
318
                        } else {
319
                            col_to->insert_data(constant_default_value.data,
320
                                                constant_default_value.size);
321
                        }
322
                    } else {
323
                        if (default_value_nullmap && (*default_value_nullmap)[i]) {
324
                            null_map_data[i] = 1;
325
                            col_to->insert_default();
326
                            continue;
327
                        }
328
                        auto value = default_value_str_col->get_data_at(i);
329
                        col_to->insert_data(value.data, value.size);
330
                    }
331
                }
332
6
            }
333
41
        }
334
335
26
        if (is_nullable) {
336
11
            block.replace_by_position(
337
11
                    result, ColumnNullable::create(std::move(col_to), std::move(null_map)));
338
15
        } else {
339
15
            block.replace_by_position(result, std::move(col_to));
340
15
        }
341
342
26
        return Status::OK();
343
26
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
185
34
                        uint32_t result, size_t input_rows_count) const override {
186
34
        auto&& [col_from, col_from_is_const] =
187
34
                unpack_if_const(block.get_by_position(arguments[0]).column);
188
189
34
        if (col_from_is_const && col_from->is_null_at(0)) {
190
0
            auto col_str = ColumnString::create();
191
0
            col_str->insert_default();
192
0
            auto null_map = ColumnUInt8::create(1, 1);
193
0
            auto nullable_col = ColumnNullable::create(std::move(col_str), std::move(null_map));
194
0
            block.get_by_position(result).column =
195
0
                    ColumnConst::create(std::move(nullable_col), input_rows_count);
196
0
            return Status::OK();
197
0
        }
198
199
34
        auto null_map = ColumnUInt8::create(0, 0);
200
34
        bool is_nullable = false;
201
202
34
        switch (nullable_mode) {
203
34
        case NullalbeMode::NULLABLE: {
204
34
            is_nullable = true;
205
34
            break;
206
0
        }
207
0
        case NullalbeMode::FOLLOW_INPUT: {
208
0
            for (auto arg : arguments) {
209
0
                is_nullable |= block.get_by_position(arg).type->is_nullable();
210
0
            }
211
0
            break;
212
0
        }
213
34
        }
214
215
34
        if (is_nullable) {
216
34
            null_map = ColumnUInt8::create(input_rows_count, 0);
217
34
        }
218
219
34
        const ColumnString* col_from_string = nullptr;
220
34
        if (col_from->is_nullable()) {
221
11
            const auto& nullable_col = assert_cast<const ColumnNullable&>(*col_from);
222
223
11
            VectorizedUtils::update_null_map(null_map->get_data(),
224
11
                                             nullable_col.get_null_map_data());
225
11
            col_from_string =
226
11
                    assert_cast<const ColumnString*>(nullable_col.get_nested_column_ptr().get());
227
23
        } else {
228
23
            col_from_string = assert_cast<const ColumnString*>(col_from.get());
229
23
        }
230
231
34
        StringRef constant_default_value;
232
34
        bool default_value_const = false;
233
34
        bool default_value_null_const = false;
234
34
        ColumnPtr default_value_col;
235
34
        JsonBinaryValue default_jsonb_value_parser;
236
34
        const ColumnString* default_value_str_col = nullptr;
237
34
        const NullMap* default_value_nullmap = nullptr;
238
        if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) {
239
            auto* state = reinterpret_cast<FunctionJsonbParseState*>(
240
                    context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
241
            if (state && state->has_const_default_value) {
242
                constant_default_value = state->default_value;
243
                default_value_null_const = state->default_is_null;
244
                default_value_const = true;
245
            } else if (arguments.size() > 1) {
246
                if (block.get_by_position(arguments[1]).type->get_primitive_type() !=
247
                    PrimitiveType::TYPE_JSONB) {
248
                    return Status::InvalidArgument(
249
                            "{} second argument should be jsonb type, but got {}", get_name(),
250
                            block.get_by_position(arguments[1]).type->get_name());
251
                }
252
                std::tie(default_value_col, default_value_const) =
253
                        unpack_if_const(block.get_by_position(arguments[1]).column);
254
                if (default_value_const) {
255
                    const JsonbDocument* default_value_doc = nullptr;
256
                    if (default_value_col->is_null_at(0)) {
257
                        default_value_null_const = true;
258
                    } else {
259
                        auto data = default_value_col->get_data_at(0);
260
                        RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(data.data, data.size,
261
                                                                              &default_value_doc));
262
                        constant_default_value = data;
263
                    }
264
                } else {
265
                    if (default_value_col->is_nullable()) {
266
                        const auto& nullable_col =
267
                                assert_cast<const ColumnNullable&>(*default_value_col);
268
                        default_value_str_col = assert_cast<const ColumnString*>(
269
                                nullable_col.get_nested_column_ptr().get());
270
                        default_value_nullmap = &(nullable_col.get_null_map_data());
271
                    } else {
272
                        default_value_str_col =
273
                                assert_cast<const ColumnString*>(default_value_col.get());
274
                    }
275
                }
276
            } else if (arguments.size() == 1) {
277
                // parse default value '{}' should always success.
278
                RETURN_IF_ERROR(default_jsonb_value_parser.from_json_string(std::string("{}")));
279
                default_value_const = true;
280
                constant_default_value.data = default_jsonb_value_parser.value();
281
                constant_default_value.size = default_jsonb_value_parser.size();
282
            }
283
        }
284
285
34
        auto col_to = ColumnString::create();
286
287
34
        col_to->reserve(input_rows_count);
288
289
34
        auto& null_map_data = null_map->get_data();
290
291
        // parser can be reused for performance
292
34
        JsonBinaryValue jsonb_value;
293
294
99
        for (size_t i = 0; i < input_rows_count; ++i) {
295
65
            if (is_nullable && null_map_data[i]) {
296
6
                col_to->insert_default();
297
6
                continue;
298
6
            }
299
300
59
            auto index = index_check_const(i, col_from_is_const);
301
59
            const auto& val = col_from_string->get_data_at(index);
302
59
            auto st = jsonb_value.from_json_string(val.data, val.size);
303
59
            if (st.ok()) {
304
                // insert jsonb format data
305
42
                col_to->insert_data(jsonb_value.value(), jsonb_value.size());
306
42
            } else {
307
                if constexpr (parse_error_handle_mode == JsonbParseErrorMode::FAIL) {
308
                    return Status::InvalidArgument(
309
                            "Parse json document failed at row {}, error: {}", i, st.to_string());
310
17
                } else if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_NULL) {
311
17
                    null_map_data[i] = 1;
312
17
                    col_to->insert_default();
313
                } else {
314
                    if (default_value_const) {
315
                        if (default_value_null_const) {
316
                            null_map_data[i] = 1;
317
                            col_to->insert_default();
318
                        } else {
319
                            col_to->insert_data(constant_default_value.data,
320
                                                constant_default_value.size);
321
                        }
322
                    } else {
323
                        if (default_value_nullmap && (*default_value_nullmap)[i]) {
324
                            null_map_data[i] = 1;
325
                            col_to->insert_default();
326
                            continue;
327
                        }
328
                        auto value = default_value_str_col->get_data_at(i);
329
                        col_to->insert_data(value.data, value.size);
330
                    }
331
                }
332
17
            }
333
59
        }
334
335
34
        if (is_nullable) {
336
34
            block.replace_by_position(
337
34
                    result, ColumnNullable::create(std::move(col_to), std::move(null_map)));
338
34
        } else {
339
0
            block.replace_by_position(result, std::move(col_to));
340
0
        }
341
342
34
        return Status::OK();
343
34
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
185
15
                        uint32_t result, size_t input_rows_count) const override {
186
15
        auto&& [col_from, col_from_is_const] =
187
15
                unpack_if_const(block.get_by_position(arguments[0]).column);
188
189
15
        if (col_from_is_const && col_from->is_null_at(0)) {
190
1
            auto col_str = ColumnString::create();
191
1
            col_str->insert_default();
192
1
            auto null_map = ColumnUInt8::create(1, 1);
193
1
            auto nullable_col = ColumnNullable::create(std::move(col_str), std::move(null_map));
194
1
            block.get_by_position(result).column =
195
1
                    ColumnConst::create(std::move(nullable_col), input_rows_count);
196
1
            return Status::OK();
197
1
        }
198
199
14
        auto null_map = ColumnUInt8::create(0, 0);
200
14
        bool is_nullable = false;
201
202
14
        switch (nullable_mode) {
203
0
        case NullalbeMode::NULLABLE: {
204
0
            is_nullable = true;
205
0
            break;
206
0
        }
207
14
        case NullalbeMode::FOLLOW_INPUT: {
208
26
            for (auto arg : arguments) {
209
26
                is_nullable |= block.get_by_position(arg).type->is_nullable();
210
26
            }
211
14
            break;
212
0
        }
213
14
        }
214
215
14
        if (is_nullable) {
216
13
            null_map = ColumnUInt8::create(input_rows_count, 0);
217
13
        }
218
219
14
        const ColumnString* col_from_string = nullptr;
220
14
        if (col_from->is_nullable()) {
221
11
            const auto& nullable_col = assert_cast<const ColumnNullable&>(*col_from);
222
223
11
            VectorizedUtils::update_null_map(null_map->get_data(),
224
11
                                             nullable_col.get_null_map_data());
225
11
            col_from_string =
226
11
                    assert_cast<const ColumnString*>(nullable_col.get_nested_column_ptr().get());
227
11
        } else {
228
3
            col_from_string = assert_cast<const ColumnString*>(col_from.get());
229
3
        }
230
231
14
        StringRef constant_default_value;
232
14
        bool default_value_const = false;
233
14
        bool default_value_null_const = false;
234
14
        ColumnPtr default_value_col;
235
14
        JsonBinaryValue default_jsonb_value_parser;
236
14
        const ColumnString* default_value_str_col = nullptr;
237
14
        const NullMap* default_value_nullmap = nullptr;
238
14
        if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) {
239
14
            auto* state = reinterpret_cast<FunctionJsonbParseState*>(
240
14
                    context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
241
14
            if (state && state->has_const_default_value) {
242
7
                constant_default_value = state->default_value;
243
7
                default_value_null_const = state->default_is_null;
244
7
                default_value_const = true;
245
7
            } else if (arguments.size() > 1) {
246
7
                if (block.get_by_position(arguments[1]).type->get_primitive_type() !=
247
7
                    PrimitiveType::TYPE_JSONB) {
248
1
                    return Status::InvalidArgument(
249
1
                            "{} second argument should be jsonb type, but got {}", get_name(),
250
1
                            block.get_by_position(arguments[1]).type->get_name());
251
1
                }
252
6
                std::tie(default_value_col, default_value_const) =
253
6
                        unpack_if_const(block.get_by_position(arguments[1]).column);
254
6
                if (default_value_const) {
255
1
                    const JsonbDocument* default_value_doc = nullptr;
256
1
                    if (default_value_col->is_null_at(0)) {
257
1
                        default_value_null_const = true;
258
1
                    } else {
259
0
                        auto data = default_value_col->get_data_at(0);
260
0
                        RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(data.data, data.size,
261
0
                                                                              &default_value_doc));
262
0
                        constant_default_value = data;
263
0
                    }
264
5
                } else {
265
5
                    if (default_value_col->is_nullable()) {
266
4
                        const auto& nullable_col =
267
4
                                assert_cast<const ColumnNullable&>(*default_value_col);
268
4
                        default_value_str_col = assert_cast<const ColumnString*>(
269
4
                                nullable_col.get_nested_column_ptr().get());
270
4
                        default_value_nullmap = &(nullable_col.get_null_map_data());
271
4
                    } else {
272
1
                        default_value_str_col =
273
1
                                assert_cast<const ColumnString*>(default_value_col.get());
274
1
                    }
275
5
                }
276
6
            } else if (arguments.size() == 1) {
277
                // parse default value '{}' should always success.
278
0
                RETURN_IF_ERROR(default_jsonb_value_parser.from_json_string(std::string("{}")));
279
0
                default_value_const = true;
280
0
                constant_default_value.data = default_jsonb_value_parser.value();
281
0
                constant_default_value.size = default_jsonb_value_parser.size();
282
0
            }
283
14
        }
284
285
13
        auto col_to = ColumnString::create();
286
287
14
        col_to->reserve(input_rows_count);
288
289
14
        auto& null_map_data = null_map->get_data();
290
291
        // parser can be reused for performance
292
14
        JsonBinaryValue jsonb_value;
293
294
115
        for (size_t i = 0; i < input_rows_count; ++i) {
295
98
            if (is_nullable && null_map_data[i]) {
296
6
                col_to->insert_default();
297
6
                continue;
298
6
            }
299
300
92
            auto index = index_check_const(i, col_from_is_const);
301
92
            const auto& val = col_from_string->get_data_at(index);
302
92
            auto st = jsonb_value.from_json_string(val.data, val.size);
303
92
            if (st.ok()) {
304
                // insert jsonb format data
305
61
                col_to->insert_data(jsonb_value.value(), jsonb_value.size());
306
61
            } else {
307
                if constexpr (parse_error_handle_mode == JsonbParseErrorMode::FAIL) {
308
                    return Status::InvalidArgument(
309
                            "Parse json document failed at row {}, error: {}", i, st.to_string());
310
                } else if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_NULL) {
311
                    null_map_data[i] = 1;
312
                    col_to->insert_default();
313
31
                } else {
314
31
                    if (default_value_const) {
315
9
                        if (default_value_null_const) {
316
3
                            null_map_data[i] = 1;
317
3
                            col_to->insert_default();
318
6
                        } else {
319
6
                            col_to->insert_data(constant_default_value.data,
320
6
                                                constant_default_value.size);
321
6
                        }
322
22
                    } else {
323
22
                        if (default_value_nullmap && (*default_value_nullmap)[i]) {
324
3
                            null_map_data[i] = 1;
325
3
                            col_to->insert_default();
326
3
                            continue;
327
3
                        }
328
19
                        auto value = default_value_str_col->get_data_at(i);
329
19
                        col_to->insert_data(value.data, value.size);
330
19
                    }
331
31
                }
332
31
            }
333
92
        }
334
335
17
        if (is_nullable) {
336
13
            block.replace_by_position(
337
13
                    result, ColumnNullable::create(std::move(col_to), std::move(null_map)));
338
13
        } else {
339
4
            block.replace_by_position(result, std::move(col_to));
340
4
        }
341
342
17
        return Status::OK();
343
14
    }
344
};
345
346
// jsonb_parse return type nullable as input
347
using FunctionJsonbParse =
348
        FunctionJsonbParseBase<NullalbeMode::FOLLOW_INPUT, JsonbParseErrorMode::FAIL>;
349
using FunctionJsonbParseErrorNull =
350
        FunctionJsonbParseBase<NullalbeMode::NULLABLE, JsonbParseErrorMode::RETURN_NULL>;
351
using FunctionJsonbParseErrorValue =
352
        FunctionJsonbParseBase<NullalbeMode::FOLLOW_INPUT, JsonbParseErrorMode::RETURN_VALUE>;
353
354
// func(jsonb, [varchar, varchar, ...]) -> nullable(type)
355
template <typename Impl>
356
class FunctionJsonbExtract : public IFunction {
357
public:
358
    static constexpr auto name = Impl::name;
359
    static constexpr auto alias = Impl::alias;
360
1.66k
    static FunctionPtr create() { return std::make_shared<FunctionJsonbExtract>(); }
_ZN5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE6createEv
Line
Count
Source
360
149
    static FunctionPtr create() { return std::make_shared<FunctionJsonbExtract>(); }
_ZN5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE6createEv
Line
Count
Source
360
145
    static FunctionPtr create() { return std::make_shared<FunctionJsonbExtract>(); }
_ZN5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE6createEv
Line
Count
Source
360
1.35k
    static FunctionPtr create() { return std::make_shared<FunctionJsonbExtract>(); }
_ZN5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE6createEv
Line
Count
Source
360
15
    static FunctionPtr create() { return std::make_shared<FunctionJsonbExtract>(); }
361
0
    String get_name() const override { return name; }
Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE8get_nameB5cxx11Ev
362
1.63k
    bool is_variadic() const override { return true; }
_ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE11is_variadicEv
Line
Count
Source
362
141
    bool is_variadic() const override { return true; }
_ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE11is_variadicEv
Line
Count
Source
362
137
    bool is_variadic() const override { return true; }
_ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE11is_variadicEv
Line
Count
Source
362
1.34k
    bool is_variadic() const override { return true; }
_ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE11is_variadicEv
Line
Count
Source
362
7
    bool is_variadic() const override { return true; }
363
1
    size_t get_number_of_arguments() const override { return 0; }
Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE23get_number_of_argumentsEv
_ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE23get_number_of_argumentsEv
Line
Count
Source
363
1
    size_t get_number_of_arguments() const override { return 0; }
Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE23get_number_of_argumentsEv
364
14.1k
    bool use_default_implementation_for_nulls() const override { return false; }
_ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE36use_default_implementation_for_nullsEv
Line
Count
Source
364
1.46k
    bool use_default_implementation_for_nulls() const override { return false; }
_ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE36use_default_implementation_for_nullsEv
Line
Count
Source
364
1.45k
    bool use_default_implementation_for_nulls() const override { return false; }
_ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE36use_default_implementation_for_nullsEv
Line
Count
Source
364
11.2k
    bool use_default_implementation_for_nulls() const override { return false; }
_ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE36use_default_implementation_for_nullsEv
Line
Count
Source
364
12
    bool use_default_implementation_for_nulls() const override { return false; }
365
1.62k
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
366
1.62k
        return make_nullable(std::make_shared<typename Impl::ReturnType>());
367
1.62k
    }
_ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
365
140
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
366
140
        return make_nullable(std::make_shared<typename Impl::ReturnType>());
367
140
    }
_ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
365
136
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
366
136
        return make_nullable(std::make_shared<typename Impl::ReturnType>());
367
136
    }
_ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
365
1.34k
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
366
1.34k
        return make_nullable(std::make_shared<typename Impl::ReturnType>());
367
1.34k
    }
_ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
365
6
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
366
6
        return make_nullable(std::make_shared<typename Impl::ReturnType>());
367
6
    }
368
32
    DataTypes get_variadic_argument_types_impl() const override {
369
        if constexpr (HasGetVariadicArgumentTypesImpl<Impl>) {
370
            return Impl::get_variadic_argument_types_impl();
371
32
        } else {
372
32
            return {};
373
32
        }
374
32
    }
_ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE32get_variadic_argument_types_implEv
Line
Count
Source
368
8
    DataTypes get_variadic_argument_types_impl() const override {
369
        if constexpr (HasGetVariadicArgumentTypesImpl<Impl>) {
370
            return Impl::get_variadic_argument_types_impl();
371
8
        } else {
372
8
            return {};
373
8
        }
374
8
    }
_ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE32get_variadic_argument_types_implEv
Line
Count
Source
368
8
    DataTypes get_variadic_argument_types_impl() const override {
369
        if constexpr (HasGetVariadicArgumentTypesImpl<Impl>) {
370
            return Impl::get_variadic_argument_types_impl();
371
8
        } else {
372
8
            return {};
373
8
        }
374
8
    }
_ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE32get_variadic_argument_types_implEv
Line
Count
Source
368
8
    DataTypes get_variadic_argument_types_impl() const override {
369
        if constexpr (HasGetVariadicArgumentTypesImpl<Impl>) {
370
            return Impl::get_variadic_argument_types_impl();
371
8
        } else {
372
8
            return {};
373
8
        }
374
8
    }
_ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE32get_variadic_argument_types_implEv
Line
Count
Source
368
8
    DataTypes get_variadic_argument_types_impl() const override {
369
        if constexpr (HasGetVariadicArgumentTypesImpl<Impl>) {
370
            return Impl::get_variadic_argument_types_impl();
371
8
        } else {
372
8
            return {};
373
8
        }
374
8
    }
375
376
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
377
12.5k
                        uint32_t result, size_t input_rows_count) const override {
378
12.5k
        DCHECK_GE(arguments.size(), 2);
379
380
12.5k
        ColumnPtr jsonb_data_column;
381
12.5k
        bool jsonb_data_const = false;
382
12.5k
        const NullMap* data_null_map = nullptr;
383
384
12.5k
        if (block.get_by_position(arguments[0]).type->get_primitive_type() !=
385
12.5k
            PrimitiveType::TYPE_JSONB) {
386
1
            return Status::InvalidArgument(
387
1
                    "jsonb_extract first argument should be json type, but got {}",
388
1
                    block.get_by_position(arguments[0]).type->get_name());
389
1
        }
390
391
        // prepare jsonb data column
392
12.5k
        std::tie(jsonb_data_column, jsonb_data_const) =
393
12.5k
                unpack_if_const(block.get_by_position(arguments[0]).column);
394
12.5k
        if (jsonb_data_column->is_nullable()) {
395
10.7k
            const auto& nullable_column = assert_cast<const ColumnNullable&>(*jsonb_data_column);
396
10.7k
            jsonb_data_column = nullable_column.get_nested_column_ptr();
397
10.7k
            data_null_map = &nullable_column.get_null_map_data();
398
10.7k
        }
399
12.5k
        const auto& ldata = assert_cast<const ColumnString*>(jsonb_data_column.get())->get_chars();
400
12.5k
        const auto& loffsets =
401
12.5k
                assert_cast<const ColumnString*>(jsonb_data_column.get())->get_offsets();
402
403
        // prepare parse path column prepare
404
12.5k
        std::vector<const ColumnString*> jsonb_path_columns;
405
12.5k
        std::vector<bool> path_const(arguments.size() - 1);
406
12.5k
        std::vector<const NullMap*> path_null_maps(arguments.size() - 1, nullptr);
407
25.4k
        for (int i = 0; i < arguments.size() - 1; ++i) {
408
12.8k
            ColumnPtr path_column;
409
12.8k
            bool is_const = false;
410
12.8k
            std::tie(path_column, is_const) =
411
12.8k
                    unpack_if_const(block.get_by_position(arguments[i + 1]).column);
412
12.8k
            path_const[i] = is_const;
413
12.8k
            if (path_column->is_nullable()) {
414
66
                const auto& nullable_column = assert_cast<const ColumnNullable&>(*path_column);
415
66
                path_column = nullable_column.get_nested_column_ptr();
416
66
                path_null_maps[i] = &nullable_column.get_null_map_data();
417
66
            }
418
12.8k
            jsonb_path_columns.push_back(assert_cast<const ColumnString*>(path_column.get()));
419
12.8k
        }
420
421
12.5k
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
422
12.5k
        auto res = Impl::ColumnType::create();
423
424
        // execute Impl
425
        if constexpr (std::is_same_v<typename Impl::ReturnType, DataTypeString> ||
426
11.2k
                      std::is_same_v<typename Impl::ReturnType, DataTypeJsonb>) {
427
11.2k
            auto& res_data = res->get_chars();
428
11.2k
            auto& res_offsets = res->get_offsets();
429
11.2k
            RETURN_IF_ERROR(Impl::vector_vector_v2(
430
11.2k
                    context, ldata, loffsets, data_null_map, jsonb_data_const, jsonb_path_columns,
431
11.2k
                    path_null_maps, path_const, res_data, res_offsets, null_map->get_data()));
432
11.2k
        } else {
433
            // not support other extract type for now (e.g. int, double, ...)
434
1.32k
            DCHECK_EQ(jsonb_path_columns.size(), 1);
435
1.32k
            const auto& rdata = jsonb_path_columns[0]->get_chars();
436
1.32k
            const auto& roffsets = jsonb_path_columns[0]->get_offsets();
437
438
1.32k
            auto create_all_null_result = [&]() {
439
2
                res = Impl::ColumnType::create();
440
2
                res->insert_default();
441
2
                auto nullable_column =
442
2
                        ColumnNullable::create(std::move(res), ColumnUInt8::create(1, 1));
443
2
                auto const_column =
444
2
                        ColumnConst::create(std::move(nullable_column), input_rows_count);
445
2
                block.get_by_position(result).column = std::move(const_column);
446
2
                return Status::OK();
447
2
            };
448
449
1.32k
            if (jsonb_data_const) {
450
2
                if (data_null_map && (*data_null_map)[0]) {
451
1
                    return create_all_null_result();
452
1
                }
453
454
1
                RETURN_IF_ERROR(Impl::scalar_vector(context, jsonb_data_column->get_data_at(0),
455
1
                                                    rdata, roffsets, path_null_maps[0],
456
1
                                                    res->get_data(), null_map->get_data()));
457
1.32k
            } else if (path_const[0]) {
458
1.32k
                if (path_null_maps[0] && (*path_null_maps[0])[0]) {
459
1
                    return create_all_null_result();
460
1
                }
461
1.32k
                RETURN_IF_ERROR(Impl::vector_scalar(context, ldata, loffsets, data_null_map,
462
1.32k
                                                    jsonb_path_columns[0]->get_data_at(0),
463
1.32k
                                                    res->get_data(), null_map->get_data()));
464
1.32k
            } else {
465
0
                RETURN_IF_ERROR(Impl::vector_vector(context, ldata, loffsets, data_null_map, rdata,
466
0
                                                    roffsets, path_null_maps[0], res->get_data(),
467
0
                                                    null_map->get_data()));
468
0
            }
469
1.32k
        }
470
471
12.5k
        block.get_by_position(result).column =
472
12.5k
                ColumnNullable::create(std::move(res), std::move(null_map));
473
12.5k
        return Status::OK();
474
12.5k
    }
_ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
377
1.32k
                        uint32_t result, size_t input_rows_count) const override {
378
1.32k
        DCHECK_GE(arguments.size(), 2);
379
380
1.32k
        ColumnPtr jsonb_data_column;
381
1.32k
        bool jsonb_data_const = false;
382
1.32k
        const NullMap* data_null_map = nullptr;
383
384
1.32k
        if (block.get_by_position(arguments[0]).type->get_primitive_type() !=
385
1.32k
            PrimitiveType::TYPE_JSONB) {
386
0
            return Status::InvalidArgument(
387
0
                    "jsonb_extract first argument should be json type, but got {}",
388
0
                    block.get_by_position(arguments[0]).type->get_name());
389
0
        }
390
391
        // prepare jsonb data column
392
1.32k
        std::tie(jsonb_data_column, jsonb_data_const) =
393
1.32k
                unpack_if_const(block.get_by_position(arguments[0]).column);
394
1.32k
        if (jsonb_data_column->is_nullable()) {
395
1.14k
            const auto& nullable_column = assert_cast<const ColumnNullable&>(*jsonb_data_column);
396
1.14k
            jsonb_data_column = nullable_column.get_nested_column_ptr();
397
1.14k
            data_null_map = &nullable_column.get_null_map_data();
398
1.14k
        }
399
1.32k
        const auto& ldata = assert_cast<const ColumnString*>(jsonb_data_column.get())->get_chars();
400
1.32k
        const auto& loffsets =
401
1.32k
                assert_cast<const ColumnString*>(jsonb_data_column.get())->get_offsets();
402
403
        // prepare parse path column prepare
404
1.32k
        std::vector<const ColumnString*> jsonb_path_columns;
405
1.32k
        std::vector<bool> path_const(arguments.size() - 1);
406
1.32k
        std::vector<const NullMap*> path_null_maps(arguments.size() - 1, nullptr);
407
2.64k
        for (int i = 0; i < arguments.size() - 1; ++i) {
408
1.32k
            ColumnPtr path_column;
409
1.32k
            bool is_const = false;
410
1.32k
            std::tie(path_column, is_const) =
411
1.32k
                    unpack_if_const(block.get_by_position(arguments[i + 1]).column);
412
1.32k
            path_const[i] = is_const;
413
1.32k
            if (path_column->is_nullable()) {
414
5
                const auto& nullable_column = assert_cast<const ColumnNullable&>(*path_column);
415
5
                path_column = nullable_column.get_nested_column_ptr();
416
5
                path_null_maps[i] = &nullable_column.get_null_map_data();
417
5
            }
418
1.32k
            jsonb_path_columns.push_back(assert_cast<const ColumnString*>(path_column.get()));
419
1.32k
        }
420
421
1.32k
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
422
1.32k
        auto res = Impl::ColumnType::create();
423
424
        // execute Impl
425
        if constexpr (std::is_same_v<typename Impl::ReturnType, DataTypeString> ||
426
1.32k
                      std::is_same_v<typename Impl::ReturnType, DataTypeJsonb>) {
427
1.32k
            auto& res_data = res->get_chars();
428
1.32k
            auto& res_offsets = res->get_offsets();
429
1.32k
            RETURN_IF_ERROR(Impl::vector_vector_v2(
430
1.32k
                    context, ldata, loffsets, data_null_map, jsonb_data_const, jsonb_path_columns,
431
1.32k
                    path_null_maps, path_const, res_data, res_offsets, null_map->get_data()));
432
        } else {
433
            // not support other extract type for now (e.g. int, double, ...)
434
            DCHECK_EQ(jsonb_path_columns.size(), 1);
435
            const auto& rdata = jsonb_path_columns[0]->get_chars();
436
            const auto& roffsets = jsonb_path_columns[0]->get_offsets();
437
438
            auto create_all_null_result = [&]() {
439
                res = Impl::ColumnType::create();
440
                res->insert_default();
441
                auto nullable_column =
442
                        ColumnNullable::create(std::move(res), ColumnUInt8::create(1, 1));
443
                auto const_column =
444
                        ColumnConst::create(std::move(nullable_column), input_rows_count);
445
                block.get_by_position(result).column = std::move(const_column);
446
                return Status::OK();
447
            };
448
449
            if (jsonb_data_const) {
450
                if (data_null_map && (*data_null_map)[0]) {
451
                    return create_all_null_result();
452
                }
453
454
                RETURN_IF_ERROR(Impl::scalar_vector(context, jsonb_data_column->get_data_at(0),
455
                                                    rdata, roffsets, path_null_maps[0],
456
                                                    res->get_data(), null_map->get_data()));
457
            } else if (path_const[0]) {
458
                if (path_null_maps[0] && (*path_null_maps[0])[0]) {
459
                    return create_all_null_result();
460
                }
461
                RETURN_IF_ERROR(Impl::vector_scalar(context, ldata, loffsets, data_null_map,
462
                                                    jsonb_path_columns[0]->get_data_at(0),
463
                                                    res->get_data(), null_map->get_data()));
464
            } else {
465
                RETURN_IF_ERROR(Impl::vector_vector(context, ldata, loffsets, data_null_map, rdata,
466
                                                    roffsets, path_null_maps[0], res->get_data(),
467
                                                    null_map->get_data()));
468
            }
469
        }
470
471
1.32k
        block.get_by_position(result).column =
472
1.32k
                ColumnNullable::create(std::move(res), std::move(null_map));
473
1.32k
        return Status::OK();
474
1.32k
    }
_ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
377
1.32k
                        uint32_t result, size_t input_rows_count) const override {
378
1.32k
        DCHECK_GE(arguments.size(), 2);
379
380
1.32k
        ColumnPtr jsonb_data_column;
381
1.32k
        bool jsonb_data_const = false;
382
1.32k
        const NullMap* data_null_map = nullptr;
383
384
1.32k
        if (block.get_by_position(arguments[0]).type->get_primitive_type() !=
385
1.32k
            PrimitiveType::TYPE_JSONB) {
386
0
            return Status::InvalidArgument(
387
0
                    "jsonb_extract first argument should be json type, but got {}",
388
0
                    block.get_by_position(arguments[0]).type->get_name());
389
0
        }
390
391
        // prepare jsonb data column
392
1.32k
        std::tie(jsonb_data_column, jsonb_data_const) =
393
1.32k
                unpack_if_const(block.get_by_position(arguments[0]).column);
394
1.32k
        if (jsonb_data_column->is_nullable()) {
395
1.14k
            const auto& nullable_column = assert_cast<const ColumnNullable&>(*jsonb_data_column);
396
1.14k
            jsonb_data_column = nullable_column.get_nested_column_ptr();
397
1.14k
            data_null_map = &nullable_column.get_null_map_data();
398
1.14k
        }
399
1.32k
        const auto& ldata = assert_cast<const ColumnString*>(jsonb_data_column.get())->get_chars();
400
1.32k
        const auto& loffsets =
401
1.32k
                assert_cast<const ColumnString*>(jsonb_data_column.get())->get_offsets();
402
403
        // prepare parse path column prepare
404
1.32k
        std::vector<const ColumnString*> jsonb_path_columns;
405
1.32k
        std::vector<bool> path_const(arguments.size() - 1);
406
1.32k
        std::vector<const NullMap*> path_null_maps(arguments.size() - 1, nullptr);
407
2.64k
        for (int i = 0; i < arguments.size() - 1; ++i) {
408
1.32k
            ColumnPtr path_column;
409
1.32k
            bool is_const = false;
410
1.32k
            std::tie(path_column, is_const) =
411
1.32k
                    unpack_if_const(block.get_by_position(arguments[i + 1]).column);
412
1.32k
            path_const[i] = is_const;
413
1.32k
            if (path_column->is_nullable()) {
414
4
                const auto& nullable_column = assert_cast<const ColumnNullable&>(*path_column);
415
4
                path_column = nullable_column.get_nested_column_ptr();
416
4
                path_null_maps[i] = &nullable_column.get_null_map_data();
417
4
            }
418
1.32k
            jsonb_path_columns.push_back(assert_cast<const ColumnString*>(path_column.get()));
419
1.32k
        }
420
421
1.32k
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
422
1.32k
        auto res = Impl::ColumnType::create();
423
424
        // execute Impl
425
        if constexpr (std::is_same_v<typename Impl::ReturnType, DataTypeString> ||
426
                      std::is_same_v<typename Impl::ReturnType, DataTypeJsonb>) {
427
            auto& res_data = res->get_chars();
428
            auto& res_offsets = res->get_offsets();
429
            RETURN_IF_ERROR(Impl::vector_vector_v2(
430
                    context, ldata, loffsets, data_null_map, jsonb_data_const, jsonb_path_columns,
431
                    path_null_maps, path_const, res_data, res_offsets, null_map->get_data()));
432
1.32k
        } else {
433
            // not support other extract type for now (e.g. int, double, ...)
434
1.32k
            DCHECK_EQ(jsonb_path_columns.size(), 1);
435
1.32k
            const auto& rdata = jsonb_path_columns[0]->get_chars();
436
1.32k
            const auto& roffsets = jsonb_path_columns[0]->get_offsets();
437
438
1.32k
            auto create_all_null_result = [&]() {
439
1.32k
                res = Impl::ColumnType::create();
440
1.32k
                res->insert_default();
441
1.32k
                auto nullable_column =
442
1.32k
                        ColumnNullable::create(std::move(res), ColumnUInt8::create(1, 1));
443
1.32k
                auto const_column =
444
1.32k
                        ColumnConst::create(std::move(nullable_column), input_rows_count);
445
1.32k
                block.get_by_position(result).column = std::move(const_column);
446
1.32k
                return Status::OK();
447
1.32k
            };
448
449
1.32k
            if (jsonb_data_const) {
450
2
                if (data_null_map && (*data_null_map)[0]) {
451
1
                    return create_all_null_result();
452
1
                }
453
454
1
                RETURN_IF_ERROR(Impl::scalar_vector(context, jsonb_data_column->get_data_at(0),
455
1
                                                    rdata, roffsets, path_null_maps[0],
456
1
                                                    res->get_data(), null_map->get_data()));
457
1.32k
            } else if (path_const[0]) {
458
1.32k
                if (path_null_maps[0] && (*path_null_maps[0])[0]) {
459
1
                    return create_all_null_result();
460
1
                }
461
1.32k
                RETURN_IF_ERROR(Impl::vector_scalar(context, ldata, loffsets, data_null_map,
462
1.32k
                                                    jsonb_path_columns[0]->get_data_at(0),
463
1.32k
                                                    res->get_data(), null_map->get_data()));
464
1.32k
            } else {
465
0
                RETURN_IF_ERROR(Impl::vector_vector(context, ldata, loffsets, data_null_map, rdata,
466
0
                                                    roffsets, path_null_maps[0], res->get_data(),
467
0
                                                    null_map->get_data()));
468
0
            }
469
1.32k
        }
470
471
1.32k
        block.get_by_position(result).column =
472
1.32k
                ColumnNullable::create(std::move(res), std::move(null_map));
473
1.32k
        return Status::OK();
474
1.32k
    }
_ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
377
9.88k
                        uint32_t result, size_t input_rows_count) const override {
378
9.88k
        DCHECK_GE(arguments.size(), 2);
379
380
9.88k
        ColumnPtr jsonb_data_column;
381
9.88k
        bool jsonb_data_const = false;
382
9.88k
        const NullMap* data_null_map = nullptr;
383
384
9.88k
        if (block.get_by_position(arguments[0]).type->get_primitive_type() !=
385
9.88k
            PrimitiveType::TYPE_JSONB) {
386
1
            return Status::InvalidArgument(
387
1
                    "jsonb_extract first argument should be json type, but got {}",
388
1
                    block.get_by_position(arguments[0]).type->get_name());
389
1
        }
390
391
        // prepare jsonb data column
392
9.88k
        std::tie(jsonb_data_column, jsonb_data_const) =
393
9.88k
                unpack_if_const(block.get_by_position(arguments[0]).column);
394
9.88k
        if (jsonb_data_column->is_nullable()) {
395
8.45k
            const auto& nullable_column = assert_cast<const ColumnNullable&>(*jsonb_data_column);
396
8.45k
            jsonb_data_column = nullable_column.get_nested_column_ptr();
397
8.45k
            data_null_map = &nullable_column.get_null_map_data();
398
8.45k
        }
399
9.88k
        const auto& ldata = assert_cast<const ColumnString*>(jsonb_data_column.get())->get_chars();
400
9.88k
        const auto& loffsets =
401
9.88k
                assert_cast<const ColumnString*>(jsonb_data_column.get())->get_offsets();
402
403
        // prepare parse path column prepare
404
9.88k
        std::vector<const ColumnString*> jsonb_path_columns;
405
9.88k
        std::vector<bool> path_const(arguments.size() - 1);
406
9.88k
        std::vector<const NullMap*> path_null_maps(arguments.size() - 1, nullptr);
407
20.1k
        for (int i = 0; i < arguments.size() - 1; ++i) {
408
10.2k
            ColumnPtr path_column;
409
10.2k
            bool is_const = false;
410
10.2k
            std::tie(path_column, is_const) =
411
10.2k
                    unpack_if_const(block.get_by_position(arguments[i + 1]).column);
412
10.2k
            path_const[i] = is_const;
413
10.2k
            if (path_column->is_nullable()) {
414
56
                const auto& nullable_column = assert_cast<const ColumnNullable&>(*path_column);
415
56
                path_column = nullable_column.get_nested_column_ptr();
416
56
                path_null_maps[i] = &nullable_column.get_null_map_data();
417
56
            }
418
10.2k
            jsonb_path_columns.push_back(assert_cast<const ColumnString*>(path_column.get()));
419
10.2k
        }
420
421
9.88k
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
422
9.88k
        auto res = Impl::ColumnType::create();
423
424
        // execute Impl
425
        if constexpr (std::is_same_v<typename Impl::ReturnType, DataTypeString> ||
426
9.88k
                      std::is_same_v<typename Impl::ReturnType, DataTypeJsonb>) {
427
9.88k
            auto& res_data = res->get_chars();
428
9.88k
            auto& res_offsets = res->get_offsets();
429
9.88k
            RETURN_IF_ERROR(Impl::vector_vector_v2(
430
9.88k
                    context, ldata, loffsets, data_null_map, jsonb_data_const, jsonb_path_columns,
431
9.88k
                    path_null_maps, path_const, res_data, res_offsets, null_map->get_data()));
432
        } else {
433
            // not support other extract type for now (e.g. int, double, ...)
434
            DCHECK_EQ(jsonb_path_columns.size(), 1);
435
            const auto& rdata = jsonb_path_columns[0]->get_chars();
436
            const auto& roffsets = jsonb_path_columns[0]->get_offsets();
437
438
            auto create_all_null_result = [&]() {
439
                res = Impl::ColumnType::create();
440
                res->insert_default();
441
                auto nullable_column =
442
                        ColumnNullable::create(std::move(res), ColumnUInt8::create(1, 1));
443
                auto const_column =
444
                        ColumnConst::create(std::move(nullable_column), input_rows_count);
445
                block.get_by_position(result).column = std::move(const_column);
446
                return Status::OK();
447
            };
448
449
            if (jsonb_data_const) {
450
                if (data_null_map && (*data_null_map)[0]) {
451
                    return create_all_null_result();
452
                }
453
454
                RETURN_IF_ERROR(Impl::scalar_vector(context, jsonb_data_column->get_data_at(0),
455
                                                    rdata, roffsets, path_null_maps[0],
456
                                                    res->get_data(), null_map->get_data()));
457
            } else if (path_const[0]) {
458
                if (path_null_maps[0] && (*path_null_maps[0])[0]) {
459
                    return create_all_null_result();
460
                }
461
                RETURN_IF_ERROR(Impl::vector_scalar(context, ldata, loffsets, data_null_map,
462
                                                    jsonb_path_columns[0]->get_data_at(0),
463
                                                    res->get_data(), null_map->get_data()));
464
            } else {
465
                RETURN_IF_ERROR(Impl::vector_vector(context, ldata, loffsets, data_null_map, rdata,
466
                                                    roffsets, path_null_maps[0], res->get_data(),
467
                                                    null_map->get_data()));
468
            }
469
        }
470
471
9.87k
        block.get_by_position(result).column =
472
9.88k
                ColumnNullable::create(std::move(res), std::move(null_map));
473
9.88k
        return Status::OK();
474
9.88k
    }
_ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
377
6
                        uint32_t result, size_t input_rows_count) const override {
378
6
        DCHECK_GE(arguments.size(), 2);
379
380
6
        ColumnPtr jsonb_data_column;
381
6
        bool jsonb_data_const = false;
382
6
        const NullMap* data_null_map = nullptr;
383
384
6
        if (block.get_by_position(arguments[0]).type->get_primitive_type() !=
385
6
            PrimitiveType::TYPE_JSONB) {
386
0
            return Status::InvalidArgument(
387
0
                    "jsonb_extract first argument should be json type, but got {}",
388
0
                    block.get_by_position(arguments[0]).type->get_name());
389
0
        }
390
391
        // prepare jsonb data column
392
6
        std::tie(jsonb_data_column, jsonb_data_const) =
393
6
                unpack_if_const(block.get_by_position(arguments[0]).column);
394
6
        if (jsonb_data_column->is_nullable()) {
395
6
            const auto& nullable_column = assert_cast<const ColumnNullable&>(*jsonb_data_column);
396
6
            jsonb_data_column = nullable_column.get_nested_column_ptr();
397
6
            data_null_map = &nullable_column.get_null_map_data();
398
6
        }
399
6
        const auto& ldata = assert_cast<const ColumnString*>(jsonb_data_column.get())->get_chars();
400
6
        const auto& loffsets =
401
6
                assert_cast<const ColumnString*>(jsonb_data_column.get())->get_offsets();
402
403
        // prepare parse path column prepare
404
6
        std::vector<const ColumnString*> jsonb_path_columns;
405
6
        std::vector<bool> path_const(arguments.size() - 1);
406
6
        std::vector<const NullMap*> path_null_maps(arguments.size() - 1, nullptr);
407
16
        for (int i = 0; i < arguments.size() - 1; ++i) {
408
10
            ColumnPtr path_column;
409
10
            bool is_const = false;
410
10
            std::tie(path_column, is_const) =
411
10
                    unpack_if_const(block.get_by_position(arguments[i + 1]).column);
412
10
            path_const[i] = is_const;
413
10
            if (path_column->is_nullable()) {
414
1
                const auto& nullable_column = assert_cast<const ColumnNullable&>(*path_column);
415
1
                path_column = nullable_column.get_nested_column_ptr();
416
1
                path_null_maps[i] = &nullable_column.get_null_map_data();
417
1
            }
418
10
            jsonb_path_columns.push_back(assert_cast<const ColumnString*>(path_column.get()));
419
10
        }
420
421
6
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
422
6
        auto res = Impl::ColumnType::create();
423
424
        // execute Impl
425
        if constexpr (std::is_same_v<typename Impl::ReturnType, DataTypeString> ||
426
6
                      std::is_same_v<typename Impl::ReturnType, DataTypeJsonb>) {
427
6
            auto& res_data = res->get_chars();
428
6
            auto& res_offsets = res->get_offsets();
429
6
            RETURN_IF_ERROR(Impl::vector_vector_v2(
430
6
                    context, ldata, loffsets, data_null_map, jsonb_data_const, jsonb_path_columns,
431
6
                    path_null_maps, path_const, res_data, res_offsets, null_map->get_data()));
432
        } else {
433
            // not support other extract type for now (e.g. int, double, ...)
434
            DCHECK_EQ(jsonb_path_columns.size(), 1);
435
            const auto& rdata = jsonb_path_columns[0]->get_chars();
436
            const auto& roffsets = jsonb_path_columns[0]->get_offsets();
437
438
            auto create_all_null_result = [&]() {
439
                res = Impl::ColumnType::create();
440
                res->insert_default();
441
                auto nullable_column =
442
                        ColumnNullable::create(std::move(res), ColumnUInt8::create(1, 1));
443
                auto const_column =
444
                        ColumnConst::create(std::move(nullable_column), input_rows_count);
445
                block.get_by_position(result).column = std::move(const_column);
446
                return Status::OK();
447
            };
448
449
            if (jsonb_data_const) {
450
                if (data_null_map && (*data_null_map)[0]) {
451
                    return create_all_null_result();
452
                }
453
454
                RETURN_IF_ERROR(Impl::scalar_vector(context, jsonb_data_column->get_data_at(0),
455
                                                    rdata, roffsets, path_null_maps[0],
456
                                                    res->get_data(), null_map->get_data()));
457
            } else if (path_const[0]) {
458
                if (path_null_maps[0] && (*path_null_maps[0])[0]) {
459
                    return create_all_null_result();
460
                }
461
                RETURN_IF_ERROR(Impl::vector_scalar(context, ldata, loffsets, data_null_map,
462
                                                    jsonb_path_columns[0]->get_data_at(0),
463
                                                    res->get_data(), null_map->get_data()));
464
            } else {
465
                RETURN_IF_ERROR(Impl::vector_vector(context, ldata, loffsets, data_null_map, rdata,
466
                                                    roffsets, path_null_maps[0], res->get_data(),
467
                                                    null_map->get_data()));
468
            }
469
        }
470
471
6
        block.get_by_position(result).column =
472
6
                ColumnNullable::create(std::move(res), std::move(null_map));
473
6
        return Status::OK();
474
6
    }
475
};
476
477
class FunctionJsonbKeys : public IFunction {
478
public:
479
    static constexpr auto name = "json_keys";
480
    static constexpr auto alias = "jsonb_keys";
481
48
    static FunctionPtr create() { return std::make_shared<FunctionJsonbKeys>(); }
482
0
    String get_name() const override { return name; }
483
40
    bool is_variadic() const override { return true; }
484
0
    size_t get_number_of_arguments() const override { return 0; }
485
486
140
    bool use_default_implementation_for_nulls() const override { return false; }
487
488
39
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
489
39
        return make_nullable(
490
39
                std::make_shared<DataTypeArray>(make_nullable(std::make_shared<DataTypeString>())));
491
39
    }
492
493
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
494
101
                        uint32_t result, size_t input_rows_count) const override {
495
101
        DCHECK_GE(arguments.size(), 1);
496
101
        DCHECK(arguments.size() == 1 || arguments.size() == 2)
497
0
                << "json_keys should have 1 or 2 arguments, but got " << arguments.size();
498
499
101
        const NullMap* data_null_map = nullptr;
500
101
        const ColumnString* col_from_string = nullptr;
501
        // prepare jsonb data column
502
101
        auto&& [jsonb_data_column, json_data_const] =
503
101
                unpack_if_const(block.get_by_position(arguments[0]).column);
504
101
        if (jsonb_data_column->is_nullable()) {
505
97
            const auto* nullable = check_and_get_column<ColumnNullable>(jsonb_data_column.get());
506
97
            col_from_string =
507
97
                    assert_cast<const ColumnString*>(nullable->get_nested_column_ptr().get());
508
97
            data_null_map = &nullable->get_null_map_data();
509
97
        } else {
510
4
            col_from_string = assert_cast<const ColumnString*>(jsonb_data_column.get());
511
4
        }
512
513
        // prepare parse path column prepare, maybe we do not have path column
514
101
        ColumnPtr jsonb_path_column = nullptr;
515
101
        const ColumnString* jsonb_path_col = nullptr;
516
101
        bool path_const = false;
517
101
        const NullMap* path_null_map = nullptr;
518
101
        if (arguments.size() == 2) {
519
            // we have should have a ColumnString for path
520
74
            std::tie(jsonb_path_column, path_const) =
521
74
                    unpack_if_const(block.get_by_position(arguments[1]).column);
522
74
            if (jsonb_path_column->is_nullable()) {
523
10
                const auto* nullable =
524
10
                        check_and_get_column<ColumnNullable>(jsonb_path_column.get());
525
10
                jsonb_path_column = nullable->get_nested_column_ptr();
526
10
                path_null_map = &nullable->get_null_map_data();
527
10
            }
528
74
            jsonb_path_col = check_and_get_column<ColumnString>(jsonb_path_column.get());
529
74
        }
530
531
101
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
532
101
        NullMap& res_null_map = null_map->get_data();
533
534
101
        auto dst_arr = ColumnArray::create(
535
101
                ColumnNullable::create(ColumnString::create(), ColumnUInt8::create()),
536
101
                ColumnArray::ColumnOffsets::create());
537
101
        auto& dst_nested_column = assert_cast<ColumnNullable&>(dst_arr->get_data());
538
539
101
        Status st = std::visit(
540
101
                [&](auto data_const, auto has_path, auto path_const) {
541
101
                    return inner_loop_impl<data_const, has_path, path_const>(
542
101
                            input_rows_count, *dst_arr, dst_nested_column, res_null_map,
543
101
                            *col_from_string, data_null_map, jsonb_path_col, path_null_map);
544
101
                },
_ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESG_SG_EEDaSA_SB_SC_
Line
Count
Source
540
27
                [&](auto data_const, auto has_path, auto path_const) {
541
27
                    return inner_loop_impl<data_const, has_path, path_const>(
542
27
                            input_rows_count, *dst_arr, dst_nested_column, res_null_map,
543
27
                            *col_from_string, data_null_map, jsonb_path_col, path_null_map);
544
27
                },
Unexecuted instantiation: _ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESG_SF_IbLb1EEEEDaSA_SB_SC_
_ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESF_IbLb1EESG_EEDaSA_SB_SC_
Line
Count
Source
540
25
                [&](auto data_const, auto has_path, auto path_const) {
541
25
                    return inner_loop_impl<data_const, has_path, path_const>(
542
25
                            input_rows_count, *dst_arr, dst_nested_column, res_null_map,
543
25
                            *col_from_string, data_null_map, jsonb_path_col, path_null_map);
544
25
                },
_ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESF_IbLb1EESH_EEDaSA_SB_SC_
Line
Count
Source
540
47
                [&](auto data_const, auto has_path, auto path_const) {
541
47
                    return inner_loop_impl<data_const, has_path, path_const>(
542
47
                            input_rows_count, *dst_arr, dst_nested_column, res_null_map,
543
47
                            *col_from_string, data_null_map, jsonb_path_col, path_null_map);
544
47
                },
Unexecuted instantiation: _ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESF_IbLb0EESH_EEDaSA_SB_SC_
Unexecuted instantiation: _ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESF_IbLb0EESG_EEDaSA_SB_SC_
_ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESG_SF_IbLb0EEEEDaSA_SB_SC_
Line
Count
Source
540
2
                [&](auto data_const, auto has_path, auto path_const) {
541
2
                    return inner_loop_impl<data_const, has_path, path_const>(
542
2
                            input_rows_count, *dst_arr, dst_nested_column, res_null_map,
543
2
                            *col_from_string, data_null_map, jsonb_path_col, path_null_map);
544
2
                },
Unexecuted instantiation: _ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESG_SG_EEDaSA_SB_SC_
545
101
                make_bool_variant(json_data_const), make_bool_variant(jsonb_path_column),
546
101
                make_bool_variant(path_const));
547
101
        if (!st.ok()) {
548
8
            return st;
549
8
        }
550
93
        block.get_by_position(result).column =
551
93
                ColumnNullable::create(std::move(dst_arr), std::move(null_map));
552
93
        return st;
553
101
    }
554
555
private:
556
    template <bool JSONB_DATA_CONST, bool JSONB_PATH_PARAM, bool JSON_PATH_CONST>
557
    static ALWAYS_INLINE Status inner_loop_impl(size_t input_rows_count, ColumnArray& dst_arr,
558
                                                ColumnNullable& dst_nested_column,
559
                                                NullMap& res_null_map,
560
                                                const ColumnString& col_from_string,
561
                                                const NullMap* jsonb_data_nullmap,
562
                                                const ColumnString* jsonb_path_column,
563
101
                                                const NullMap* path_null_map) {
564
        // if path is const, we just need to parse it once
565
101
        JsonbPath const_path;
566
101
        if constexpr (JSONB_PATH_PARAM && JSON_PATH_CONST) {
567
47
            StringRef r_raw_ref = jsonb_path_column->get_data_at(0);
568
47
            if (!const_path.seek(r_raw_ref.data, r_raw_ref.size)) {
569
1
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
570
1
                                               r_raw_ref.to_string());
571
1
            }
572
573
46
            if (const_path.is_wildcard()) {
574
1
                return Status::InvalidJsonPath(
575
1
                        "In this situation, path expressions may not contain the * and ** tokens "
576
1
                        "or an array range.");
577
1
            }
578
46
        }
579
580
377
        for (size_t i = 0; i < input_rows_count; ++i) {
581
268
            auto index = index_check_const(i, JSONB_DATA_CONST);
582
            // if jsonb data is null or path column is null , we should return null
583
268
            if (jsonb_data_nullmap && (*jsonb_data_nullmap)[index]) {
584
23
                res_null_map[i] = 1;
585
23
                dst_arr.insert_default();
586
23
                continue;
587
23
            }
588
245
            if constexpr (JSONB_PATH_PARAM && !JSON_PATH_CONST) {
589
69
                if (path_null_map && (*path_null_map)[i]) {
590
8
                    res_null_map[i] = 1;
591
8
                    dst_arr.insert_default();
592
8
                    continue;
593
8
                }
594
69
            }
595
596
61
            auto json_data = col_from_string.get_data_at(index);
597
245
            const JsonbDocument* doc = nullptr;
598
245
            auto st = JsonbDocument::checkAndCreateDocument(json_data.data, json_data.size, &doc);
599
245
            if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
600
0
                dst_arr.clear();
601
0
                return Status::InvalidArgument("jsonb data is invalid");
602
0
            }
603
245
            const JsonbValue* obj_val;
604
245
            JsonbFindResult find_result;
605
245
            if constexpr (JSONB_PATH_PARAM) {
606
191
                if constexpr (!JSON_PATH_CONST) {
607
69
                    auto data = jsonb_path_column->get_data_at(i);
608
69
                    JsonbPath path;
609
69
                    if (!path.seek(data.data, data.size)) {
610
5
                        return Status::InvalidArgument(
611
5
                                "Json path error: Invalid Json Path for value: {} at row: {}",
612
5
                                std::string_view(data.data, data.size), i);
613
5
                    }
614
615
64
                    if (path.is_wildcard()) {
616
1
                        return Status::InvalidJsonPath(
617
1
                                "In this situation, path expressions may not contain the * and ** "
618
1
                                "tokens "
619
1
                                "or an array range. at row: {}",
620
1
                                i);
621
1
                    }
622
63
                    find_result = doc->getValue()->findValue(path);
623
122
                } else {
624
122
                    find_result = doc->getValue()->findValue(const_path);
625
122
                }
626
0
                obj_val = find_result.value;
627
191
            } else {
628
54
                obj_val = doc->getValue();
629
54
            }
630
631
245
            if (!obj_val || !obj_val->isObject()) {
632
                // if jsonb data is not object we should return null
633
182
                res_null_map[i] = 1;
634
182
                dst_arr.insert_default();
635
182
                continue;
636
182
            }
637
63
            const auto* obj = obj_val->unpack<ObjectVal>();
638
75
            for (const auto& it : *obj) {
639
75
                dst_nested_column.insert_data(it.getKeyStr(), it.klen());
640
75
            }
641
63
            dst_arr.get_offsets().push_back(dst_nested_column.size());
642
63
        } //for
643
109
        return Status::OK();
644
101
    }
_ZN5doris17FunctionJsonbKeys15inner_loop_implILb0ELb0ELb0EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_
Line
Count
Source
563
27
                                                const NullMap* path_null_map) {
564
        // if path is const, we just need to parse it once
565
27
        JsonbPath const_path;
566
        if constexpr (JSONB_PATH_PARAM && JSON_PATH_CONST) {
567
            StringRef r_raw_ref = jsonb_path_column->get_data_at(0);
568
            if (!const_path.seek(r_raw_ref.data, r_raw_ref.size)) {
569
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
570
                                               r_raw_ref.to_string());
571
            }
572
573
            if (const_path.is_wildcard()) {
574
                return Status::InvalidJsonPath(
575
                        "In this situation, path expressions may not contain the * and ** tokens "
576
                        "or an array range.");
577
            }
578
        }
579
580
85
        for (size_t i = 0; i < input_rows_count; ++i) {
581
58
            auto index = index_check_const(i, JSONB_DATA_CONST);
582
            // if jsonb data is null or path column is null , we should return null
583
58
            if (jsonb_data_nullmap && (*jsonb_data_nullmap)[index]) {
584
4
                res_null_map[i] = 1;
585
4
                dst_arr.insert_default();
586
4
                continue;
587
4
            }
588
            if constexpr (JSONB_PATH_PARAM && !JSON_PATH_CONST) {
589
                if (path_null_map && (*path_null_map)[i]) {
590
                    res_null_map[i] = 1;
591
                    dst_arr.insert_default();
592
                    continue;
593
                }
594
            }
595
596
54
            auto json_data = col_from_string.get_data_at(index);
597
54
            const JsonbDocument* doc = nullptr;
598
54
            auto st = JsonbDocument::checkAndCreateDocument(json_data.data, json_data.size, &doc);
599
54
            if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
600
0
                dst_arr.clear();
601
0
                return Status::InvalidArgument("jsonb data is invalid");
602
0
            }
603
54
            const JsonbValue* obj_val;
604
54
            JsonbFindResult find_result;
605
            if constexpr (JSONB_PATH_PARAM) {
606
                if constexpr (!JSON_PATH_CONST) {
607
                    auto data = jsonb_path_column->get_data_at(i);
608
                    JsonbPath path;
609
                    if (!path.seek(data.data, data.size)) {
610
                        return Status::InvalidArgument(
611
                                "Json path error: Invalid Json Path for value: {} at row: {}",
612
                                std::string_view(data.data, data.size), i);
613
                    }
614
615
                    if (path.is_wildcard()) {
616
                        return Status::InvalidJsonPath(
617
                                "In this situation, path expressions may not contain the * and ** "
618
                                "tokens "
619
                                "or an array range. at row: {}",
620
                                i);
621
                    }
622
                    find_result = doc->getValue()->findValue(path);
623
                } else {
624
                    find_result = doc->getValue()->findValue(const_path);
625
                }
626
                obj_val = find_result.value;
627
54
            } else {
628
54
                obj_val = doc->getValue();
629
54
            }
630
631
54
            if (!obj_val || !obj_val->isObject()) {
632
                // if jsonb data is not object we should return null
633
36
                res_null_map[i] = 1;
634
36
                dst_arr.insert_default();
635
36
                continue;
636
36
            }
637
18
            const auto* obj = obj_val->unpack<ObjectVal>();
638
36
            for (const auto& it : *obj) {
639
36
                dst_nested_column.insert_data(it.getKeyStr(), it.klen());
640
36
            }
641
18
            dst_arr.get_offsets().push_back(dst_nested_column.size());
642
18
        } //for
643
27
        return Status::OK();
644
27
    }
Unexecuted instantiation: _ZN5doris17FunctionJsonbKeys15inner_loop_implILb0ELb0ELb1EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_
_ZN5doris17FunctionJsonbKeys15inner_loop_implILb0ELb1ELb0EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_
Line
Count
Source
563
25
                                                const NullMap* path_null_map) {
564
        // if path is const, we just need to parse it once
565
25
        JsonbPath const_path;
566
        if constexpr (JSONB_PATH_PARAM && JSON_PATH_CONST) {
567
            StringRef r_raw_ref = jsonb_path_column->get_data_at(0);
568
            if (!const_path.seek(r_raw_ref.data, r_raw_ref.size)) {
569
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
570
                                               r_raw_ref.to_string());
571
            }
572
573
            if (const_path.is_wildcard()) {
574
                return Status::InvalidJsonPath(
575
                        "In this situation, path expressions may not contain the * and ** tokens "
576
                        "or an array range.");
577
            }
578
        }
579
580
80
        for (size_t i = 0; i < input_rows_count; ++i) {
581
51
            auto index = index_check_const(i, JSONB_DATA_CONST);
582
            // if jsonb data is null or path column is null , we should return null
583
51
            if (jsonb_data_nullmap && (*jsonb_data_nullmap)[index]) {
584
6
                res_null_map[i] = 1;
585
6
                dst_arr.insert_default();
586
6
                continue;
587
6
            }
588
45
            if constexpr (JSONB_PATH_PARAM && !JSON_PATH_CONST) {
589
45
                if (path_null_map && (*path_null_map)[i]) {
590
4
                    res_null_map[i] = 1;
591
4
                    dst_arr.insert_default();
592
4
                    continue;
593
4
                }
594
45
            }
595
596
41
            auto json_data = col_from_string.get_data_at(index);
597
45
            const JsonbDocument* doc = nullptr;
598
45
            auto st = JsonbDocument::checkAndCreateDocument(json_data.data, json_data.size, &doc);
599
45
            if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
600
0
                dst_arr.clear();
601
0
                return Status::InvalidArgument("jsonb data is invalid");
602
0
            }
603
45
            const JsonbValue* obj_val;
604
45
            JsonbFindResult find_result;
605
45
            if constexpr (JSONB_PATH_PARAM) {
606
45
                if constexpr (!JSON_PATH_CONST) {
607
45
                    auto data = jsonb_path_column->get_data_at(i);
608
45
                    JsonbPath path;
609
45
                    if (!path.seek(data.data, data.size)) {
610
5
                        return Status::InvalidArgument(
611
5
                                "Json path error: Invalid Json Path for value: {} at row: {}",
612
5
                                std::string_view(data.data, data.size), i);
613
5
                    }
614
615
40
                    if (path.is_wildcard()) {
616
1
                        return Status::InvalidJsonPath(
617
1
                                "In this situation, path expressions may not contain the * and ** "
618
1
                                "tokens "
619
1
                                "or an array range. at row: {}",
620
1
                                i);
621
1
                    }
622
39
                    find_result = doc->getValue()->findValue(path);
623
                } else {
624
                    find_result = doc->getValue()->findValue(const_path);
625
                }
626
0
                obj_val = find_result.value;
627
            } else {
628
                obj_val = doc->getValue();
629
            }
630
631
45
            if (!obj_val || !obj_val->isObject()) {
632
                // if jsonb data is not object we should return null
633
25
                res_null_map[i] = 1;
634
25
                dst_arr.insert_default();
635
25
                continue;
636
25
            }
637
20
            const auto* obj = obj_val->unpack<ObjectVal>();
638
20
            for (const auto& it : *obj) {
639
14
                dst_nested_column.insert_data(it.getKeyStr(), it.klen());
640
14
            }
641
20
            dst_arr.get_offsets().push_back(dst_nested_column.size());
642
20
        } //for
643
29
        return Status::OK();
644
25
    }
_ZN5doris17FunctionJsonbKeys15inner_loop_implILb0ELb1ELb1EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_
Line
Count
Source
563
47
                                                const NullMap* path_null_map) {
564
        // if path is const, we just need to parse it once
565
47
        JsonbPath const_path;
566
47
        if constexpr (JSONB_PATH_PARAM && JSON_PATH_CONST) {
567
47
            StringRef r_raw_ref = jsonb_path_column->get_data_at(0);
568
47
            if (!const_path.seek(r_raw_ref.data, r_raw_ref.size)) {
569
1
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
570
1
                                               r_raw_ref.to_string());
571
1
            }
572
573
46
            if (const_path.is_wildcard()) {
574
1
                return Status::InvalidJsonPath(
575
1
                        "In this situation, path expressions may not contain the * and ** tokens "
576
1
                        "or an array range.");
577
1
            }
578
46
        }
579
580
182
        for (size_t i = 0; i < input_rows_count; ++i) {
581
135
            auto index = index_check_const(i, JSONB_DATA_CONST);
582
            // if jsonb data is null or path column is null , we should return null
583
135
            if (jsonb_data_nullmap && (*jsonb_data_nullmap)[index]) {
584
13
                res_null_map[i] = 1;
585
13
                dst_arr.insert_default();
586
13
                continue;
587
13
            }
588
            if constexpr (JSONB_PATH_PARAM && !JSON_PATH_CONST) {
589
                if (path_null_map && (*path_null_map)[i]) {
590
                    res_null_map[i] = 1;
591
                    dst_arr.insert_default();
592
                    continue;
593
                }
594
            }
595
596
122
            auto json_data = col_from_string.get_data_at(index);
597
122
            const JsonbDocument* doc = nullptr;
598
122
            auto st = JsonbDocument::checkAndCreateDocument(json_data.data, json_data.size, &doc);
599
122
            if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
600
0
                dst_arr.clear();
601
0
                return Status::InvalidArgument("jsonb data is invalid");
602
0
            }
603
122
            const JsonbValue* obj_val;
604
122
            JsonbFindResult find_result;
605
122
            if constexpr (JSONB_PATH_PARAM) {
606
                if constexpr (!JSON_PATH_CONST) {
607
                    auto data = jsonb_path_column->get_data_at(i);
608
                    JsonbPath path;
609
                    if (!path.seek(data.data, data.size)) {
610
                        return Status::InvalidArgument(
611
                                "Json path error: Invalid Json Path for value: {} at row: {}",
612
                                std::string_view(data.data, data.size), i);
613
                    }
614
615
                    if (path.is_wildcard()) {
616
                        return Status::InvalidJsonPath(
617
                                "In this situation, path expressions may not contain the * and ** "
618
                                "tokens "
619
                                "or an array range. at row: {}",
620
                                i);
621
                    }
622
                    find_result = doc->getValue()->findValue(path);
623
122
                } else {
624
122
                    find_result = doc->getValue()->findValue(const_path);
625
122
                }
626
122
                obj_val = find_result.value;
627
            } else {
628
                obj_val = doc->getValue();
629
            }
630
631
122
            if (!obj_val || !obj_val->isObject()) {
632
                // if jsonb data is not object we should return null
633
113
                res_null_map[i] = 1;
634
113
                dst_arr.insert_default();
635
113
                continue;
636
113
            }
637
9
            const auto* obj = obj_val->unpack<ObjectVal>();
638
9
            for (const auto& it : *obj) {
639
9
                dst_nested_column.insert_data(it.getKeyStr(), it.klen());
640
9
            }
641
9
            dst_arr.get_offsets().push_back(dst_nested_column.size());
642
9
        } //for
643
47
        return Status::OK();
644
47
    }
Unexecuted instantiation: _ZN5doris17FunctionJsonbKeys15inner_loop_implILb1ELb0ELb0EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_
Unexecuted instantiation: _ZN5doris17FunctionJsonbKeys15inner_loop_implILb1ELb0ELb1EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_
_ZN5doris17FunctionJsonbKeys15inner_loop_implILb1ELb1ELb0EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_
Line
Count
Source
563
2
                                                const NullMap* path_null_map) {
564
        // if path is const, we just need to parse it once
565
2
        JsonbPath const_path;
566
        if constexpr (JSONB_PATH_PARAM && JSON_PATH_CONST) {
567
            StringRef r_raw_ref = jsonb_path_column->get_data_at(0);
568
            if (!const_path.seek(r_raw_ref.data, r_raw_ref.size)) {
569
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
570
                                               r_raw_ref.to_string());
571
            }
572
573
            if (const_path.is_wildcard()) {
574
                return Status::InvalidJsonPath(
575
                        "In this situation, path expressions may not contain the * and ** tokens "
576
                        "or an array range.");
577
            }
578
        }
579
580
30
        for (size_t i = 0; i < input_rows_count; ++i) {
581
24
            auto index = index_check_const(i, JSONB_DATA_CONST);
582
            // if jsonb data is null or path column is null , we should return null
583
24
            if (jsonb_data_nullmap && (*jsonb_data_nullmap)[index]) {
584
0
                res_null_map[i] = 1;
585
0
                dst_arr.insert_default();
586
0
                continue;
587
0
            }
588
24
            if constexpr (JSONB_PATH_PARAM && !JSON_PATH_CONST) {
589
24
                if (path_null_map && (*path_null_map)[i]) {
590
4
                    res_null_map[i] = 1;
591
4
                    dst_arr.insert_default();
592
4
                    continue;
593
4
                }
594
24
            }
595
596
20
            auto json_data = col_from_string.get_data_at(index);
597
24
            const JsonbDocument* doc = nullptr;
598
24
            auto st = JsonbDocument::checkAndCreateDocument(json_data.data, json_data.size, &doc);
599
24
            if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
600
0
                dst_arr.clear();
601
0
                return Status::InvalidArgument("jsonb data is invalid");
602
0
            }
603
24
            const JsonbValue* obj_val;
604
24
            JsonbFindResult find_result;
605
24
            if constexpr (JSONB_PATH_PARAM) {
606
24
                if constexpr (!JSON_PATH_CONST) {
607
24
                    auto data = jsonb_path_column->get_data_at(i);
608
24
                    JsonbPath path;
609
24
                    if (!path.seek(data.data, data.size)) {
610
0
                        return Status::InvalidArgument(
611
0
                                "Json path error: Invalid Json Path for value: {} at row: {}",
612
0
                                std::string_view(data.data, data.size), i);
613
0
                    }
614
615
24
                    if (path.is_wildcard()) {
616
0
                        return Status::InvalidJsonPath(
617
0
                                "In this situation, path expressions may not contain the * and ** "
618
0
                                "tokens "
619
0
                                "or an array range. at row: {}",
620
0
                                i);
621
0
                    }
622
24
                    find_result = doc->getValue()->findValue(path);
623
                } else {
624
                    find_result = doc->getValue()->findValue(const_path);
625
                }
626
0
                obj_val = find_result.value;
627
            } else {
628
                obj_val = doc->getValue();
629
            }
630
631
24
            if (!obj_val || !obj_val->isObject()) {
632
                // if jsonb data is not object we should return null
633
8
                res_null_map[i] = 1;
634
8
                dst_arr.insert_default();
635
8
                continue;
636
8
            }
637
16
            const auto* obj = obj_val->unpack<ObjectVal>();
638
16
            for (const auto& it : *obj) {
639
16
                dst_nested_column.insert_data(it.getKeyStr(), it.klen());
640
16
            }
641
16
            dst_arr.get_offsets().push_back(dst_nested_column.size());
642
16
        } //for
643
6
        return Status::OK();
644
2
    }
Unexecuted instantiation: _ZN5doris17FunctionJsonbKeys15inner_loop_implILb1ELb1ELb1EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_
645
};
646
647
class FunctionJsonbExtractPath : public IFunction {
648
public:
649
    static constexpr auto name = "json_exists_path";
650
    static constexpr auto alias = "jsonb_exists_path";
651
    using ColumnType = ColumnUInt8;
652
    using Container = typename ColumnType::Container;
653
182
    static FunctionPtr create() { return std::make_shared<FunctionJsonbExtractPath>(); }
654
1
    String get_name() const override { return name; }
655
173
    size_t get_number_of_arguments() const override { return 2; }
656
173
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
657
        // it only needs to indicate existence and does not need to return nullable values.
658
173
        const auto nullable = std::ranges::any_of(
659
195
                arguments, [](const DataTypePtr& type) { return type->is_nullable(); });
660
173
        if (nullable) {
661
152
            return make_nullable(std::make_shared<DataTypeUInt8>());
662
152
        } else {
663
21
            return std::make_shared<DataTypeUInt8>();
664
21
        }
665
173
    }
666
667
1.52k
    bool use_default_implementation_for_nulls() const override { return false; }
668
669
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
670
1.35k
                        uint32_t result, size_t input_rows_count) const override {
671
        // prepare jsonb data column
672
1.35k
        auto&& [jsonb_data_column, jsonb_data_const] =
673
1.35k
                unpack_if_const(block.get_by_position(arguments[0]).column);
674
675
1.35k
        const NullMap* data_null_map = nullptr;
676
1.35k
        const ColumnString* data_col = nullptr;
677
1.35k
        if (jsonb_data_column->is_nullable()) {
678
1.17k
            const auto* nullable = assert_cast<const ColumnNullable*>(jsonb_data_column.get());
679
1.17k
            data_col = assert_cast<const ColumnString*>(nullable->get_nested_column_ptr().get());
680
1.17k
            data_null_map = &nullable->get_null_map_data();
681
1.17k
        } else {
682
177
            data_col = assert_cast<const ColumnString*>(jsonb_data_column.get());
683
177
        }
684
685
1.35k
        const auto& ldata = data_col->get_chars();
686
1.35k
        const auto& loffsets = data_col->get_offsets();
687
688
        // prepare parse path column prepare
689
1.35k
        auto&& [path_column, path_const] =
690
1.35k
                unpack_if_const(block.get_by_position(arguments[1]).column);
691
1.35k
        const ColumnString* path_col = nullptr;
692
1.35k
        const NullMap* path_null_map = nullptr;
693
1.35k
        if (path_column->is_nullable()) {
694
7
            const auto* nullable = assert_cast<const ColumnNullable*>(path_column.get());
695
7
            path_col = assert_cast<const ColumnString*>(nullable->get_nested_column_ptr().get());
696
7
            path_null_map = &nullable->get_null_map_data();
697
1.34k
        } else {
698
1.34k
            path_col = assert_cast<const ColumnString*>(path_column.get());
699
1.34k
        }
700
701
18.4E
        DCHECK(!(jsonb_data_const && path_const))
702
18.4E
                << "jsonb_data_const and path_const should not be both const";
703
704
1.35k
        auto create_all_null_result = [&]() {
705
2
            auto res = ColumnType::create();
706
2
            res->insert_default();
707
2
            auto nullable_column =
708
2
                    ColumnNullable::create(std::move(res), ColumnUInt8::create(1, 1));
709
2
            auto const_column = ColumnConst::create(std::move(nullable_column), input_rows_count);
710
2
            block.get_by_position(result).column = std::move(const_column);
711
2
            return Status::OK();
712
2
        };
713
714
1.35k
        MutableColumnPtr result_null_map_column;
715
1.35k
        NullMap* result_null_map = nullptr;
716
1.35k
        if (data_null_map || path_null_map) {
717
1.17k
            result_null_map_column = ColumnUInt8::create(input_rows_count, 0);
718
1.17k
            result_null_map = &assert_cast<ColumnUInt8&>(*result_null_map_column).get_data();
719
720
1.17k
            if (data_null_map) {
721
1.17k
                VectorizedUtils::update_null_map(*result_null_map, *data_null_map,
722
1.17k
                                                 jsonb_data_const);
723
1.17k
            }
724
725
1.17k
            if (path_null_map) {
726
7
                VectorizedUtils::update_null_map(*result_null_map, *path_null_map, path_const);
727
7
            }
728
729
1.17k
            if (!simd::contain_zero(result_null_map->data(), input_rows_count)) {
730
2
                return create_all_null_result();
731
2
            }
732
1.17k
        }
733
734
1.35k
        auto res = ColumnType::create();
735
736
1.35k
        bool is_invalid_json_path = false;
737
738
1.35k
        const auto& rdata = path_col->get_chars();
739
1.35k
        const auto& roffsets = path_col->get_offsets();
740
1.35k
        if (jsonb_data_const) {
741
2
            if (data_null_map && (*data_null_map)[0]) {
742
0
                return create_all_null_result();
743
0
            }
744
2
            scalar_vector(context, data_col->get_data_at(0), rdata, roffsets, res->get_data(),
745
2
                          result_null_map, is_invalid_json_path);
746
1.34k
        } else if (path_const) {
747
1.32k
            if (path_null_map && (*path_null_map)[0]) {
748
0
                return create_all_null_result();
749
0
            }
750
1.32k
            vector_scalar(context, ldata, loffsets, path_col->get_data_at(0), res->get_data(),
751
1.32k
                          result_null_map, is_invalid_json_path);
752
1.32k
        } else {
753
28
            vector_vector(context, ldata, loffsets, rdata, roffsets, res->get_data(),
754
28
                          result_null_map, is_invalid_json_path);
755
28
        }
756
1.35k
        if (is_invalid_json_path) {
757
7
            return Status::InvalidArgument(
758
7
                    "Json path error: Invalid Json Path for value: {}",
759
7
                    std::string_view(reinterpret_cast<const char*>(rdata.data()), rdata.size()));
760
7
        }
761
762
1.34k
        if (result_null_map) {
763
1.17k
            auto nullabel_col =
764
1.17k
                    ColumnNullable::create(std::move(res), std::move(result_null_map_column));
765
1.17k
            block.get_by_position(result).column = std::move(nullabel_col);
766
1.17k
        } else {
767
173
            block.get_by_position(result).column = std::move(res);
768
173
        }
769
1.34k
        return Status::OK();
770
1.35k
    }
771
772
private:
773
    static ALWAYS_INLINE void inner_loop_impl(size_t i, Container& res, const char* l_raw_str,
774
3.01k
                                              size_t l_str_size, JsonbPath& path) {
775
        // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
776
3.01k
        const JsonbDocument* doc = nullptr;
777
3.01k
        auto st = JsonbDocument::checkAndCreateDocument(l_raw_str, l_str_size, &doc);
778
3.01k
        if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
779
0
            return;
780
0
        }
781
782
        // value is NOT necessary to be deleted since JsonbValue will not allocate memory
783
3.01k
        auto result = doc->getValue()->findValue(path);
784
785
3.01k
        if (result.value) {
786
445
            res[i] = 1;
787
445
        }
788
3.01k
    }
789
    static void vector_vector(FunctionContext* context, const ColumnString::Chars& ldata,
790
                              const ColumnString::Offsets& loffsets,
791
                              const ColumnString::Chars& rdata,
792
                              const ColumnString::Offsets& roffsets, Container& res,
793
35
                              const NullMap* result_null_map, bool& is_invalid_json_path) {
794
35
        const size_t size = loffsets.size();
795
35
        res.resize_fill(size, 0);
796
797
80
        for (size_t i = 0; i < size; i++) {
798
50
            if (result_null_map && (*result_null_map)[i]) {
799
8
                continue;
800
8
            }
801
802
42
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
803
42
            int l_str_size = loffsets[i] - loffsets[i - 1];
804
805
42
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
806
42
            int r_str_size = roffsets[i] - roffsets[i - 1];
807
808
42
            JsonbPath path;
809
42
            if (!path.seek(r_raw_str, r_str_size)) {
810
5
                is_invalid_json_path = true;
811
5
                return;
812
5
            }
813
814
37
            inner_loop_impl(i, res, l_raw_str, l_str_size, path);
815
37
        }
816
35
    }
817
    static void scalar_vector(FunctionContext* context, const StringRef& ldata,
818
                              const ColumnString::Chars& rdata,
819
                              const ColumnString::Offsets& roffsets, Container& res,
820
2
                              const NullMap* result_null_map, bool& is_invalid_json_path) {
821
2
        const size_t size = roffsets.size();
822
2
        res.resize_fill(size, 0);
823
824
14
        for (size_t i = 0; i < size; i++) {
825
13
            if (result_null_map && (*result_null_map)[i]) {
826
4
                continue;
827
4
            }
828
9
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
829
9
            int r_str_size = roffsets[i] - roffsets[i - 1];
830
831
9
            JsonbPath path;
832
9
            if (!path.seek(r_raw_str, r_str_size)) {
833
1
                is_invalid_json_path = true;
834
1
                return;
835
1
            }
836
837
8
            inner_loop_impl(i, res, ldata.data, ldata.size, path);
838
8
        }
839
2
    }
840
    static void vector_scalar(FunctionContext* context, const ColumnString::Chars& ldata,
841
                              const ColumnString::Offsets& loffsets, const StringRef& rdata,
842
                              Container& res, const NullMap* result_null_map,
843
1.32k
                              bool& is_invalid_json_path) {
844
1.32k
        const size_t size = loffsets.size();
845
1.32k
        res.resize_fill(size, 0);
846
847
1.32k
        JsonbPath path;
848
1.32k
        if (!path.seek(rdata.data, rdata.size)) {
849
1
            is_invalid_json_path = true;
850
1
            return;
851
1
        }
852
853
4.51k
        for (size_t i = 0; i < size; i++) {
854
3.19k
            if (result_null_map && (*result_null_map)[i]) {
855
232
                continue;
856
232
            }
857
2.96k
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
858
2.96k
            int l_str_size = loffsets[i] - loffsets[i - 1];
859
860
2.96k
            inner_loop_impl(i, res, l_raw_str, l_str_size, path);
861
2.96k
        }
862
1.31k
    }
863
};
864
865
template <typename ValueType>
866
struct JsonbExtractStringImpl {
867
    using ReturnType = typename ValueType::ReturnType;
868
    using ColumnType = typename ValueType::ColumnType;
869
870
private:
871
    /// Evaluates `path` against one serialized JSONB value and appends the outcome for
    /// row `i` to the string result (`res_data` / `res_offsets`).
    ///
    /// A NULL is pushed (through StringOP::push_null_string) when the document cannot be
    /// created or the path matches nothing. Otherwise:
    ///  - ValueType::only_get_type: the type name of the matched value is pushed;
    ///  - else: the matched value is written through `writer` and the writer's output
    ///    buffer is pushed. With ValueType::no_quotes, a string value whose blob is
    ///    wrapped in double quotes is written without that outer pair of quotes.
    ///
    /// `formater` is not used by this function.
    static ALWAYS_INLINE void inner_loop_impl(JsonbWriter* writer, size_t i,
                                              ColumnString::Chars& res_data,
                                              ColumnString::Offsets& res_offsets, NullMap& null_map,
                                              std::unique_ptr<JsonbToJson>& formater,
                                              const char* l_raw, size_t l_size, JsonbPath& path) {
        // The document does not need to be deleted: JsonbDocument does not allocate memory.
        const JsonbDocument* document = nullptr;
        auto parse_status = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &document);
        const bool document_usable = parse_status.ok() && document && document->getValue();
        if (!document_usable) [[unlikely]] {
            StringOP::push_null_string(i, res_data, res_offsets, null_map);
            return;
        }

        // The matched value does not need to be deleted either: JsonbValue does not allocate.
        auto lookup = document->getValue()->findValue(path);
        if (UNLIKELY(!lookup.value)) {
            StringOP::push_null_string(i, res_data, res_offsets, null_map);
            return;
        }

        if constexpr (ValueType::only_get_type) {
            StringOP::push_value_string(std::string_view(lookup.value->typeName()), i, res_data,
                                        res_offsets);
            return;
        } else {
            static_assert(std::is_same_v<DataTypeJsonb, ReturnType>);

            // Appends whatever the writer currently holds as the value of row i.
            const auto push_writer_output = [&]() {
                StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(),
                                                             writer->getOutput()->getSize()),
                                            i, res_data, res_offsets);
            };

            if constexpr (ValueType::no_quotes) {
                if (lookup.value->isString()) {
                    const auto* string_val = lookup.value->unpack<JsonbStringVal>();
                    const auto* bytes = string_val->getBlob();
                    const auto byte_count = string_val->length();
                    const bool wrapped_in_quotes = byte_count > 1 && bytes[0] == '"' &&
                                                   bytes[byte_count - 1] == '"';
                    if (wrapped_in_quotes) {
                        // Re-encode the string without its outer pair of quotes.
                        writer->writeStartString();
                        writer->writeString(bytes + 1, byte_count - 2);
                        writer->writeEndString();
                        push_writer_output();
                        return;
                    }
                }
            }

            writer->writeValueSimple(lookup.value);
            push_writer_output();
        }
    }
_ZN5doris22JsonbExtractStringImplINS_13JsonbTypeTypeEE15inner_loop_implEPNS_12JsonbWriterTINS_14JsonbOutStreamEEEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEESC_RSt10unique_ptrINS_11JsonbToJsonESt14default_deleteISG_EEPKcmRNS_9JsonbPathE
Line
Count
Source
875
2.98k
                                              const char* l_raw, size_t l_size, JsonbPath& path) {
876
        // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
877
2.98k
        const JsonbDocument* doc = nullptr;
878
2.98k
        auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc);
879
2.98k
        if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
880
0
            StringOP::push_null_string(i, res_data, res_offsets, null_map);
881
0
            return;
882
0
        }
883
884
        // value is NOT necessary to be deleted since JsonbValue will not allocate memory
885
2.98k
        auto find_result = doc->getValue()->findValue(path);
886
887
2.98k
        if (UNLIKELY(!find_result.value)) {
888
2.55k
            StringOP::push_null_string(i, res_data, res_offsets, null_map);
889
2.55k
            return;
890
2.55k
        }
891
892
429
        if constexpr (ValueType::only_get_type) {
893
429
            StringOP::push_value_string(std::string_view(find_result.value->typeName()), i,
894
429
                                        res_data, res_offsets);
895
429
            return;
896
        } else {
897
            static_assert(std::is_same_v<DataTypeJsonb, ReturnType>);
898
            if constexpr (ValueType::no_quotes) {
899
                if (find_result.value->isString()) {
900
                    const auto* str_value = find_result.value->unpack<JsonbStringVal>();
901
                    const auto* blob = str_value->getBlob();
902
                    if (str_value->length() > 1 && blob[0] == '"' &&
903
                        blob[str_value->length() - 1] == '"') {
904
                        writer->writeStartString();
905
                        writer->writeString(blob + 1, str_value->length() - 2);
906
                        writer->writeEndString();
907
                        StringOP::push_value_string(
908
                                std::string_view(writer->getOutput()->getBuffer(),
909
                                                 writer->getOutput()->getSize()),
910
                                i, res_data, res_offsets);
911
                        return;
912
                    }
913
                }
914
            }
915
            writer->writeValueSimple(find_result.value);
916
            StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(),
917
                                                         writer->getOutput()->getSize()),
918
                                        i, res_data, res_offsets);
919
        }
920
429
    }
_ZN5doris22JsonbExtractStringImplINS_13JsonbTypeJsonEE15inner_loop_implEPNS_12JsonbWriterTINS_14JsonbOutStreamEEEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEESC_RSt10unique_ptrINS_11JsonbToJsonESt14default_deleteISG_EEPKcmRNS_9JsonbPathE
Line
Count
Source
875
138k
                                              const char* l_raw, size_t l_size, JsonbPath& path) {
876
        // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
877
138k
        const JsonbDocument* doc = nullptr;
878
138k
        auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc);
879
138k
        if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
880
0
            StringOP::push_null_string(i, res_data, res_offsets, null_map);
881
0
            return;
882
0
        }
883
884
        // value is NOT necessary to be deleted since JsonbValue will not allocate memory
885
138k
        auto find_result = doc->getValue()->findValue(path);
886
887
138k
        if (UNLIKELY(!find_result.value)) {
888
18.3k
            StringOP::push_null_string(i, res_data, res_offsets, null_map);
889
18.3k
            return;
890
18.3k
        }
891
892
        if constexpr (ValueType::only_get_type) {
893
            StringOP::push_value_string(std::string_view(find_result.value->typeName()), i,
894
                                        res_data, res_offsets);
895
            return;
896
120k
        } else {
897
120k
            static_assert(std::is_same_v<DataTypeJsonb, ReturnType>);
898
            if constexpr (ValueType::no_quotes) {
899
                if (find_result.value->isString()) {
900
                    const auto* str_value = find_result.value->unpack<JsonbStringVal>();
901
                    const auto* blob = str_value->getBlob();
902
                    if (str_value->length() > 1 && blob[0] == '"' &&
903
                        blob[str_value->length() - 1] == '"') {
904
                        writer->writeStartString();
905
                        writer->writeString(blob + 1, str_value->length() - 2);
906
                        writer->writeEndString();
907
                        StringOP::push_value_string(
908
                                std::string_view(writer->getOutput()->getBuffer(),
909
                                                 writer->getOutput()->getSize()),
910
                                i, res_data, res_offsets);
911
                        return;
912
                    }
913
                }
914
            }
915
120k
            writer->writeValueSimple(find_result.value);
916
120k
            StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(),
917
120k
                                                         writer->getOutput()->getSize()),
918
120k
                                        i, res_data, res_offsets);
919
120k
        }
920
120k
    }
_ZN5doris22JsonbExtractStringImplINS_21JsonbTypeJsonNoQuotesEE15inner_loop_implEPNS_12JsonbWriterTINS_14JsonbOutStreamEEEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEESC_RSt10unique_ptrINS_11JsonbToJsonESt14default_deleteISG_EEPKcmRNS_9JsonbPathE
Line
Count
Source
875
2
                                              const char* l_raw, size_t l_size, JsonbPath& path) {
876
        // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
877
2
        const JsonbDocument* doc = nullptr;
878
2
        auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc);
879
2
        if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
880
0
            StringOP::push_null_string(i, res_data, res_offsets, null_map);
881
0
            return;
882
0
        }
883
884
        // value is NOT necessary to be deleted since JsonbValue will not allocate memory
885
2
        auto find_result = doc->getValue()->findValue(path);
886
887
2
        if (UNLIKELY(!find_result.value)) {
888
0
            StringOP::push_null_string(i, res_data, res_offsets, null_map);
889
0
            return;
890
0
        }
891
892
        if constexpr (ValueType::only_get_type) {
893
            StringOP::push_value_string(std::string_view(find_result.value->typeName()), i,
894
                                        res_data, res_offsets);
895
            return;
896
2
        } else {
897
2
            static_assert(std::is_same_v<DataTypeJsonb, ReturnType>);
898
2
            if constexpr (ValueType::no_quotes) {
899
2
                if (find_result.value->isString()) {
900
1
                    const auto* str_value = find_result.value->unpack<JsonbStringVal>();
901
1
                    const auto* blob = str_value->getBlob();
902
1
                    if (str_value->length() > 1 && blob[0] == '"' &&
903
1
                        blob[str_value->length() - 1] == '"') {
904
0
                        writer->writeStartString();
905
0
                        writer->writeString(blob + 1, str_value->length() - 2);
906
0
                        writer->writeEndString();
907
0
                        StringOP::push_value_string(
908
0
                                std::string_view(writer->getOutput()->getBuffer(),
909
0
                                                 writer->getOutput()->getSize()),
910
0
                                i, res_data, res_offsets);
911
0
                        return;
912
0
                    }
913
1
                }
914
2
            }
915
2
            writer->writeValueSimple(find_result.value);
916
2
            StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(),
917
2
                                                         writer->getOutput()->getSize()),
918
2
                                        i, res_data, res_offsets);
919
2
        }
920
2
    }
921
922
public:
923
    // for jsonb_extract_string
924
    static Status vector_vector_v2(
925
            FunctionContext* context, const ColumnString::Chars& ldata,
926
            const ColumnString::Offsets& loffsets, const NullMap* l_null_map,
927
            const bool& json_data_const,
928
            const std::vector<const ColumnString*>& rdata_columns, // here we can support more paths
929
            const std::vector<const NullMap*>& r_null_maps, const std::vector<bool>& path_const,
930
11.2k
            ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets, NullMap& null_map) {
931
11.2k
        const size_t input_rows_count = null_map.size();
932
11.2k
        res_offsets.resize(input_rows_count);
933
934
11.2k
        auto writer = std::make_unique<JsonbWriter>();
935
11.2k
        std::unique_ptr<JsonbToJson> formater;
936
937
        // reuseable json path list, espacially for const path
938
11.2k
        std::vector<JsonbPath> json_path_list;
939
11.2k
        json_path_list.resize(rdata_columns.size());
940
941
        // lambda function to parse json path for row i and path pi
942
11.6k
        auto parse_json_path = [&](size_t i, size_t pi) -> Status {
943
11.6k
            const auto index = index_check_const(i, path_const[pi]);
944
945
11.6k
            const ColumnString* path_col = rdata_columns[pi];
946
11.6k
            const ColumnString::Chars& rdata = path_col->get_chars();
947
11.6k
            const ColumnString::Offsets& roffsets = path_col->get_offsets();
948
11.6k
            size_t r_off = roffsets[index - 1];
949
11.6k
            size_t r_size = roffsets[index] - r_off;
950
11.6k
            const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]);
951
952
11.6k
            JsonbPath path;
953
11.6k
            if (!path.seek(r_raw, r_size)) {
954
9
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
955
9
                                               std::string_view(r_raw, r_size));
956
9
            }
957
958
11.6k
            json_path_list[pi] = std::move(path);
959
960
11.6k
            return Status::OK();
961
11.6k
        };
_ZZN5doris22JsonbExtractStringImplINS_13JsonbTypeTypeEE16vector_vector_v2EPNS_15FunctionContextERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS5_IjLm4096ES8_Lm16ELm15EEEPSA_RKbRKSt6vectorIPKNS_9ColumnStrIjEESaISM_EERKSI_ISF_SaISF_EERKSI_IbSaIbEERS9_RSC_SZ_ENKUlmmE_clEmm
Line
Count
Source
942
1.34k
        auto parse_json_path = [&](size_t i, size_t pi) -> Status {
943
1.34k
            const auto index = index_check_const(i, path_const[pi]);
944
945
1.34k
            const ColumnString* path_col = rdata_columns[pi];
946
1.34k
            const ColumnString::Chars& rdata = path_col->get_chars();
947
1.34k
            const ColumnString::Offsets& roffsets = path_col->get_offsets();
948
1.34k
            size_t r_off = roffsets[index - 1];
949
1.34k
            size_t r_size = roffsets[index] - r_off;
950
1.34k
            const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]);
951
952
1.34k
            JsonbPath path;
953
1.34k
            if (!path.seek(r_raw, r_size)) {
954
1
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
955
1
                                               std::string_view(r_raw, r_size));
956
1
            }
957
958
1.33k
            json_path_list[pi] = std::move(path);
959
960
1.33k
            return Status::OK();
961
1.34k
        };
_ZZN5doris22JsonbExtractStringImplINS_13JsonbTypeJsonEE16vector_vector_v2EPNS_15FunctionContextERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS5_IjLm4096ES8_Lm16ELm15EEEPSA_RKbRKSt6vectorIPKNS_9ColumnStrIjEESaISM_EERKSI_ISF_SaISF_EERKSI_IbSaIbEERS9_RSC_SZ_ENKUlmmE_clEmm
Line
Count
Source
942
10.3k
        auto parse_json_path = [&](size_t i, size_t pi) -> Status {
943
10.3k
            const auto index = index_check_const(i, path_const[pi]);
944
945
10.3k
            const ColumnString* path_col = rdata_columns[pi];
946
10.3k
            const ColumnString::Chars& rdata = path_col->get_chars();
947
10.3k
            const ColumnString::Offsets& roffsets = path_col->get_offsets();
948
10.3k
            size_t r_off = roffsets[index - 1];
949
10.3k
            size_t r_size = roffsets[index] - r_off;
950
10.3k
            const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]);
951
952
10.3k
            JsonbPath path;
953
10.3k
            if (!path.seek(r_raw, r_size)) {
954
8
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
955
8
                                               std::string_view(r_raw, r_size));
956
8
            }
957
958
10.2k
            json_path_list[pi] = std::move(path);
959
960
10.2k
            return Status::OK();
961
10.3k
        };
_ZZN5doris22JsonbExtractStringImplINS_21JsonbTypeJsonNoQuotesEE16vector_vector_v2EPNS_15FunctionContextERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS5_IjLm4096ES8_Lm16ELm15EEEPSA_RKbRKSt6vectorIPKNS_9ColumnStrIjEESaISM_EERKSI_ISF_SaISF_EERKSI_IbSaIbEERS9_RSC_SZ_ENKUlmmE_clEmm
Line
Count
Source
942
8
        auto parse_json_path = [&](size_t i, size_t pi) -> Status {
943
8
            const auto index = index_check_const(i, path_const[pi]);
944
945
8
            const ColumnString* path_col = rdata_columns[pi];
946
8
            const ColumnString::Chars& rdata = path_col->get_chars();
947
8
            const ColumnString::Offsets& roffsets = path_col->get_offsets();
948
8
            size_t r_off = roffsets[index - 1];
949
8
            size_t r_size = roffsets[index] - r_off;
950
8
            const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]);
951
952
8
            JsonbPath path;
953
8
            if (!path.seek(r_raw, r_size)) {
954
0
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
955
0
                                               std::string_view(r_raw, r_size));
956
0
            }
957
958
8
            json_path_list[pi] = std::move(path);
959
960
8
            return Status::OK();
961
8
        };
962
963
22.8k
        for (size_t pi = 0; pi < rdata_columns.size(); pi++) {
964
11.5k
            if (path_const[pi]) {
965
11.3k
                if (r_null_maps[pi] && (*r_null_maps[pi])[0]) {
966
41
                    continue;
967
41
                }
968
11.3k
                RETURN_IF_ERROR(parse_json_path(0, pi));
969
11.3k
            }
970
11.5k
        }
971
972
11.2k
        res_data.reserve(ldata.size());
973
154k
        for (size_t i = 0; i < input_rows_count; ++i) {
974
143k
            if (null_map[i]) {
975
0
                continue;
976
0
            }
977
978
143k
            const auto data_index = index_check_const(i, json_data_const);
979
143k
            if (l_null_map && (*l_null_map)[data_index]) {
980
1.90k
                StringOP::push_null_string(i, res_data, res_offsets, null_map);
981
1.90k
                continue;
982
1.90k
            }
983
984
141k
            size_t l_off = loffsets[data_index - 1];
985
141k
            size_t l_size = loffsets[data_index] - l_off;
986
141k
            const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]);
987
141k
            if (rdata_columns.size() == 1) { // just return origin value
988
140k
                const auto path_index = index_check_const(i, path_const[0]);
989
140k
                if (r_null_maps[0] && (*r_null_maps[0])[path_index]) {
990
30
                    StringOP::push_null_string(i, res_data, res_offsets, null_map);
991
30
                    continue;
992
30
                }
993
994
140k
                if (!path_const[0]) {
995
286
                    RETURN_IF_ERROR(parse_json_path(i, 0));
996
286
                }
997
998
140k
                writer->reset();
999
140k
                inner_loop_impl(writer.get(), i, res_data, res_offsets, null_map, formater, l_raw,
1000
140k
                                l_size, json_path_list[0]);
1001
140k
            } else { // will make array string to user
1002
490
                writer->reset();
1003
490
                bool has_value = false;
1004
1005
                // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
1006
490
                const JsonbDocument* doc = nullptr;
1007
490
                auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc);
1008
1009
1.58k
                for (size_t pi = 0; pi < rdata_columns.size(); ++pi) {
1010
1.19k
                    if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
1011
0
                        continue;
1012
0
                    }
1013
1014
1.19k
                    const auto path_index = index_check_const(i, path_const[pi]);
1015
1.19k
                    if (r_null_maps[pi] && (*r_null_maps[pi])[path_index]) {
1016
99
                        StringOP::push_null_string(i, res_data, res_offsets, null_map);
1017
99
                        break;
1018
99
                    }
1019
1020
1.09k
                    if (!path_const[pi]) {
1021
28
                        RETURN_IF_ERROR(parse_json_path(i, pi));
1022
28
                    }
1023
1024
1.09k
                    auto find_result = doc->getValue()->findValue(json_path_list[pi]);
1025
1026
1.09k
                    if (find_result.value) {
1027
255
                        if (!has_value) {
1028
141
                            has_value = true;
1029
141
                            writer->writeStartArray();
1030
141
                        }
1031
255
                        if (find_result.value->isArray() && find_result.is_wildcard) {
1032
                            // To avoid getting results of nested array like [[1, 2, 3], [4, 5, 6]],
1033
                            // if value is array, we should write all items in array, instead of write the array itself.
1034
                            // finaly we will get results like [1, 2, 3, 4, 5, 6]
1035
45
                            for (const auto& item : *find_result.value->unpack<ArrayVal>()) {
1036
45
                                writer->writeValue(&item);
1037
45
                            }
1038
235
                        } else {
1039
235
                            writer->writeValue(find_result.value);
1040
235
                        }
1041
255
                    }
1042
1.09k
                }
1043
490
                if (has_value) {
1044
141
                    writer->writeEndArray();
1045
141
                    StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(),
1046
141
                                                                 writer->getOutput()->getSize()),
1047
141
                                                i, res_data, res_offsets);
1048
349
                } else {
1049
349
                    StringOP::push_null_string(i, res_data, res_offsets, null_map);
1050
349
                }
1051
490
            }
1052
141k
        } //for
1053
11.2k
        return Status::OK();
1054
11.2k
    }
_ZN5doris22JsonbExtractStringImplINS_13JsonbTypeTypeEE16vector_vector_v2EPNS_15FunctionContextERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS5_IjLm4096ES8_Lm16ELm15EEEPSA_RKbRKSt6vectorIPKNS_9ColumnStrIjEESaISM_EERKSI_ISF_SaISF_EERKSI_IbSaIbEERS9_RSC_SZ_
Line
Count
Source
930
1.32k
            ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets, NullMap& null_map) {
931
1.32k
        const size_t input_rows_count = null_map.size();
932
1.32k
        res_offsets.resize(input_rows_count);
933
934
1.32k
        auto writer = std::make_unique<JsonbWriter>();
935
1.32k
        std::unique_ptr<JsonbToJson> formater;
936
937
        // reuseable json path list, espacially for const path
938
1.32k
        std::vector<JsonbPath> json_path_list;
939
1.32k
        json_path_list.resize(rdata_columns.size());
940
941
        // lambda function to parse json path for row i and path pi
942
1.32k
        auto parse_json_path = [&](size_t i, size_t pi) -> Status {
943
1.32k
            const auto index = index_check_const(i, path_const[pi]);
944
945
1.32k
            const ColumnString* path_col = rdata_columns[pi];
946
1.32k
            const ColumnString::Chars& rdata = path_col->get_chars();
947
1.32k
            const ColumnString::Offsets& roffsets = path_col->get_offsets();
948
1.32k
            size_t r_off = roffsets[index - 1];
949
1.32k
            size_t r_size = roffsets[index] - r_off;
950
1.32k
            const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]);
951
952
1.32k
            JsonbPath path;
953
1.32k
            if (!path.seek(r_raw, r_size)) {
954
1.32k
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
955
1.32k
                                               std::string_view(r_raw, r_size));
956
1.32k
            }
957
958
1.32k
            json_path_list[pi] = std::move(path);
959
960
1.32k
            return Status::OK();
961
1.32k
        };
962
963
2.65k
        for (size_t pi = 0; pi < rdata_columns.size(); pi++) {
964
1.32k
            if (path_const[pi]) {
965
1.32k
                if (r_null_maps[pi] && (*r_null_maps[pi])[0]) {
966
1
                    continue;
967
1
                }
968
1.32k
                RETURN_IF_ERROR(parse_json_path(0, pi));
969
1.32k
            }
970
1.32k
        }
971
972
1.32k
        res_data.reserve(ldata.size());
973
4.57k
        for (size_t i = 0; i < input_rows_count; ++i) {
974
3.24k
            if (null_map[i]) {
975
0
                continue;
976
0
            }
977
978
3.24k
            const auto data_index = index_check_const(i, json_data_const);
979
3.24k
            if (l_null_map && (*l_null_map)[data_index]) {
980
248
                StringOP::push_null_string(i, res_data, res_offsets, null_map);
981
248
                continue;
982
248
            }
983
984
3.00k
            size_t l_off = loffsets[data_index - 1];
985
3.00k
            size_t l_size = loffsets[data_index] - l_off;
986
3.00k
            const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]);
987
3.00k
            if (rdata_columns.size() == 1) { // just return origin value
988
3.00k
                const auto path_index = index_check_const(i, path_const[0]);
989
3.00k
                if (r_null_maps[0] && (*r_null_maps[0])[path_index]) {
990
16
                    StringOP::push_null_string(i, res_data, res_offsets, null_map);
991
16
                    continue;
992
16
                }
993
994
2.98k
                if (!path_const[0]) {
995
18
                    RETURN_IF_ERROR(parse_json_path(i, 0));
996
18
                }
997
998
2.98k
                writer->reset();
999
2.98k
                inner_loop_impl(writer.get(), i, res_data, res_offsets, null_map, formater, l_raw,
1000
2.98k
                                l_size, json_path_list[0]);
1001
2.98k
            } else { // will make array string to user
1002
0
                writer->reset();
1003
0
                bool has_value = false;
1004
1005
                // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
1006
0
                const JsonbDocument* doc = nullptr;
1007
0
                auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc);
1008
1009
0
                for (size_t pi = 0; pi < rdata_columns.size(); ++pi) {
1010
0
                    if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
1011
0
                        continue;
1012
0
                    }
1013
1014
0
                    const auto path_index = index_check_const(i, path_const[pi]);
1015
0
                    if (r_null_maps[pi] && (*r_null_maps[pi])[path_index]) {
1016
0
                        StringOP::push_null_string(i, res_data, res_offsets, null_map);
1017
0
                        break;
1018
0
                    }
1019
1020
0
                    if (!path_const[pi]) {
1021
0
                        RETURN_IF_ERROR(parse_json_path(i, pi));
1022
0
                    }
1023
1024
0
                    auto find_result = doc->getValue()->findValue(json_path_list[pi]);
1025
1026
0
                    if (find_result.value) {
1027
0
                        if (!has_value) {
1028
0
                            has_value = true;
1029
0
                            writer->writeStartArray();
1030
0
                        }
1031
0
                        if (find_result.value->isArray() && find_result.is_wildcard) {
1032
                            // To avoid getting results of nested array like [[1, 2, 3], [4, 5, 6]],
1033
                            // if value is array, we should write all items in array, instead of write the array itself.
1034
                            // finaly we will get results like [1, 2, 3, 4, 5, 6]
1035
0
                            for (const auto& item : *find_result.value->unpack<ArrayVal>()) {
1036
0
                                writer->writeValue(&item);
1037
0
                            }
1038
0
                        } else {
1039
0
                            writer->writeValue(find_result.value);
1040
0
                        }
1041
0
                    }
1042
0
                }
1043
0
                if (has_value) {
1044
0
                    writer->writeEndArray();
1045
0
                    StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(),
1046
0
                                                                 writer->getOutput()->getSize()),
1047
0
                                                i, res_data, res_offsets);
1048
0
                } else {
1049
0
                    StringOP::push_null_string(i, res_data, res_offsets, null_map);
1050
0
                }
1051
0
            }
1052
3.00k
        } //for
1053
1.32k
        return Status::OK();
1054
1.32k
    }
_ZN5doris22JsonbExtractStringImplINS_13JsonbTypeJsonEE16vector_vector_v2EPNS_15FunctionContextERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS5_IjLm4096ES8_Lm16ELm15EEEPSA_RKbRKSt6vectorIPKNS_9ColumnStrIjEESaISM_EERKSI_ISF_SaISF_EERKSI_IbSaIbEERS9_RSC_SZ_
Line
Count
Source
930
9.94k
            ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets, NullMap& null_map) {
931
9.94k
        const size_t input_rows_count = null_map.size();
932
9.94k
        res_offsets.resize(input_rows_count);
933
934
9.94k
        auto writer = std::make_unique<JsonbWriter>();
935
9.94k
        std::unique_ptr<JsonbToJson> formater;
936
937
        // reuseable json path list, espacially for const path
938
9.94k
        std::vector<JsonbPath> json_path_list;
939
9.94k
        json_path_list.resize(rdata_columns.size());
940
941
        // lambda function to parse json path for row i and path pi
942
9.94k
        auto parse_json_path = [&](size_t i, size_t pi) -> Status {
943
9.94k
            const auto index = index_check_const(i, path_const[pi]);
944
945
9.94k
            const ColumnString* path_col = rdata_columns[pi];
946
9.94k
            const ColumnString::Chars& rdata = path_col->get_chars();
947
9.94k
            const ColumnString::Offsets& roffsets = path_col->get_offsets();
948
9.94k
            size_t r_off = roffsets[index - 1];
949
9.94k
            size_t r_size = roffsets[index] - r_off;
950
9.94k
            const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]);
951
952
9.94k
            JsonbPath path;
953
9.94k
            if (!path.seek(r_raw, r_size)) {
954
9.94k
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
955
9.94k
                                               std::string_view(r_raw, r_size));
956
9.94k
            }
957
958
9.94k
            json_path_list[pi] = std::move(path);
959
960
9.94k
            return Status::OK();
961
9.94k
        };
962
963
20.1k
        for (size_t pi = 0; pi < rdata_columns.size(); pi++) {
964
10.2k
            if (path_const[pi]) {
965
10.0k
                if (r_null_maps[pi] && (*r_null_maps[pi])[0]) {
966
40
                    continue;
967
40
                }
968
10.0k
                RETURN_IF_ERROR(parse_json_path(0, pi));
969
10.0k
            }
970
10.2k
        }
971
972
9.93k
        res_data.reserve(ldata.size());
973
149k
        for (size_t i = 0; i < input_rows_count; ++i) {
974
140k
            if (null_map[i]) {
975
0
                continue;
976
0
            }
977
978
140k
            const auto data_index = index_check_const(i, json_data_const);
979
140k
            if (l_null_map && (*l_null_map)[data_index]) {
980
1.65k
                StringOP::push_null_string(i, res_data, res_offsets, null_map);
981
1.65k
                continue;
982
1.65k
            }
983
984
138k
            size_t l_off = loffsets[data_index - 1];
985
138k
            size_t l_size = loffsets[data_index] - l_off;
986
138k
            const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]);
987
138k
            if (rdata_columns.size() == 1) { // just return origin value
988
137k
                const auto path_index = index_check_const(i, path_const[0]);
989
137k
                if (r_null_maps[0] && (*r_null_maps[0])[path_index]) {
990
14
                    StringOP::push_null_string(i, res_data, res_offsets, null_map);
991
14
                    continue;
992
14
                }
993
994
137k
                if (!path_const[0]) {
995
266
                    RETURN_IF_ERROR(parse_json_path(i, 0));
996
266
                }
997
998
137k
                writer->reset();
999
137k
                inner_loop_impl(writer.get(), i, res_data, res_offsets, null_map, formater, l_raw,
1000
137k
                                l_size, json_path_list[0]);
1001
137k
            } else { // will make array string to user
1002
487
                writer->reset();
1003
487
                bool has_value = false;
1004
1005
                // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
1006
487
                const JsonbDocument* doc = nullptr;
1007
487
                auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc);
1008
1009
1.57k
                for (size_t pi = 0; pi < rdata_columns.size(); ++pi) {
1010
1.18k
                    if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
1011
0
                        continue;
1012
0
                    }
1013
1014
1.18k
                    const auto path_index = index_check_const(i, path_const[pi]);
1015
1.18k
                    if (r_null_maps[pi] && (*r_null_maps[pi])[path_index]) {
1016
98
                        StringOP::push_null_string(i, res_data, res_offsets, null_map);
1017
98
                        break;
1018
98
                    }
1019
1020
1.08k
                    if (!path_const[pi]) {
1021
22
                        RETURN_IF_ERROR(parse_json_path(i, pi));
1022
22
                    }
1023
1024
1.08k
                    auto find_result = doc->getValue()->findValue(json_path_list[pi]);
1025
1026
1.08k
                    if (find_result.value) {
1027
249
                        if (!has_value) {
1028
138
                            has_value = true;
1029
138
                            writer->writeStartArray();
1030
138
                        }
1031
249
                        if (find_result.value->isArray() && find_result.is_wildcard) {
1032
                            // To avoid getting results of nested array like [[1, 2, 3], [4, 5, 6]],
1033
                            // if value is array, we should write all items in array, instead of write the array itself.
1034
                            // finaly we will get results like [1, 2, 3, 4, 5, 6]
1035
45
                            for (const auto& item : *find_result.value->unpack<ArrayVal>()) {
1036
45
                                writer->writeValue(&item);
1037
45
                            }
1038
229
                        } else {
1039
229
                            writer->writeValue(find_result.value);
1040
229
                        }
1041
249
                    }
1042
1.08k
                }
1043
487
                if (has_value) {
1044
138
                    writer->writeEndArray();
1045
138
                    StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(),
1046
138
                                                                 writer->getOutput()->getSize()),
1047
138
                                                i, res_data, res_offsets);
1048
349
                } else {
1049
349
                    StringOP::push_null_string(i, res_data, res_offsets, null_map);
1050
349
                }
1051
487
            }
1052
138k
        } //for
1053
9.93k
        return Status::OK();
1054
9.93k
    }
_ZN5doris22JsonbExtractStringImplINS_21JsonbTypeJsonNoQuotesEE16vector_vector_v2EPNS_15FunctionContextERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS5_IjLm4096ES8_Lm16ELm15EEEPSA_RKbRKSt6vectorIPKNS_9ColumnStrIjEESaISM_EERKSI_ISF_SaISF_EERKSI_IbSaIbEERS9_RSC_SZ_
Line
Count
Source
930
6
            ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets, NullMap& null_map) {
931
6
        const size_t input_rows_count = null_map.size();
932
6
        res_offsets.resize(input_rows_count);
933
934
6
        auto writer = std::make_unique<JsonbWriter>();
935
6
        std::unique_ptr<JsonbToJson> formater;
936
937
        // reuseable json path list, espacially for const path
938
6
        std::vector<JsonbPath> json_path_list;
939
6
        json_path_list.resize(rdata_columns.size());
940
941
        // lambda function to parse json path for row i and path pi
942
6
        auto parse_json_path = [&](size_t i, size_t pi) -> Status {
943
6
            const auto index = index_check_const(i, path_const[pi]);
944
945
6
            const ColumnString* path_col = rdata_columns[pi];
946
6
            const ColumnString::Chars& rdata = path_col->get_chars();
947
6
            const ColumnString::Offsets& roffsets = path_col->get_offsets();
948
6
            size_t r_off = roffsets[index - 1];
949
6
            size_t r_size = roffsets[index] - r_off;
950
6
            const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]);
951
952
6
            JsonbPath path;
953
6
            if (!path.seek(r_raw, r_size)) {
954
6
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
955
6
                                               std::string_view(r_raw, r_size));
956
6
            }
957
958
6
            json_path_list[pi] = std::move(path);
959
960
6
            return Status::OK();
961
6
        };
962
963
16
        for (size_t pi = 0; pi < rdata_columns.size(); pi++) {
964
10
            if (path_const[pi]) {
965
0
                if (r_null_maps[pi] && (*r_null_maps[pi])[0]) {
966
0
                    continue;
967
0
                }
968
0
                RETURN_IF_ERROR(parse_json_path(0, pi));
969
0
            }
970
10
        }
971
972
6
        res_data.reserve(ldata.size());
973
12
        for (size_t i = 0; i < input_rows_count; ++i) {
974
6
            if (null_map[i]) {
975
0
                continue;
976
0
            }
977
978
6
            const auto data_index = index_check_const(i, json_data_const);
979
6
            if (l_null_map && (*l_null_map)[data_index]) {
980
1
                StringOP::push_null_string(i, res_data, res_offsets, null_map);
981
1
                continue;
982
1
            }
983
984
5
            size_t l_off = loffsets[data_index - 1];
985
5
            size_t l_size = loffsets[data_index] - l_off;
986
5
            const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]);
987
5
            if (rdata_columns.size() == 1) { // just return origin value
988
2
                const auto path_index = index_check_const(i, path_const[0]);
989
2
                if (r_null_maps[0] && (*r_null_maps[0])[path_index]) {
990
0
                    StringOP::push_null_string(i, res_data, res_offsets, null_map);
991
0
                    continue;
992
0
                }
993
994
2
                if (!path_const[0]) {
995
2
                    RETURN_IF_ERROR(parse_json_path(i, 0));
996
2
                }
997
998
2
                writer->reset();
999
2
                inner_loop_impl(writer.get(), i, res_data, res_offsets, null_map, formater, l_raw,
1000
2
                                l_size, json_path_list[0]);
1001
3
            } else { // will make array string to user
1002
3
                writer->reset();
1003
3
                bool has_value = false;
1004
1005
                // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
1006
3
                const JsonbDocument* doc = nullptr;
1007
3
                auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc);
1008
1009
9
                for (size_t pi = 0; pi < rdata_columns.size(); ++pi) {
1010
7
                    if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
1011
0
                        continue;
1012
0
                    }
1013
1014
7
                    const auto path_index = index_check_const(i, path_const[pi]);
1015
7
                    if (r_null_maps[pi] && (*r_null_maps[pi])[path_index]) {
1016
1
                        StringOP::push_null_string(i, res_data, res_offsets, null_map);
1017
1
                        break;
1018
1
                    }
1019
1020
6
                    if (!path_const[pi]) {
1021
6
                        RETURN_IF_ERROR(parse_json_path(i, pi));
1022
6
                    }
1023
1024
6
                    auto find_result = doc->getValue()->findValue(json_path_list[pi]);
1025
1026
6
                    if (find_result.value) {
1027
6
                        if (!has_value) {
1028
3
                            has_value = true;
1029
3
                            writer->writeStartArray();
1030
3
                        }
1031
6
                        if (find_result.value->isArray() && find_result.is_wildcard) {
1032
                            // To avoid getting results of nested array like [[1, 2, 3], [4, 5, 6]],
1033
                            // if value is array, we should write all items in array, instead of write the array itself.
1034
                            // finaly we will get results like [1, 2, 3, 4, 5, 6]
1035
0
                            for (const auto& item : *find_result.value->unpack<ArrayVal>()) {
1036
0
                                writer->writeValue(&item);
1037
0
                            }
1038
6
                        } else {
1039
6
                            writer->writeValue(find_result.value);
1040
6
                        }
1041
6
                    }
1042
6
                }
1043
3
                if (has_value) {
1044
3
                    writer->writeEndArray();
1045
3
                    StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(),
1046
3
                                                                 writer->getOutput()->getSize()),
1047
3
                                                i, res_data, res_offsets);
1048
3
                } else {
1049
0
                    StringOP::push_null_string(i, res_data, res_offsets, null_map);
1050
0
                }
1051
3
            }
1052
5
        } //for
1053
6
        return Status::OK();
1054
6
    }
1055
1056
    static Status vector_vector(FunctionContext* context, const ColumnString::Chars& ldata,
1057
                                const ColumnString::Offsets& loffsets, const NullMap* l_null_map,
1058
                                const ColumnString::Chars& rdata,
1059
                                const ColumnString::Offsets& roffsets, const NullMap* r_null_map,
1060
                                ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets,
1061
                                NullMap& null_map) {
1062
        size_t input_rows_count = loffsets.size();
1063
        res_offsets.resize(input_rows_count);
1064
1065
        std::unique_ptr<JsonbToJson> formater;
1066
1067
        JsonbWriter writer;
1068
        for (size_t i = 0; i < input_rows_count; ++i) {
1069
            if (l_null_map && (*l_null_map)[i]) {
1070
                StringOP::push_null_string(i, res_data, res_offsets, null_map);
1071
                continue;
1072
            }
1073
1074
            if (r_null_map && (*r_null_map)[i]) {
1075
                StringOP::push_null_string(i, res_data, res_offsets, null_map);
1076
                continue;
1077
            }
1078
1079
            int l_size = loffsets[i] - loffsets[i - 1];
1080
            const char* l_raw = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
1081
1082
            int r_size = roffsets[i] - roffsets[i - 1];
1083
            const char* r_raw = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
1084
1085
            JsonbPath path;
1086
            if (!path.seek(r_raw, r_size)) {
1087
                return Status::InvalidArgument(
1088
                        "Json path error: Invalid Json Path for value: {} at row: {}",
1089
                        std::string_view(r_raw, r_size), i);
1090
            }
1091
1092
            writer.reset();
1093
            inner_loop_impl(&writer, i, res_data, res_offsets, null_map, formater, l_raw, l_size,
1094
                            path);
1095
        } //for
1096
        return Status::OK();
1097
    } //function
1098
1099
    static Status vector_scalar(FunctionContext* context, const ColumnString::Chars& ldata,
1100
                                const ColumnString::Offsets& loffsets, const NullMap* l_null_map,
1101
                                const StringRef& rdata, ColumnString::Chars& res_data,
1102
                                ColumnString::Offsets& res_offsets, NullMap& null_map) {
1103
        size_t input_rows_count = loffsets.size();
1104
        res_offsets.resize(input_rows_count);
1105
1106
        std::unique_ptr<JsonbToJson> formater;
1107
1108
        JsonbPath path;
1109
        if (!path.seek(rdata.data, rdata.size)) {
1110
            return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
1111
                                           std::string_view(rdata.data, rdata.size));
1112
        }
1113
1114
        JsonbWriter writer;
1115
        for (size_t i = 0; i < input_rows_count; ++i) {
1116
            if (l_null_map && (*l_null_map)[i]) {
1117
                StringOP::push_null_string(i, res_data, res_offsets, null_map);
1118
                continue;
1119
            }
1120
1121
            int l_size = loffsets[i] - loffsets[i - 1];
1122
            const char* l_raw = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
1123
1124
            writer.reset();
1125
            inner_loop_impl(&writer, i, res_data, res_offsets, null_map, formater, l_raw, l_size,
1126
                            path);
1127
        } //for
1128
        return Status::OK();
1129
    } //function
1130
1131
    static Status scalar_vector(FunctionContext* context, const StringRef& ldata,
1132
                                const ColumnString::Chars& rdata,
1133
                                const ColumnString::Offsets& roffsets, const NullMap* r_null_map,
1134
                                ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets,
1135
                                NullMap& null_map) {
1136
        size_t input_rows_count = roffsets.size();
1137
        res_offsets.resize(input_rows_count);
1138
1139
        std::unique_ptr<JsonbToJson> formater;
1140
1141
        JsonbWriter writer;
1142
1143
        for (size_t i = 0; i < input_rows_count; ++i) {
1144
            if (r_null_map && (*r_null_map)[i]) {
1145
                StringOP::push_null_string(i, res_data, res_offsets, null_map);
1146
                continue;
1147
            }
1148
1149
            int r_size = roffsets[i] - roffsets[i - 1];
1150
            const char* r_raw = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
1151
1152
            JsonbPath path;
1153
            if (!path.seek(r_raw, r_size)) {
1154
                return Status::InvalidArgument(
1155
                        "Json path error: Invalid Json Path for value: {} at row: {}",
1156
                        std::string_view(r_raw, r_size), i);
1157
            }
1158
1159
            writer.reset();
1160
            inner_loop_impl(&writer, i, res_data, res_offsets, null_map, formater, ldata.data,
1161
                            ldata.size, path);
1162
        } //for
1163
        return Status::OK();
1164
    } //function
1165
};
1166
1167
struct JsonbExtractIsnull {
1168
    static constexpr auto name = "json_extract_isnull";
1169
    static constexpr auto alias = "jsonb_extract_isnull";
1170
1171
    using ReturnType = DataTypeUInt8;
1172
    using ColumnType = ColumnUInt8;
1173
    using Container = typename ColumnType::Container;
1174
1175
private:
1176
    static ALWAYS_INLINE void inner_loop_impl(size_t i, Container& res, NullMap& null_map,
1177
                                              const char* l_raw_str, size_t l_str_size,
1178
2.97k
                                              JsonbPath& path) {
1179
2.97k
        if (null_map[i]) {
1180
0
            res[i] = 0;
1181
0
            return;
1182
0
        }
1183
1184
        // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
1185
2.97k
        const JsonbDocument* doc = nullptr;
1186
2.97k
        auto st = JsonbDocument::checkAndCreateDocument(l_raw_str, l_str_size, &doc);
1187
2.97k
        if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
1188
0
            null_map[i] = 1;
1189
0
            res[i] = 0;
1190
0
            return;
1191
0
        }
1192
1193
        // value is NOT necessary to be deleted since JsonbValue will not allocate memory
1194
2.97k
        auto find_result = doc->getValue()->findValue(path);
1195
2.97k
        const auto* value = find_result.value;
1196
1197
2.97k
        if (UNLIKELY(!value)) {
1198
2.55k
            null_map[i] = 1;
1199
2.55k
            res[i] = 0;
1200
2.55k
            return;
1201
2.55k
        }
1202
1203
417
        res[i] = value->isNull();
1204
417
    }
1205
1206
public:
1207
    // for jsonb_extract_int/int64/double
1208
    static Status vector_vector(FunctionContext* context, const ColumnString::Chars& ldata,
1209
                                const ColumnString::Offsets& loffsets, const NullMap* l_null_map,
1210
                                const ColumnString::Chars& rdata,
1211
                                const ColumnString::Offsets& roffsets, const NullMap* r_null_map,
1212
1
                                Container& res, NullMap& null_map) {
1213
1
        size_t size = loffsets.size();
1214
1
        res.resize(size);
1215
1216
13
        for (size_t i = 0; i < loffsets.size(); i++) {
1217
12
            if ((l_null_map && (*l_null_map)[i]) || (r_null_map && (*r_null_map)[i])) {
1218
8
                res[i] = 0;
1219
8
                null_map[i] = 1;
1220
8
                continue;
1221
8
            }
1222
1223
4
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
1224
4
            int l_str_size = loffsets[i] - loffsets[i - 1];
1225
1226
4
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
1227
4
            int r_str_size = roffsets[i] - roffsets[i - 1];
1228
1229
4
            JsonbPath path;
1230
4
            if (!path.seek(r_raw_str, r_str_size)) {
1231
0
                return Status::InvalidArgument(
1232
0
                        "Json path error: Invalid Json Path for value: {} at row: {}",
1233
0
                        std::string_view(r_raw_str, r_str_size), i);
1234
0
            }
1235
1236
4
            inner_loop_impl(i, res, null_map, l_raw_str, l_str_size, path);
1237
4
        } //for
1238
1
        return Status::OK();
1239
1
    } //function
1240
1241
    static Status scalar_vector(FunctionContext* context, const StringRef& ldata,
1242
                                const ColumnString::Chars& rdata,
1243
                                const ColumnString::Offsets& roffsets, const NullMap* r_null_map,
1244
1
                                Container& res, NullMap& null_map) {
1245
1
        size_t size = roffsets.size();
1246
1
        res.resize(size);
1247
1248
13
        for (size_t i = 0; i < size; i++) {
1249
12
            if (r_null_map && (*r_null_map)[i]) {
1250
4
                res[i] = 0;
1251
4
                null_map[i] = 1;
1252
4
                continue;
1253
4
            }
1254
1255
8
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
1256
8
            int r_str_size = roffsets[i] - roffsets[i - 1];
1257
1258
8
            JsonbPath path;
1259
8
            if (!path.seek(r_raw_str, r_str_size)) {
1260
0
                return Status::InvalidArgument(
1261
0
                        "Json path error: Invalid Json Path for value: {} at row: {}",
1262
0
                        std::string_view(r_raw_str, r_str_size), i);
1263
0
            }
1264
1265
8
            inner_loop_impl(i, res, null_map, ldata.data, ldata.size, path);
1266
8
        } //for
1267
1
        return Status::OK();
1268
1
    } //function
1269
1270
    static Status vector_scalar(FunctionContext* context, const ColumnString::Chars& ldata,
1271
                                const ColumnString::Offsets& loffsets, const NullMap* l_null_map,
1272
1.32k
                                const StringRef& rdata, Container& res, NullMap& null_map) {
1273
1.32k
        size_t size = loffsets.size();
1274
1.32k
        res.resize(size);
1275
1276
1.32k
        JsonbPath path;
1277
1.32k
        if (!path.seek(rdata.data, rdata.size)) {
1278
0
            return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
1279
0
                                           std::string_view(rdata.data, rdata.size));
1280
0
        }
1281
1282
4.50k
        for (size_t i = 0; i < loffsets.size(); i++) {
1283
3.18k
            if (l_null_map && (*l_null_map)[i]) {
1284
228
                res[i] = 0;
1285
228
                null_map[i] = 1;
1286
228
                continue;
1287
228
            }
1288
1289
2.95k
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
1290
2.95k
            int l_str_size = loffsets[i] - loffsets[i - 1];
1291
1292
2.95k
            inner_loop_impl(i, res, null_map, l_raw_str, l_str_size, path);
1293
2.95k
        } //for
1294
1.32k
        return Status::OK();
1295
1.32k
    } //function
1296
};
1297
1298
struct JsonbTypeJson {
1299
    using T = std::string;
1300
    using ReturnType = DataTypeJsonb;
1301
    using ColumnType = ColumnString;
1302
    static const bool only_get_type = false;
1303
    static const bool no_quotes = false;
1304
};
1305
1306
struct JsonbTypeJsonNoQuotes {
1307
    using T = std::string;
1308
    using ReturnType = DataTypeJsonb;
1309
    using ColumnType = ColumnString;
1310
    static const bool only_get_type = false;
1311
    static const bool no_quotes = true;
1312
};
1313
1314
struct JsonbTypeType {
1315
    using T = std::string;
1316
    using ReturnType = DataTypeString;
1317
    using ColumnType = ColumnString;
1318
    static const bool only_get_type = true;
1319
    static const bool no_quotes = false;
1320
};
1321
1322
/// Binds the JsonbTypeJson traits to the shared extraction implementation and
/// supplies the primary name and the alias for that function.
struct JsonbExtractJsonb : JsonbExtractStringImpl<JsonbTypeJson> {
    static constexpr const char* name = "jsonb_extract";
    static constexpr const char* alias = "json_extract";
};
1326
1327
/// Binds the JsonbTypeJsonNoQuotes traits to the shared extraction implementation
/// and supplies the primary name and the alias for that function.
struct JsonbExtractJsonbNoQuotes : JsonbExtractStringImpl<JsonbTypeJsonNoQuotes> {
    static constexpr const char* name = "jsonb_extract_no_quotes";
    static constexpr const char* alias = "json_extract_no_quotes";
};
1331
1332
/// Binds the JsonbTypeType traits to the shared extraction implementation and
/// supplies the primary name and the alias for that function.
struct JsonbTypeImpl : JsonbExtractStringImpl<JsonbTypeType> {
    static constexpr const char* name = "json_type";
    static constexpr const char* alias = "jsonb_type";
};
1336
1337
// The "exists" function is a plain alias of FunctionJsonbExtractPath.
using FunctionJsonbExists = FunctionJsonbExtractPath;
1338
// FunctionJsonbExtract instantiated with JsonbTypeImpl (json_type / jsonb_type).
using FunctionJsonbType = FunctionJsonbExtract<JsonbTypeImpl>;
1339
1340
// FunctionJsonbExtract instantiated with JsonbExtractIsnull.
using FunctionJsonbExtractIsnull = FunctionJsonbExtract<JsonbExtractIsnull>;
1341
// FunctionJsonbExtract instantiated with JsonbExtractJsonb (jsonb_extract / json_extract).
using FunctionJsonbExtractJsonb = FunctionJsonbExtract<JsonbExtractJsonb>;
1342
// FunctionJsonbExtract instantiated with JsonbExtractJsonbNoQuotes
// (jsonb_extract_no_quotes / json_extract_no_quotes).
using FunctionJsonbExtractJsonbNoQuotes = FunctionJsonbExtract<JsonbExtractJsonbNoQuotes>;
1343
1344
template <typename Impl>
1345
class FunctionJsonbLength : public IFunction {
1346
public:
1347
    static constexpr auto name = "json_length";
1348
1
    String get_name() const override { return name; }
1349
44
    static FunctionPtr create() { return std::make_shared<FunctionJsonbLength<Impl>>(); }
1350
1351
35
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1352
35
        return make_nullable(std::make_shared<DataTypeInt32>());
1353
35
    }
1354
43
    DataTypes get_variadic_argument_types_impl() const override {
1355
43
        return Impl::get_variadic_argument_types();
1356
43
    }
1357
35
    size_t get_number_of_arguments() const override {
1358
35
        return get_variadic_argument_types_impl().size();
1359
35
    }
1360
1361
123
    bool use_default_implementation_for_nulls() const override { return false; }
1362
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
1363
87
                        uint32_t result, size_t input_rows_count) const override {
1364
87
        return Impl::execute_impl(context, block, arguments, result, input_rows_count);
1365
87
    }
1366
};
1367
1368
struct JsonbLengthUtil {
1369
    static Status jsonb_length_execute(FunctionContext* context, Block& block,
1370
                                       const ColumnNumbers& arguments, uint32_t result,
1371
88
                                       size_t input_rows_count) {
1372
88
        DCHECK_GE(arguments.size(), 2);
1373
88
        ColumnPtr jsonb_data_column;
1374
88
        bool jsonb_data_const = false;
1375
        // prepare jsonb data column
1376
88
        std::tie(jsonb_data_column, jsonb_data_const) =
1377
88
                unpack_if_const(block.get_by_position(arguments[0]).column);
1378
88
        ColumnPtr path_column;
1379
88
        bool is_const = false;
1380
88
        std::tie(path_column, is_const) =
1381
88
                unpack_if_const(block.get_by_position(arguments[1]).column);
1382
1383
88
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
1384
88
        auto return_type = block.get_data_type(result);
1385
88
        MutableColumnPtr res = return_type->create_column();
1386
1387
88
        JsonbPath path;
1388
88
        if (is_const) {
1389
61
            if (path_column->is_null_at(0)) {
1390
2
                for (size_t i = 0; i < input_rows_count; ++i) {
1391
1
                    null_map->get_data()[i] = 1;
1392
1
                    res->insert_data(nullptr, 0);
1393
1
                }
1394
1395
1
                block.replace_by_position(
1396
1
                        result, ColumnNullable::create(std::move(res), std::move(null_map)));
1397
1
                return Status::OK();
1398
1
            }
1399
1400
60
            auto path_value = path_column->get_data_at(0);
1401
60
            if (!path.seek(path_value.data, path_value.size)) {
1402
0
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
1403
0
                                               std::string_view(path_value.data, path_value.size));
1404
0
            }
1405
60
        }
1406
1407
262
        for (size_t i = 0; i < input_rows_count; ++i) {
1408
175
            if (jsonb_data_column->is_null_at(i) || path_column->is_null_at(i) ||
1409
175
                (jsonb_data_column->get_data_at(i).size == 0)) {
1410
18
                null_map->get_data()[i] = 1;
1411
18
                res->insert_data(nullptr, 0);
1412
18
                continue;
1413
18
            }
1414
157
            if (!is_const) {
1415
25
                auto path_value = path_column->get_data_at(i);
1416
25
                path.clean();
1417
25
                if (!path.seek(path_value.data, path_value.size)) {
1418
0
                    return Status::InvalidArgument(
1419
0
                            "Json path error: Invalid Json Path for value: {}",
1420
0
                            std::string_view(reinterpret_cast<const char*>(path_value.data),
1421
0
                                             path_value.size));
1422
0
                }
1423
25
            }
1424
157
            auto jsonb_value = jsonb_data_column->get_data_at(i);
1425
            // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
1426
157
            const JsonbDocument* doc = nullptr;
1427
157
            RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(jsonb_value.data,
1428
157
                                                                  jsonb_value.size, &doc));
1429
157
            auto find_result = doc->getValue()->findValue(path);
1430
157
            const auto* value = find_result.value;
1431
157
            if (UNLIKELY(!value)) {
1432
74
                null_map->get_data()[i] = 1;
1433
74
                res->insert_data(nullptr, 0);
1434
74
                continue;
1435
74
            }
1436
83
            auto length = value->numElements();
1437
83
            res->insert_data(const_cast<const char*>((char*)&length), 0);
1438
83
        }
1439
87
        block.replace_by_position(result,
1440
87
                                  ColumnNullable::create(std::move(res), std::move(null_map)));
1441
87
        return Status::OK();
1442
87
    }
1443
};
1444
1445
struct JsonbLengthAndPathImpl {
1446
43
    static DataTypes get_variadic_argument_types() {
1447
43
        return {std::make_shared<DataTypeJsonb>(), std::make_shared<DataTypeString>()};
1448
43
    }
1449
1450
    static Status execute_impl(FunctionContext* context, Block& block,
1451
                               const ColumnNumbers& arguments, uint32_t result,
1452
87
                               size_t input_rows_count) {
1453
87
        return JsonbLengthUtil::jsonb_length_execute(context, block, arguments, result,
1454
87
                                                     input_rows_count);
1455
87
    }
1456
};
1457
1458
template <typename Impl>
1459
class FunctionJsonbContains : public IFunction {
1460
public:
1461
    static constexpr auto name = "json_contains";
1462
1
    String get_name() const override { return name; }
1463
58
    static FunctionPtr create() { return std::make_shared<FunctionJsonbContains<Impl>>(); }
1464
1465
49
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1466
49
        return make_nullable(std::make_shared<DataTypeUInt8>());
1467
49
    }
1468
57
    DataTypes get_variadic_argument_types_impl() const override {
1469
57
        return Impl::get_variadic_argument_types();
1470
57
    }
1471
49
    size_t get_number_of_arguments() const override {
1472
49
        return get_variadic_argument_types_impl().size();
1473
49
    }
1474
1475
176
    bool use_default_implementation_for_nulls() const override { return false; }
1476
1477
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
1478
126
                        uint32_t result, size_t input_rows_count) const override {
1479
126
        return Impl::execute_impl(context, block, arguments, result, input_rows_count);
1480
126
    }
1481
};
1482
1483
struct JsonbContainsUtil {
1484
    static Status jsonb_contains_execute(FunctionContext* context, Block& block,
1485
                                         const ColumnNumbers& arguments, uint32_t result,
1486
128
                                         size_t input_rows_count) {
1487
128
        DCHECK_GE(arguments.size(), 3);
1488
1489
128
        auto jsonb_data1_column = block.get_by_position(arguments[0]).column;
1490
128
        auto jsonb_data2_column = block.get_by_position(arguments[1]).column;
1491
1492
128
        ColumnPtr path_column;
1493
128
        bool is_const = false;
1494
128
        std::tie(path_column, is_const) =
1495
128
                unpack_if_const(block.get_by_position(arguments[2]).column);
1496
1497
128
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
1498
128
        auto return_type = block.get_data_type(result);
1499
128
        MutableColumnPtr res = return_type->create_column();
1500
1501
128
        JsonbPath path;
1502
128
        if (is_const) {
1503
84
            if (path_column->is_null_at(0)) {
1504
2
                for (size_t i = 0; i < input_rows_count; ++i) {
1505
1
                    null_map->get_data()[i] = 1;
1506
1
                    res->insert_data(nullptr, 0);
1507
1
                }
1508
1509
1
                block.replace_by_position(
1510
1
                        result, ColumnNullable::create(std::move(res), std::move(null_map)));
1511
1
                return Status::OK();
1512
1
            }
1513
1514
83
            auto path_value = path_column->get_data_at(0);
1515
83
            if (!path.seek(path_value.data, path_value.size)) {
1516
1
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
1517
1
                                               std::string_view(path_value.data, path_value.size));
1518
1
            }
1519
83
        }
1520
1521
380
        for (size_t i = 0; i < input_rows_count; ++i) {
1522
255
            if (jsonb_data1_column->is_null_at(i) || jsonb_data2_column->is_null_at(i) ||
1523
255
                path_column->is_null_at(i)) {
1524
28
                null_map->get_data()[i] = 1;
1525
28
                res->insert_data(nullptr, 0);
1526
28
                continue;
1527
28
            }
1528
1529
227
            if (!is_const) {
1530
47
                auto path_value = path_column->get_data_at(i);
1531
47
                path.clean();
1532
47
                if (!path.seek(path_value.data, path_value.size)) {
1533
1
                    return Status::InvalidArgument(
1534
1
                            "Json path error: Invalid Json Path for value: {}",
1535
1
                            std::string_view(path_value.data, path_value.size));
1536
1
                }
1537
47
            }
1538
1539
226
            auto jsonb_value1 = jsonb_data1_column->get_data_at(i);
1540
226
            auto jsonb_value2 = jsonb_data2_column->get_data_at(i);
1541
1542
226
            if (jsonb_value1.size == 0 || jsonb_value2.size == 0) {
1543
1
                null_map->get_data()[i] = 1;
1544
1
                res->insert_data(nullptr, 0);
1545
1
                continue;
1546
1
            }
1547
            // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
1548
225
            const JsonbDocument* doc1 = nullptr;
1549
225
            RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(jsonb_value1.data,
1550
225
                                                                  jsonb_value1.size, &doc1));
1551
225
            const JsonbDocument* doc2 = nullptr;
1552
225
            RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(jsonb_value2.data,
1553
225
                                                                  jsonb_value2.size, &doc2));
1554
1555
225
            auto find_result = doc1->getValue()->findValue(path);
1556
225
            const auto* value1 = find_result.value;
1557
225
            const JsonbValue* value2 = doc2->getValue();
1558
225
            if (!value1 || !value2) {
1559
45
                null_map->get_data()[i] = 1;
1560
45
                res->insert_data(nullptr, 0);
1561
45
                continue;
1562
45
            }
1563
180
            auto contains_value = value1->contains(value2);
1564
180
            res->insert_data(const_cast<const char*>((char*)&contains_value), 0);
1565
180
        }
1566
1567
125
        block.replace_by_position(result,
1568
125
                                  ColumnNullable::create(std::move(res), std::move(null_map)));
1569
125
        return Status::OK();
1570
126
    }
1571
};
1572
1573
template <bool ignore_null>
1574
class FunctionJsonbArray : public IFunction {
1575
public:
1576
    static constexpr auto name = "json_array";
1577
    static constexpr auto alias = "jsonb_array";
1578
1579
48
    static FunctionPtr create() { return std::make_shared<FunctionJsonbArray>(); }
_ZN5doris18FunctionJsonbArrayILb0EE6createEv
Line
Count
Source
1579
37
    static FunctionPtr create() { return std::make_shared<FunctionJsonbArray>(); }
_ZN5doris18FunctionJsonbArrayILb1EE6createEv
Line
Count
Source
1579
11
    static FunctionPtr create() { return std::make_shared<FunctionJsonbArray>(); }
1580
1581
0
    String get_name() const override { return name; }
Unexecuted instantiation: _ZNK5doris18FunctionJsonbArrayILb0EE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris18FunctionJsonbArrayILb1EE8get_nameB5cxx11Ev
1582
1583
0
    size_t get_number_of_arguments() const override { return 0; }
Unexecuted instantiation: _ZNK5doris18FunctionJsonbArrayILb0EE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris18FunctionJsonbArrayILb1EE23get_number_of_argumentsEv
1584
32
    bool is_variadic() const override { return true; }
_ZNK5doris18FunctionJsonbArrayILb0EE11is_variadicEv
Line
Count
Source
1584
29
    bool is_variadic() const override { return true; }
_ZNK5doris18FunctionJsonbArrayILb1EE11is_variadicEv
Line
Count
Source
1584
3
    bool is_variadic() const override { return true; }
1585
1586
58
    bool use_default_implementation_for_nulls() const override { return false; }
_ZNK5doris18FunctionJsonbArrayILb0EE36use_default_implementation_for_nullsEv
Line
Count
Source
1586
54
    bool use_default_implementation_for_nulls() const override { return false; }
_ZNK5doris18FunctionJsonbArrayILb1EE36use_default_implementation_for_nullsEv
Line
Count
Source
1586
4
    bool use_default_implementation_for_nulls() const override { return false; }
1587
1588
30
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1589
30
        return std::make_shared<DataTypeJsonb>();
1590
30
    }
_ZNK5doris18FunctionJsonbArrayILb0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS6_EE
Line
Count
Source
1588
28
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1589
28
        return std::make_shared<DataTypeJsonb>();
1590
28
    }
_ZNK5doris18FunctionJsonbArrayILb1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS6_EE
Line
Count
Source
1588
2
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1589
2
        return std::make_shared<DataTypeJsonb>();
1590
2
    }
1591
1592
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
1593
30
                        uint32_t result, size_t input_rows_count) const override {
1594
30
        auto return_data_type = std::make_shared<DataTypeJsonb>();
1595
30
        auto column = return_data_type->create_column();
1596
30
        column->reserve(input_rows_count);
1597
1598
30
        JsonbWriter writer;
1599
94
        for (size_t i = 0; i < input_rows_count; ++i) {
1600
60
            writer.writeStartArray();
1601
173
            for (auto argument : arguments) {
1602
173
                auto&& [arg_column, is_const] =
1603
173
                        unpack_if_const(block.get_by_position(argument).column);
1604
173
                if (arg_column->is_nullable()) {
1605
83
                    const auto& nullable_column =
1606
83
                            assert_cast<const ColumnNullable&, TypeCheckOnRelease::DISABLE>(
1607
83
                                    *arg_column);
1608
83
                    const auto& null_map = nullable_column.get_null_map_data();
1609
83
                    const auto& nested_column = nullable_column.get_nested_column();
1610
83
                    const auto& jsonb_column =
1611
83
                            assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>(
1612
83
                                    nested_column);
1613
1614
83
                    auto index = index_check_const(i, is_const);
1615
83
                    if (null_map[index]) {
1616
30
                        if constexpr (ignore_null) {
1617
4
                            continue;
1618
26
                        } else {
1619
26
                            writer.writeNull();
1620
26
                        }
1621
53
                    } else {
1622
53
                        auto jsonb_binary = jsonb_column.get_data_at(index);
1623
53
                        const JsonbDocument* doc = nullptr;
1624
53
                        auto st = JsonbDocument::checkAndCreateDocument(jsonb_binary.data,
1625
53
                                                                        jsonb_binary.size, &doc);
1626
53
                        if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
1627
0
                            if constexpr (ignore_null) {
1628
0
                                continue;
1629
0
                            } else {
1630
0
                                writer.writeNull();
1631
0
                            }
1632
53
                        } else {
1633
53
                            writer.writeValue(doc->getValue());
1634
53
                        }
1635
53
                    }
1636
90
                } else {
1637
90
                    const auto& jsonb_column =
1638
90
                            assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>(
1639
90
                                    *arg_column);
1640
1641
90
                    auto index = index_check_const(i, is_const);
1642
90
                    auto jsonb_binary = jsonb_column.get_data_at(index);
1643
90
                    const JsonbDocument* doc = nullptr;
1644
90
                    auto st = JsonbDocument::checkAndCreateDocument(jsonb_binary.data,
1645
90
                                                                    jsonb_binary.size, &doc);
1646
90
                    if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
1647
0
                        if constexpr (ignore_null) {
1648
0
                            continue;
1649
0
                        } else {
1650
0
                            writer.writeNull();
1651
0
                        }
1652
90
                    } else {
1653
90
                        writer.writeValue(doc->getValue());
1654
90
                    }
1655
90
                }
1656
173
            }
1657
19
            writer.writeEndArray();
1658
19
            column->insert_data(writer.getOutput()->getBuffer(), writer.getOutput()->getSize());
1659
19
            writer.reset();
1660
19
        }
1661
1662
6
        block.get_by_position(result).column = std::move(column);
1663
6
        return Status::OK();
1664
30
    }
_ZNK5doris18FunctionJsonbArrayILb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
1593
28
                        uint32_t result, size_t input_rows_count) const override {
1594
28
        auto return_data_type = std::make_shared<DataTypeJsonb>();
1595
28
        auto column = return_data_type->create_column();
1596
28
        column->reserve(input_rows_count);
1597
1598
28
        JsonbWriter writer;
1599
73
        for (size_t i = 0; i < input_rows_count; ++i) {
1600
45
            writer.writeStartArray();
1601
143
            for (auto argument : arguments) {
1602
143
                auto&& [arg_column, is_const] =
1603
143
                        unpack_if_const(block.get_by_position(argument).column);
1604
143
                if (arg_column->is_nullable()) {
1605
58
                    const auto& nullable_column =
1606
58
                            assert_cast<const ColumnNullable&, TypeCheckOnRelease::DISABLE>(
1607
58
                                    *arg_column);
1608
58
                    const auto& null_map = nullable_column.get_null_map_data();
1609
58
                    const auto& nested_column = nullable_column.get_nested_column();
1610
58
                    const auto& jsonb_column =
1611
58
                            assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>(
1612
58
                                    nested_column);
1613
1614
58
                    auto index = index_check_const(i, is_const);
1615
58
                    if (null_map[index]) {
1616
                        if constexpr (ignore_null) {
1617
                            continue;
1618
26
                        } else {
1619
26
                            writer.writeNull();
1620
26
                        }
1621
32
                    } else {
1622
32
                        auto jsonb_binary = jsonb_column.get_data_at(index);
1623
32
                        const JsonbDocument* doc = nullptr;
1624
32
                        auto st = JsonbDocument::checkAndCreateDocument(jsonb_binary.data,
1625
32
                                                                        jsonb_binary.size, &doc);
1626
32
                        if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
1627
                            if constexpr (ignore_null) {
1628
                                continue;
1629
0
                            } else {
1630
0
                                writer.writeNull();
1631
0
                            }
1632
32
                        } else {
1633
32
                            writer.writeValue(doc->getValue());
1634
32
                        }
1635
32
                    }
1636
85
                } else {
1637
85
                    const auto& jsonb_column =
1638
85
                            assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>(
1639
85
                                    *arg_column);
1640
1641
85
                    auto index = index_check_const(i, is_const);
1642
85
                    auto jsonb_binary = jsonb_column.get_data_at(index);
1643
85
                    const JsonbDocument* doc = nullptr;
1644
85
                    auto st = JsonbDocument::checkAndCreateDocument(jsonb_binary.data,
1645
85
                                                                    jsonb_binary.size, &doc);
1646
85
                    if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
1647
                        if constexpr (ignore_null) {
1648
                            continue;
1649
0
                        } else {
1650
0
                            writer.writeNull();
1651
0
                        }
1652
85
                    } else {
1653
85
                        writer.writeValue(doc->getValue());
1654
85
                    }
1655
85
                }
1656
143
            }
1657
45
            writer.writeEndArray();
1658
45
            column->insert_data(writer.getOutput()->getBuffer(), writer.getOutput()->getSize());
1659
45
            writer.reset();
1660
45
        }
1661
1662
28
        block.get_by_position(result).column = std::move(column);
1663
28
        return Status::OK();
1664
28
    }
_ZNK5doris18FunctionJsonbArrayILb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
1593
2
                        uint32_t result, size_t input_rows_count) const override {
1594
2
        auto return_data_type = std::make_shared<DataTypeJsonb>();
1595
2
        auto column = return_data_type->create_column();
1596
2
        column->reserve(input_rows_count);
1597
1598
2
        JsonbWriter writer;
1599
21
        for (size_t i = 0; i < input_rows_count; ++i) {
1600
15
            writer.writeStartArray();
1601
30
            for (auto argument : arguments) {
1602
30
                auto&& [arg_column, is_const] =
1603
30
                        unpack_if_const(block.get_by_position(argument).column);
1604
30
                if (arg_column->is_nullable()) {
1605
25
                    const auto& nullable_column =
1606
25
                            assert_cast<const ColumnNullable&, TypeCheckOnRelease::DISABLE>(
1607
25
                                    *arg_column);
1608
25
                    const auto& null_map = nullable_column.get_null_map_data();
1609
25
                    const auto& nested_column = nullable_column.get_nested_column();
1610
25
                    const auto& jsonb_column =
1611
25
                            assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>(
1612
25
                                    nested_column);
1613
1614
25
                    auto index = index_check_const(i, is_const);
1615
25
                    if (null_map[index]) {
1616
4
                        if constexpr (ignore_null) {
1617
4
                            continue;
1618
                        } else {
1619
                            writer.writeNull();
1620
                        }
1621
21
                    } else {
1622
21
                        auto jsonb_binary = jsonb_column.get_data_at(index);
1623
21
                        const JsonbDocument* doc = nullptr;
1624
21
                        auto st = JsonbDocument::checkAndCreateDocument(jsonb_binary.data,
1625
21
                                                                        jsonb_binary.size, &doc);
1626
21
                        if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
1627
0
                            if constexpr (ignore_null) {
1628
0
                                continue;
1629
                            } else {
1630
                                writer.writeNull();
1631
                            }
1632
21
                        } else {
1633
21
                            writer.writeValue(doc->getValue());
1634
21
                        }
1635
21
                    }
1636
25
                } else {
1637
5
                    const auto& jsonb_column =
1638
5
                            assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>(
1639
5
                                    *arg_column);
1640
1641
5
                    auto index = index_check_const(i, is_const);
1642
5
                    auto jsonb_binary = jsonb_column.get_data_at(index);
1643
5
                    const JsonbDocument* doc = nullptr;
1644
5
                    auto st = JsonbDocument::checkAndCreateDocument(jsonb_binary.data,
1645
5
                                                                    jsonb_binary.size, &doc);
1646
5
                    if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
1647
0
                        if constexpr (ignore_null) {
1648
0
                            continue;
1649
                        } else {
1650
                            writer.writeNull();
1651
                        }
1652
5
                    } else {
1653
5
                        writer.writeValue(doc->getValue());
1654
5
                    }
1655
5
                }
1656
30
            }
1657
19
            writer.writeEndArray();
1658
19
            column->insert_data(writer.getOutput()->getBuffer(), writer.getOutput()->getSize());
1659
19
            writer.reset();
1660
19
        }
1661
1662
6
        block.get_by_position(result).column = std::move(column);
1663
6
        return Status::OK();
1664
2
    }
1665
};
1666
1667
class FunctionJsonbObject : public IFunction {
1668
public:
1669
    static constexpr auto name = "json_object";
1670
    static constexpr auto alias = "jsonb_object";
1671
1672
43
    static FunctionPtr create() { return std::make_shared<FunctionJsonbObject>(); }
1673
1674
0
    String get_name() const override { return name; }
1675
1676
0
    size_t get_number_of_arguments() const override { return 0; }
1677
35
    bool is_variadic() const override { return true; }
1678
1679
75
    bool use_default_implementation_for_nulls() const override { return false; }
1680
1681
34
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1682
34
        return std::make_shared<DataTypeJsonb>();
1683
34
    }
1684
1685
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
1686
45
                        uint32_t result, size_t input_rows_count) const override {
1687
45
        if (arguments.size() % 2 != 0) {
1688
0
            return Status::InvalidArgument(
1689
0
                    "JSON object must have an even number of arguments, but got: {}",
1690
0
                    arguments.size());
1691
0
        }
1692
1693
45
        auto return_data_type = std::make_shared<DataTypeJsonb>();
1694
1695
45
        auto write_key = [](JsonbWriter& writer, const ColumnString& key_col, const bool is_const,
1696
229
                            const NullMap* null_map, const size_t arg_index, const size_t row_idx) {
1697
229
            auto index = index_check_const(row_idx, is_const);
1698
229
            if (null_map && (*null_map)[index]) {
1699
1
                return Status::InvalidArgument(
1700
1
                        "JSON documents may not contain NULL member name(argument "
1701
1
                        "index:  "
1702
1
                        "{}, row index: {})",
1703
1
                        row_idx, arg_index);
1704
1
            }
1705
1706
228
            auto key_string = key_col.get_data_at(index);
1707
228
            if (key_string.size > 255) {
1708
0
                return Status::InvalidArgument(
1709
0
                        "JSON object keys(argument index: {}) must be less than 256 "
1710
0
                        "bytes, but got size: {}",
1711
0
                        arg_index, key_string.size);
1712
0
            }
1713
228
            writer.writeKey(key_string.data, static_cast<uint8_t>(key_string.size));
1714
228
            return Status::OK();
1715
228
        };
1716
1717
45
        auto write_value = [](JsonbWriter& writer, const ColumnString& value_col,
1718
45
                              const bool is_const, const NullMap* null_map, const size_t arg_index,
1719
228
                              const size_t row_idx) {
1720
228
            auto index = index_check_const(row_idx, is_const);
1721
228
            if (null_map && (*null_map)[index]) {
1722
63
                writer.writeNull();
1723
63
                return Status::OK();
1724
63
            }
1725
1726
165
            auto value_string = value_col.get_data_at(index);
1727
165
            const JsonbDocument* doc = nullptr;
1728
165
            RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(value_string.data,
1729
165
                                                                  value_string.size, &doc));
1730
165
            writer.writeValue(doc->getValue());
1731
165
            return Status::OK();
1732
165
        };
1733
1734
152
        for (size_t arg_idx = 0; arg_idx != arguments.size(); arg_idx += 2) {
1735
107
            auto key_argument = arguments[arg_idx];
1736
107
            auto value_argument = arguments[arg_idx + 1];
1737
1738
107
            auto& key_data_type = block.get_by_position(key_argument).type;
1739
107
            auto& value_data_type = block.get_by_position(value_argument).type;
1740
107
            if (!is_string_type(key_data_type->get_primitive_type())) {
1741
0
                return Status::InvalidArgument(
1742
0
                        "JSON object key(argument index: {}) must be String, but got type: "
1743
0
                        "{}(primitive type: {})",
1744
0
                        arg_idx, key_data_type->get_name(),
1745
0
                        static_cast<int>(key_data_type->get_primitive_type()));
1746
0
            }
1747
1748
107
            if (value_data_type->get_primitive_type() != PrimitiveType::TYPE_JSONB) {
1749
0
                return Status::InvalidArgument(
1750
0
                        "JSON object value(argument index: {}) must be JSON, but got type: {}",
1751
0
                        arg_idx, value_data_type->get_name());
1752
0
            }
1753
107
        }
1754
1755
45
        auto column = return_data_type->create_column();
1756
45
        column->reserve(input_rows_count);
1757
1758
45
        JsonbWriter writer;
1759
114
        for (size_t i = 0; i != input_rows_count; ++i) {
1760
70
            writer.writeStartObject();
1761
298
            for (size_t arg_idx = 0; arg_idx != arguments.size(); arg_idx += 2) {
1762
229
                auto key_argument = arguments[arg_idx];
1763
229
                auto value_argument = arguments[arg_idx + 1];
1764
229
                auto&& [key_column, key_const] =
1765
229
                        unpack_if_const(block.get_by_position(key_argument).column);
1766
229
                auto&& [value_column, value_const] =
1767
229
                        unpack_if_const(block.get_by_position(value_argument).column);
1768
1769
229
                if (key_column->is_nullable()) {
1770
3
                    const auto& nullable_column =
1771
3
                            assert_cast<const ColumnNullable&, TypeCheckOnRelease::DISABLE>(
1772
3
                                    *key_column);
1773
3
                    const auto& null_map = nullable_column.get_null_map_data();
1774
3
                    const auto& nested_column = nullable_column.get_nested_column();
1775
3
                    const auto& key_arg_column =
1776
3
                            assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>(
1777
3
                                    nested_column);
1778
1779
3
                    RETURN_IF_ERROR(
1780
3
                            write_key(writer, key_arg_column, key_const, &null_map, arg_idx, i));
1781
226
                } else {
1782
226
                    const auto& key_arg_column =
1783
226
                            assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>(
1784
226
                                    *key_column);
1785
226
                    RETURN_IF_ERROR(
1786
226
                            write_key(writer, key_arg_column, key_const, nullptr, arg_idx, i));
1787
226
                }
1788
1789
228
                if (value_column->is_nullable()) {
1790
118
                    const auto& nullable_column =
1791
118
                            assert_cast<const ColumnNullable&, TypeCheckOnRelease::DISABLE>(
1792
118
                                    *value_column);
1793
118
                    const auto& null_map = nullable_column.get_null_map_data();
1794
118
                    const auto& nested_column = nullable_column.get_nested_column();
1795
118
                    const auto& value_arg_column =
1796
118
                            assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>(
1797
118
                                    nested_column);
1798
1799
118
                    RETURN_IF_ERROR(write_value(writer, value_arg_column, value_const, &null_map,
1800
118
                                                arg_idx + 1, i));
1801
118
                } else {
1802
110
                    const auto& value_arg_column =
1803
110
                            assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>(
1804
110
                                    *value_column);
1805
110
                    RETURN_IF_ERROR(write_value(writer, value_arg_column, value_const, nullptr,
1806
110
                                                arg_idx + 1, i));
1807
110
                }
1808
228
            }
1809
1810
69
            writer.writeEndObject();
1811
69
            column->insert_data(writer.getOutput()->getBuffer(), writer.getOutput()->getSize());
1812
69
            writer.reset();
1813
69
        }
1814
1815
44
        block.get_by_position(result).column = std::move(column);
1816
44
        return Status::OK();
1817
45
    }
1818
};
1819
1820
// Selects which path-modification behavior FunctionJsonbModify is instantiated with.
enum class JsonbModifyType {
    Insert,  // write the new value only where the path does not resolve to a value yet
    Set,     // write the new value whether or not the path already resolves
    Replace, // write the new value only where the path already resolves to a value
};
1821
1822
template <JsonbModifyType modify_type>
1823
struct JsonbModifyName {
1824
    static constexpr auto name = "jsonb_modify";
1825
    static constexpr auto alias = "json_modify";
1826
};
1827
1828
template <>
1829
struct JsonbModifyName<JsonbModifyType::Insert> {
1830
    static constexpr auto name = "jsonb_insert";
1831
    static constexpr auto alias = "json_insert";
1832
};
1833
template <>
1834
struct JsonbModifyName<JsonbModifyType::Set> {
1835
    static constexpr auto name = "jsonb_set";
1836
    static constexpr auto alias = "json_set";
1837
};
1838
template <>
1839
struct JsonbModifyName<JsonbModifyType::Replace> {
1840
    static constexpr auto name = "jsonb_replace";
1841
    static constexpr auto alias = "json_replace";
1842
};
1843
1844
template <JsonbModifyType modify_type>
1845
class FunctionJsonbModify : public IFunction {
1846
public:
1847
    static constexpr auto name = JsonbModifyName<modify_type>::name;
1848
    static constexpr auto alias = JsonbModifyName<modify_type>::alias;
1849
1850
119
    static FunctionPtr create() { return std::make_shared<FunctionJsonbModify<modify_type>>(); }
_ZN5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE6createEv
Line
Count
Source
1850
40
    static FunctionPtr create() { return std::make_shared<FunctionJsonbModify<modify_type>>(); }
_ZN5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE6createEv
Line
Count
Source
1850
39
    static FunctionPtr create() { return std::make_shared<FunctionJsonbModify<modify_type>>(); }
_ZN5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE6createEv
Line
Count
Source
1850
40
    static FunctionPtr create() { return std::make_shared<FunctionJsonbModify<modify_type>>(); }
1851
1852
0
    String get_name() const override { return name; }
Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE8get_nameB5cxx11Ev
1853
1854
0
    size_t get_number_of_arguments() const override { return 0; }
Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE23get_number_of_argumentsEv
1855
95
    bool is_variadic() const override { return true; }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE11is_variadicEv
Line
Count
Source
1855
32
    bool is_variadic() const override { return true; }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE11is_variadicEv
Line
Count
Source
1855
31
    bool is_variadic() const override { return true; }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE11is_variadicEv
Line
Count
Source
1855
32
    bool is_variadic() const override { return true; }
1856
1857
184
    bool use_default_implementation_for_nulls() const override { return false; }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE36use_default_implementation_for_nullsEv
Line
Count
Source
1857
62
    bool use_default_implementation_for_nulls() const override { return false; }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE36use_default_implementation_for_nullsEv
Line
Count
Source
1857
60
    bool use_default_implementation_for_nulls() const override { return false; }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE36use_default_implementation_for_nullsEv
Line
Count
Source
1857
62
    bool use_default_implementation_for_nulls() const override { return false; }
1858
1859
92
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1860
92
        return make_nullable(std::make_shared<DataTypeJsonb>());
1861
92
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
1859
31
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1860
31
        return make_nullable(std::make_shared<DataTypeJsonb>());
1861
31
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
1859
30
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1860
30
        return make_nullable(std::make_shared<DataTypeJsonb>());
1861
30
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
1859
31
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1860
31
        return make_nullable(std::make_shared<DataTypeJsonb>());
1861
31
    }
1862
1863
    Status create_all_null_result(const DataTypePtr& return_data_type, Block& block,
1864
0
                                  uint32_t result, size_t input_rows_count) const {
1865
0
        auto result_column = return_data_type->create_column();
1866
0
        result_column->insert_default();
1867
0
        auto const_column = ColumnConst::create(std::move(result_column), input_rows_count);
1868
0
        block.get_by_position(result).column = std::move(const_column);
1869
0
        return Status::OK();
1870
0
    }
Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE22create_all_null_resultERKSt10shared_ptrIKNS_9IDataTypeEERNS_5BlockEjm
Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE22create_all_null_resultERKSt10shared_ptrIKNS_9IDataTypeEERNS_5BlockEjm
Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE22create_all_null_resultERKSt10shared_ptrIKNS_9IDataTypeEERNS_5BlockEjm
1871
1872
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
1873
92
                        uint32_t result, size_t input_rows_count) const override {
1874
92
        if (arguments.size() % 2 != 1 || arguments.size() < 3) {
1875
0
            return Status::InvalidArgument(
1876
0
                    "Function {} must have an odd number of arguments and more than 2 arguments, "
1877
0
                    "but got: {}",
1878
0
                    name, arguments.size());
1879
0
        }
1880
1881
92
        const size_t keys_count = (arguments.size() - 1) / 2;
1882
1883
92
        auto return_data_type = make_nullable(std::make_shared<DataTypeJsonb>());
1884
1885
92
        auto result_column = return_data_type->create_column();
1886
92
        auto& result_nullable_col = assert_cast<ColumnNullable&>(*result_column);
1887
92
        auto& null_map = result_nullable_col.get_null_map_data();
1888
92
        auto& res_string_column =
1889
92
                assert_cast<ColumnString&>(result_nullable_col.get_nested_column());
1890
92
        auto& res_chars = res_string_column.get_chars();
1891
92
        auto& res_offsets = res_string_column.get_offsets();
1892
1893
92
        null_map.resize_fill(input_rows_count, 0);
1894
92
        res_offsets.resize(input_rows_count);
1895
92
        auto&& [json_data_arg_column, json_data_const] =
1896
92
                unpack_if_const(block.get_by_position(arguments[0]).column);
1897
1898
92
        if (json_data_const) {
1899
14
            if (json_data_arg_column->is_null_at(0)) {
1900
0
                return create_all_null_result(return_data_type, block, result, input_rows_count);
1901
0
            }
1902
14
        }
1903
1904
92
        std::vector<const ColumnString*> json_path_columns(keys_count);
1905
92
        std::vector<bool> json_path_constant(keys_count);
1906
92
        std::vector<const NullMap*> json_path_null_maps(keys_count, nullptr);
1907
1908
92
        std::vector<const ColumnString*> json_value_columns(keys_count);
1909
92
        std::vector<bool> json_value_constant(keys_count);
1910
92
        std::vector<const NullMap*> json_value_null_maps(keys_count, nullptr);
1911
1912
92
        const NullMap* json_data_null_map = nullptr;
1913
92
        const ColumnString* json_data_column;
1914
92
        if (json_data_arg_column->is_nullable()) {
1915
92
            const auto& nullable_column = assert_cast<const ColumnNullable&>(*json_data_arg_column);
1916
92
            json_data_null_map = &nullable_column.get_null_map_data();
1917
92
            const auto& nested_column = nullable_column.get_nested_column();
1918
92
            json_data_column = assert_cast<const ColumnString*>(&nested_column);
1919
92
        } else {
1920
0
            json_data_column = assert_cast<const ColumnString*>(json_data_arg_column.get());
1921
0
        }
1922
1923
221
        for (size_t i = 1; i < arguments.size(); i += 2) {
1924
129
            auto&& [path_column, path_const] =
1925
129
                    unpack_if_const(block.get_by_position(arguments[i]).column);
1926
129
            auto&& [value_column, value_const] =
1927
129
                    unpack_if_const(block.get_by_position(arguments[i + 1]).column);
1928
1929
129
            if (path_const) {
1930
39
                if (path_column->is_null_at(0)) {
1931
0
                    return create_all_null_result(return_data_type, block, result,
1932
0
                                                  input_rows_count);
1933
0
                }
1934
39
            }
1935
1936
129
            json_path_constant[i / 2] = path_const;
1937
129
            if (path_column->is_nullable()) {
1938
6
                const auto& nullable_column = assert_cast<const ColumnNullable&>(*path_column);
1939
6
                json_path_null_maps[i / 2] = &nullable_column.get_null_map_data();
1940
6
                const auto& nested_column = nullable_column.get_nested_column();
1941
6
                json_path_columns[i / 2] = assert_cast<const ColumnString*>(&nested_column);
1942
123
            } else {
1943
123
                json_path_columns[i / 2] = assert_cast<const ColumnString*>(path_column.get());
1944
123
            }
1945
1946
129
            json_value_constant[i / 2] = value_const;
1947
129
            if (value_column->is_nullable()) {
1948
69
                const auto& nullable_column = assert_cast<const ColumnNullable&>(*value_column);
1949
69
                json_value_null_maps[i / 2] = &nullable_column.get_null_map_data();
1950
69
                const auto& nested_column = nullable_column.get_nested_column();
1951
69
                json_value_columns[i / 2] = assert_cast<const ColumnString*>(&nested_column);
1952
69
            } else {
1953
60
                json_value_columns[i / 2] = assert_cast<const ColumnString*>(value_column.get());
1954
60
            }
1955
129
        }
1956
1957
92
        DorisVector<const JsonbDocument*> json_documents(input_rows_count);
1958
92
        if (json_data_const) {
1959
14
            auto json_data_string = json_data_column->get_data_at(0);
1960
14
            const JsonbDocument* doc = nullptr;
1961
14
            RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(json_data_string.data,
1962
14
                                                                  json_data_string.size, &doc));
1963
14
            if (!doc || !doc->getValue()) [[unlikely]] {
1964
0
                return create_all_null_result(return_data_type, block, result, input_rows_count);
1965
0
            }
1966
77
            for (size_t i = 0; i != input_rows_count; ++i) {
1967
63
                json_documents[i] = doc;
1968
63
            }
1969
78
        } else {
1970
156
            for (size_t i = 0; i != input_rows_count; ++i) {
1971
78
                if (json_data_null_map && (*json_data_null_map)[i]) {
1972
0
                    null_map[i] = 1;
1973
0
                    json_documents[i] = nullptr;
1974
0
                    continue;
1975
0
                }
1976
1977
78
                auto json_data_string = json_data_column->get_data_at(i);
1978
78
                const JsonbDocument* doc = nullptr;
1979
78
                RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(json_data_string.data,
1980
78
                                                                      json_data_string.size, &doc));
1981
78
                if (!doc || !doc->getValue()) [[unlikely]] {
1982
0
                    null_map[i] = 1;
1983
0
                    continue;
1984
0
                }
1985
78
                json_documents[i] = doc;
1986
78
            }
1987
78
        }
1988
1989
92
        DorisVector<DorisVector<JsonbPath>> json_paths(keys_count);
1990
92
        DorisVector<DorisVector<const JsonbValue*>> json_values(keys_count);
1991
1992
92
        RETURN_IF_ERROR(parse_paths_and_values(json_paths, json_values, arguments, input_rows_count,
1993
92
                                               json_path_columns, json_path_constant,
1994
92
                                               json_path_null_maps, json_value_columns,
1995
92
                                               json_value_constant, json_value_null_maps));
1996
1997
84
        JsonbWriter writer;
1998
84
        struct DocumentBuffer {
1999
84
            DorisUniqueBufferPtr<char> ptr;
2000
84
            size_t size = 0;
2001
84
            size_t capacity = 0;
2002
84
        };
2003
2004
84
        DocumentBuffer tmp_buffer;
2005
2006
256
        for (size_t row_idx = 0; row_idx != input_rows_count; ++row_idx) {
2007
423
            for (size_t i = 1; i < arguments.size(); i += 2) {
2008
251
                const size_t index = i / 2;
2009
251
                auto& json_path = json_paths[index];
2010
251
                auto& json_value = json_values[index];
2011
2012
251
                const auto path_index = index_check_const(row_idx, json_path_constant[index]);
2013
251
                const auto value_index = index_check_const(row_idx, json_value_constant[index]);
2014
2015
251
                if (null_map[row_idx]) {
2016
0
                    continue;
2017
0
                }
2018
2019
251
                if (json_documents[row_idx] == nullptr) {
2020
0
                    null_map[row_idx] = 1;
2021
0
                    continue;
2022
0
                }
2023
2024
251
                if (json_path_null_maps[index] && (*json_path_null_maps[index])[path_index]) {
2025
4
                    null_map[row_idx] = 1;
2026
4
                    continue;
2027
4
                }
2028
2029
247
                auto find_result =
2030
247
                        json_documents[row_idx]->getValue()->findValue(json_path[path_index]);
2031
2032
247
                if (find_result.is_wildcard) {
2033
0
                    return Status::InvalidArgument(
2034
0
                            " In this situation, path expressions may not contain the * and ** "
2035
0
                            "tokens or an array range, argument index: {}, row index: {}",
2036
0
                            i, row_idx);
2037
0
                }
2038
2039
247
                if constexpr (modify_type == JsonbModifyType::Insert) {
2040
78
                    if (find_result.value) {
2041
32
                        continue;
2042
32
                    }
2043
86
                } else if constexpr (modify_type == JsonbModifyType::Replace) {
2044
86
                    if (!find_result.value) {
2045
16
                        continue;
2046
16
                    }
2047
86
                }
2048
2049
116
                std::vector<const JsonbValue*> parents;
2050
2051
247
                bool replace = false;
2052
247
                parents.emplace_back(json_documents[row_idx]->getValue());
2053
247
                if (find_result.value) {
2054
                    // find target path, replace it with the new value.
2055
128
                    replace = true;
2056
128
                    if (!build_parents_by_path(json_documents[row_idx]->getValue(),
2057
128
                                               json_path[path_index], parents)) {
2058
0
                        DCHECK(false);
2059
0
                        continue;
2060
0
                    }
2061
128
                } else {
2062
                    // does not find target path, insert the new value.
2063
119
                    JsonbPath new_path;
2064
183
                    for (size_t j = 0; j < json_path[path_index].get_leg_vector_size() - 1; ++j) {
2065
64
                        auto* current_leg = json_path[path_index].get_leg_from_leg_vector(j);
2066
64
                        std::unique_ptr<leg_info> leg = std::make_unique<leg_info>(
2067
64
                                current_leg->leg_ptr, current_leg->leg_len,
2068
64
                                current_leg->array_index, current_leg->type);
2069
64
                        new_path.add_leg_to_leg_vector(std::move(leg));
2070
64
                    }
2071
2072
119
                    if (!build_parents_by_path(json_documents[row_idx]->getValue(), new_path,
2073
119
                                               parents)) {
2074
12
                        continue;
2075
12
                    }
2076
119
                }
2077
2078
235
                const auto legs_count = json_path[path_index].get_leg_vector_size();
2079
235
                leg_info* last_leg =
2080
235
                        legs_count > 0
2081
235
                                ? json_path[path_index].get_leg_from_leg_vector(legs_count - 1)
2082
235
                                : nullptr;
2083
235
                RETURN_IF_ERROR(write_json_value(json_documents[row_idx]->getValue(), parents, 0,
2084
235
                                                 json_value[value_index], replace, last_leg,
2085
235
                                                 writer));
2086
2087
235
                auto* writer_output = writer.getOutput();
2088
235
                if (writer_output->getSize() > tmp_buffer.capacity) {
2089
73
                    tmp_buffer.capacity =
2090
73
                            ((size_t(writer_output->getSize()) + 1024 - 1) / 1024) * 1024;
2091
73
                    tmp_buffer.ptr = make_unique_buffer<char>(tmp_buffer.capacity);
2092
73
                    DCHECK_LE(writer_output->getSize(), tmp_buffer.capacity);
2093
73
                }
2094
2095
235
                memcpy(tmp_buffer.ptr.get(), writer_output->getBuffer(), writer_output->getSize());
2096
235
                tmp_buffer.size = writer_output->getSize();
2097
2098
235
                writer.reset();
2099
2100
235
                RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(
2101
235
                        tmp_buffer.ptr.get(), tmp_buffer.size, &json_documents[row_idx]));
2102
235
            }
2103
2104
172
            if (!null_map[row_idx]) {
2105
120
                const auto* jsonb_document = json_documents[row_idx];
2106
120
                const auto size = jsonb_document->numPackedBytes();
2107
120
                res_chars.insert(reinterpret_cast<const char*>(jsonb_document),
2108
120
                                 reinterpret_cast<const char*>(jsonb_document) + size);
2109
120
            }
2110
2111
172
            res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size());
2112
2113
172
            if (!null_map[row_idx]) {
2114
120
                auto* ptr = res_chars.data() + res_offsets[row_idx - 1];
2115
120
                auto size = res_offsets[row_idx] - res_offsets[row_idx - 1];
2116
120
                const JsonbDocument* doc = nullptr;
2117
120
                THROW_IF_ERROR(JsonbDocument::checkAndCreateDocument(
2118
120
                        reinterpret_cast<const char*>(ptr), size, &doc));
2119
120
            }
2120
172
        }
2121
2122
132
        block.get_by_position(result).column = std::move(result_column);
2123
132
        return Status::OK();
2124
84
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
1873
31
                        uint32_t result, size_t input_rows_count) const override {
1874
31
        if (arguments.size() % 2 != 1 || arguments.size() < 3) {
1875
0
            return Status::InvalidArgument(
1876
0
                    "Function {} must have an odd number of arguments and more than 2 arguments, "
1877
0
                    "but got: {}",
1878
0
                    name, arguments.size());
1879
0
        }
1880
1881
31
        const size_t keys_count = (arguments.size() - 1) / 2;
1882
1883
31
        auto return_data_type = make_nullable(std::make_shared<DataTypeJsonb>());
1884
1885
31
        auto result_column = return_data_type->create_column();
1886
31
        auto& result_nullable_col = assert_cast<ColumnNullable&>(*result_column);
1887
31
        auto& null_map = result_nullable_col.get_null_map_data();
1888
31
        auto& res_string_column =
1889
31
                assert_cast<ColumnString&>(result_nullable_col.get_nested_column());
1890
31
        auto& res_chars = res_string_column.get_chars();
1891
31
        auto& res_offsets = res_string_column.get_offsets();
1892
1893
31
        null_map.resize_fill(input_rows_count, 0);
1894
31
        res_offsets.resize(input_rows_count);
1895
31
        auto&& [json_data_arg_column, json_data_const] =
1896
31
                unpack_if_const(block.get_by_position(arguments[0]).column);
1897
1898
31
        if (json_data_const) {
1899
6
            if (json_data_arg_column->is_null_at(0)) {
1900
0
                return create_all_null_result(return_data_type, block, result, input_rows_count);
1901
0
            }
1902
6
        }
1903
1904
31
        std::vector<const ColumnString*> json_path_columns(keys_count);
1905
31
        std::vector<bool> json_path_constant(keys_count);
1906
31
        std::vector<const NullMap*> json_path_null_maps(keys_count, nullptr);
1907
1908
31
        std::vector<const ColumnString*> json_value_columns(keys_count);
1909
31
        std::vector<bool> json_value_constant(keys_count);
1910
31
        std::vector<const NullMap*> json_value_null_maps(keys_count, nullptr);
1911
1912
31
        const NullMap* json_data_null_map = nullptr;
1913
31
        const ColumnString* json_data_column;
1914
31
        if (json_data_arg_column->is_nullable()) {
1915
31
            const auto& nullable_column = assert_cast<const ColumnNullable&>(*json_data_arg_column);
1916
31
            json_data_null_map = &nullable_column.get_null_map_data();
1917
31
            const auto& nested_column = nullable_column.get_nested_column();
1918
31
            json_data_column = assert_cast<const ColumnString*>(&nested_column);
1919
31
        } else {
1920
0
            json_data_column = assert_cast<const ColumnString*>(json_data_arg_column.get());
1921
0
        }
1922
1923
73
        for (size_t i = 1; i < arguments.size(); i += 2) {
1924
42
            auto&& [path_column, path_const] =
1925
42
                    unpack_if_const(block.get_by_position(arguments[i]).column);
1926
42
            auto&& [value_column, value_const] =
1927
42
                    unpack_if_const(block.get_by_position(arguments[i + 1]).column);
1928
1929
42
            if (path_const) {
1930
11
                if (path_column->is_null_at(0)) {
1931
0
                    return create_all_null_result(return_data_type, block, result,
1932
0
                                                  input_rows_count);
1933
0
                }
1934
11
            }
1935
1936
42
            json_path_constant[i / 2] = path_const;
1937
42
            if (path_column->is_nullable()) {
1938
4
                const auto& nullable_column = assert_cast<const ColumnNullable&>(*path_column);
1939
4
                json_path_null_maps[i / 2] = &nullable_column.get_null_map_data();
1940
4
                const auto& nested_column = nullable_column.get_nested_column();
1941
4
                json_path_columns[i / 2] = assert_cast<const ColumnString*>(&nested_column);
1942
38
            } else {
1943
38
                json_path_columns[i / 2] = assert_cast<const ColumnString*>(path_column.get());
1944
38
            }
1945
1946
42
            json_value_constant[i / 2] = value_const;
1947
42
            if (value_column->is_nullable()) {
1948
22
                const auto& nullable_column = assert_cast<const ColumnNullable&>(*value_column);
1949
22
                json_value_null_maps[i / 2] = &nullable_column.get_null_map_data();
1950
22
                const auto& nested_column = nullable_column.get_nested_column();
1951
22
                json_value_columns[i / 2] = assert_cast<const ColumnString*>(&nested_column);
1952
22
            } else {
1953
20
                json_value_columns[i / 2] = assert_cast<const ColumnString*>(value_column.get());
1954
20
            }
1955
42
        }
1956
1957
31
        DorisVector<const JsonbDocument*> json_documents(input_rows_count);
1958
31
        if (json_data_const) {
1959
6
            auto json_data_string = json_data_column->get_data_at(0);
1960
6
            const JsonbDocument* doc = nullptr;
1961
6
            RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(json_data_string.data,
1962
6
                                                                  json_data_string.size, &doc));
1963
6
            if (!doc || !doc->getValue()) [[unlikely]] {
1964
0
                return create_all_null_result(return_data_type, block, result, input_rows_count);
1965
0
            }
1966
35
            for (size_t i = 0; i != input_rows_count; ++i) {
1967
29
                json_documents[i] = doc;
1968
29
            }
1969
25
        } else {
1970
50
            for (size_t i = 0; i != input_rows_count; ++i) {
1971
25
                if (json_data_null_map && (*json_data_null_map)[i]) {
1972
0
                    null_map[i] = 1;
1973
0
                    json_documents[i] = nullptr;
1974
0
                    continue;
1975
0
                }
1976
1977
25
                auto json_data_string = json_data_column->get_data_at(i);
1978
25
                const JsonbDocument* doc = nullptr;
1979
25
                RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(json_data_string.data,
1980
25
                                                                      json_data_string.size, &doc));
1981
25
                if (!doc || !doc->getValue()) [[unlikely]] {
1982
0
                    null_map[i] = 1;
1983
0
                    continue;
1984
0
                }
1985
25
                json_documents[i] = doc;
1986
25
            }
1987
25
        }
1988
1989
31
        DorisVector<DorisVector<JsonbPath>> json_paths(keys_count);
1990
31
        DorisVector<DorisVector<const JsonbValue*>> json_values(keys_count);
1991
1992
31
        RETURN_IF_ERROR(parse_paths_and_values(json_paths, json_values, arguments, input_rows_count,
1993
31
                                               json_path_columns, json_path_constant,
1994
31
                                               json_path_null_maps, json_value_columns,
1995
31
                                               json_value_constant, json_value_null_maps));
1996
1997
27
        JsonbWriter writer;
1998
27
        struct DocumentBuffer {
1999
27
            DorisUniqueBufferPtr<char> ptr;
2000
27
            size_t size = 0;
2001
27
            size_t capacity = 0;
2002
27
        };
2003
2004
27
        DocumentBuffer tmp_buffer;
2005
2006
100
        for (size_t row_idx = 0; row_idx != input_rows_count; ++row_idx) {
2007
153
            for (size_t i = 1; i < arguments.size(); i += 2) {
2008
80
                const size_t index = i / 2;
2009
80
                auto& json_path = json_paths[index];
2010
80
                auto& json_value = json_values[index];
2011
2012
80
                const auto path_index = index_check_const(row_idx, json_path_constant[index]);
2013
80
                const auto value_index = index_check_const(row_idx, json_value_constant[index]);
2014
2015
80
                if (null_map[row_idx]) {
2016
0
                    continue;
2017
0
                }
2018
2019
80
                if (json_documents[row_idx] == nullptr) {
2020
0
                    null_map[row_idx] = 1;
2021
0
                    continue;
2022
0
                }
2023
2024
80
                if (json_path_null_maps[index] && (*json_path_null_maps[index])[path_index]) {
2025
2
                    null_map[row_idx] = 1;
2026
2
                    continue;
2027
2
                }
2028
2029
78
                auto find_result =
2030
78
                        json_documents[row_idx]->getValue()->findValue(json_path[path_index]);
2031
2032
78
                if (find_result.is_wildcard) {
2033
0
                    return Status::InvalidArgument(
2034
0
                            " In this situation, path expressions may not contain the * and ** "
2035
0
                            "tokens or an array range, argument index: {}, row index: {}",
2036
0
                            i, row_idx);
2037
0
                }
2038
2039
78
                if constexpr (modify_type == JsonbModifyType::Insert) {
2040
78
                    if (find_result.value) {
2041
32
                        continue;
2042
32
                    }
2043
                } else if constexpr (modify_type == JsonbModifyType::Replace) {
2044
                    if (!find_result.value) {
2045
                        continue;
2046
                    }
2047
                }
2048
2049
46
                std::vector<const JsonbValue*> parents;
2050
2051
78
                bool replace = false;
2052
78
                parents.emplace_back(json_documents[row_idx]->getValue());
2053
78
                if (find_result.value) {
2054
                    // find target path, replace it with the new value.
2055
0
                    replace = true;
2056
0
                    if (!build_parents_by_path(json_documents[row_idx]->getValue(),
2057
0
                                               json_path[path_index], parents)) {
2058
0
                        DCHECK(false);
2059
0
                        continue;
2060
0
                    }
2061
78
                } else {
2062
                    // does not find target path, insert the new value.
2063
78
                    JsonbPath new_path;
2064
121
                    for (size_t j = 0; j < json_path[path_index].get_leg_vector_size() - 1; ++j) {
2065
43
                        auto* current_leg = json_path[path_index].get_leg_from_leg_vector(j);
2066
43
                        std::unique_ptr<leg_info> leg = std::make_unique<leg_info>(
2067
43
                                current_leg->leg_ptr, current_leg->leg_len,
2068
43
                                current_leg->array_index, current_leg->type);
2069
43
                        new_path.add_leg_to_leg_vector(std::move(leg));
2070
43
                    }
2071
2072
78
                    if (!build_parents_by_path(json_documents[row_idx]->getValue(), new_path,
2073
78
                                               parents)) {
2074
1
                        continue;
2075
1
                    }
2076
78
                }
2077
2078
77
                const auto legs_count = json_path[path_index].get_leg_vector_size();
2079
77
                leg_info* last_leg =
2080
77
                        legs_count > 0
2081
77
                                ? json_path[path_index].get_leg_from_leg_vector(legs_count - 1)
2082
77
                                : nullptr;
2083
77
                RETURN_IF_ERROR(write_json_value(json_documents[row_idx]->getValue(), parents, 0,
2084
77
                                                 json_value[value_index], replace, last_leg,
2085
77
                                                 writer));
2086
2087
77
                auto* writer_output = writer.getOutput();
2088
77
                if (writer_output->getSize() > tmp_buffer.capacity) {
2089
21
                    tmp_buffer.capacity =
2090
21
                            ((size_t(writer_output->getSize()) + 1024 - 1) / 1024) * 1024;
2091
21
                    tmp_buffer.ptr = make_unique_buffer<char>(tmp_buffer.capacity);
2092
21
                    DCHECK_LE(writer_output->getSize(), tmp_buffer.capacity);
2093
21
                }
2094
2095
77
                memcpy(tmp_buffer.ptr.get(), writer_output->getBuffer(), writer_output->getSize());
2096
77
                tmp_buffer.size = writer_output->getSize();
2097
2098
77
                writer.reset();
2099
2100
77
                RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(
2101
77
                        tmp_buffer.ptr.get(), tmp_buffer.size, &json_documents[row_idx]));
2102
77
            }
2103
2104
73
            if (!null_map[row_idx]) {
2105
39
                const auto* jsonb_document = json_documents[row_idx];
2106
39
                const auto size = jsonb_document->numPackedBytes();
2107
39
                res_chars.insert(reinterpret_cast<const char*>(jsonb_document),
2108
39
                                 reinterpret_cast<const char*>(jsonb_document) + size);
2109
39
            }
2110
2111
73
            res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size());
2112
2113
73
            if (!null_map[row_idx]) {
2114
39
                auto* ptr = res_chars.data() + res_offsets[row_idx - 1];
2115
39
                auto size = res_offsets[row_idx] - res_offsets[row_idx - 1];
2116
39
                const JsonbDocument* doc = nullptr;
2117
39
                THROW_IF_ERROR(JsonbDocument::checkAndCreateDocument(
2118
39
                        reinterpret_cast<const char*>(ptr), size, &doc));
2119
39
            }
2120
73
        }
2121
2122
59
        block.get_by_position(result).column = std::move(result_column);
2123
59
        return Status::OK();
2124
27
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
1873
30
                        uint32_t result, size_t input_rows_count) const override {
1874
30
        if (arguments.size() % 2 != 1 || arguments.size() < 3) {
1875
0
            return Status::InvalidArgument(
1876
0
                    "Function {} must have an odd number of arguments and more than 2 arguments, "
1877
0
                    "but got: {}",
1878
0
                    name, arguments.size());
1879
0
        }
1880
1881
30
        const size_t keys_count = (arguments.size() - 1) / 2;
1882
1883
30
        auto return_data_type = make_nullable(std::make_shared<DataTypeJsonb>());
1884
1885
30
        auto result_column = return_data_type->create_column();
1886
30
        auto& result_nullable_col = assert_cast<ColumnNullable&>(*result_column);
1887
30
        auto& null_map = result_nullable_col.get_null_map_data();
1888
30
        auto& res_string_column =
1889
30
                assert_cast<ColumnString&>(result_nullable_col.get_nested_column());
1890
30
        auto& res_chars = res_string_column.get_chars();
1891
30
        auto& res_offsets = res_string_column.get_offsets();
1892
1893
30
        null_map.resize_fill(input_rows_count, 0);
1894
30
        res_offsets.resize(input_rows_count);
1895
30
        auto&& [json_data_arg_column, json_data_const] =
1896
30
                unpack_if_const(block.get_by_position(arguments[0]).column);
1897
1898
30
        if (json_data_const) {
1899
4
            if (json_data_arg_column->is_null_at(0)) {
1900
0
                return create_all_null_result(return_data_type, block, result, input_rows_count);
1901
0
            }
1902
4
        }
1903
1904
30
        std::vector<const ColumnString*> json_path_columns(keys_count);
1905
30
        std::vector<bool> json_path_constant(keys_count);
1906
30
        std::vector<const NullMap*> json_path_null_maps(keys_count, nullptr);
1907
1908
30
        std::vector<const ColumnString*> json_value_columns(keys_count);
1909
30
        std::vector<bool> json_value_constant(keys_count);
1910
30
        std::vector<const NullMap*> json_value_null_maps(keys_count, nullptr);
1911
1912
30
        const NullMap* json_data_null_map = nullptr;
1913
30
        const ColumnString* json_data_column;
1914
30
        if (json_data_arg_column->is_nullable()) {
1915
30
            const auto& nullable_column = assert_cast<const ColumnNullable&>(*json_data_arg_column);
1916
30
            json_data_null_map = &nullable_column.get_null_map_data();
1917
30
            const auto& nested_column = nullable_column.get_nested_column();
1918
30
            json_data_column = assert_cast<const ColumnString*>(&nested_column);
1919
30
        } else {
1920
0
            json_data_column = assert_cast<const ColumnString*>(json_data_arg_column.get());
1921
0
        }
1922
1923
72
        for (size_t i = 1; i < arguments.size(); i += 2) {
1924
42
            auto&& [path_column, path_const] =
1925
42
                    unpack_if_const(block.get_by_position(arguments[i]).column);
1926
42
            auto&& [value_column, value_const] =
1927
42
                    unpack_if_const(block.get_by_position(arguments[i + 1]).column);
1928
1929
42
            if (path_const) {
1930
13
                if (path_column->is_null_at(0)) {
1931
0
                    return create_all_null_result(return_data_type, block, result,
1932
0
                                                  input_rows_count);
1933
0
                }
1934
13
            }
1935
1936
42
            json_path_constant[i / 2] = path_const;
1937
42
            if (path_column->is_nullable()) {
1938
1
                const auto& nullable_column = assert_cast<const ColumnNullable&>(*path_column);
1939
1
                json_path_null_maps[i / 2] = &nullable_column.get_null_map_data();
1940
1
                const auto& nested_column = nullable_column.get_nested_column();
1941
1
                json_path_columns[i / 2] = assert_cast<const ColumnString*>(&nested_column);
1942
41
            } else {
1943
41
                json_path_columns[i / 2] = assert_cast<const ColumnString*>(path_column.get());
1944
41
            }
1945
1946
42
            json_value_constant[i / 2] = value_const;
1947
42
            if (value_column->is_nullable()) {
1948
22
                const auto& nullable_column = assert_cast<const ColumnNullable&>(*value_column);
1949
22
                json_value_null_maps[i / 2] = &nullable_column.get_null_map_data();
1950
22
                const auto& nested_column = nullable_column.get_nested_column();
1951
22
                json_value_columns[i / 2] = assert_cast<const ColumnString*>(&nested_column);
1952
22
            } else {
1953
20
                json_value_columns[i / 2] = assert_cast<const ColumnString*>(value_column.get());
1954
20
            }
1955
42
        }
1956
1957
30
        DorisVector<const JsonbDocument*> json_documents(input_rows_count);
1958
30
        if (json_data_const) {
1959
4
            auto json_data_string = json_data_column->get_data_at(0);
1960
4
            const JsonbDocument* doc = nullptr;
1961
4
            RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(json_data_string.data,
1962
4
                                                                  json_data_string.size, &doc));
1963
4
            if (!doc || !doc->getValue()) [[unlikely]] {
1964
0
                return create_all_null_result(return_data_type, block, result, input_rows_count);
1965
0
            }
1966
22
            for (size_t i = 0; i != input_rows_count; ++i) {
1967
18
                json_documents[i] = doc;
1968
18
            }
1969
26
        } else {
1970
52
            for (size_t i = 0; i != input_rows_count; ++i) {
1971
26
                if (json_data_null_map && (*json_data_null_map)[i]) {
1972
0
                    null_map[i] = 1;
1973
0
                    json_documents[i] = nullptr;
1974
0
                    continue;
1975
0
                }
1976
1977
26
                auto json_data_string = json_data_column->get_data_at(i);
1978
26
                const JsonbDocument* doc = nullptr;
1979
26
                RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(json_data_string.data,
1980
26
                                                                      json_data_string.size, &doc));
1981
26
                if (!doc || !doc->getValue()) [[unlikely]] {
1982
0
                    null_map[i] = 1;
1983
0
                    continue;
1984
0
                }
1985
26
                json_documents[i] = doc;
1986
26
            }
1987
26
        }
1988
1989
30
        DorisVector<DorisVector<JsonbPath>> json_paths(keys_count);
1990
30
        DorisVector<DorisVector<const JsonbValue*>> json_values(keys_count);
1991
1992
30
        RETURN_IF_ERROR(parse_paths_and_values(json_paths, json_values, arguments, input_rows_count,
1993
30
                                               json_path_columns, json_path_constant,
1994
30
                                               json_path_null_maps, json_value_columns,
1995
30
                                               json_value_constant, json_value_null_maps));
1996
1997
28
        JsonbWriter writer;
1998
28
        struct DocumentBuffer {
1999
28
            DorisUniqueBufferPtr<char> ptr;
2000
28
            size_t size = 0;
2001
28
            size_t capacity = 0;
2002
28
        };
2003
2004
28
        DocumentBuffer tmp_buffer;
2005
2006
70
        for (size_t row_idx = 0; row_idx != input_rows_count; ++row_idx) {
2007
126
            for (size_t i = 1; i < arguments.size(); i += 2) {
2008
84
                const size_t index = i / 2;
2009
84
                auto& json_path = json_paths[index];
2010
84
                auto& json_value = json_values[index];
2011
2012
84
                const auto path_index = index_check_const(row_idx, json_path_constant[index]);
2013
84
                const auto value_index = index_check_const(row_idx, json_value_constant[index]);
2014
2015
84
                if (null_map[row_idx]) {
2016
0
                    continue;
2017
0
                }
2018
2019
84
                if (json_documents[row_idx] == nullptr) {
2020
0
                    null_map[row_idx] = 1;
2021
0
                    continue;
2022
0
                }
2023
2024
84
                if (json_path_null_maps[index] && (*json_path_null_maps[index])[path_index]) {
2025
1
                    null_map[row_idx] = 1;
2026
1
                    continue;
2027
1
                }
2028
2029
83
                auto find_result =
2030
83
                        json_documents[row_idx]->getValue()->findValue(json_path[path_index]);
2031
2032
83
                if (find_result.is_wildcard) {
2033
0
                    return Status::InvalidArgument(
2034
0
                            " In this situation, path expressions may not contain the * and ** "
2035
0
                            "tokens or an array range, argument index: {}, row index: {}",
2036
0
                            i, row_idx);
2037
0
                }
2038
2039
                if constexpr (modify_type == JsonbModifyType::Insert) {
2040
                    if (find_result.value) {
2041
                        continue;
2042
                    }
2043
83
                } else if constexpr (modify_type == JsonbModifyType::Replace) {
2044
83
                    if (!find_result.value) {
2045
83
                        continue;
2046
83
                    }
2047
83
                }
2048
2049
83
                std::vector<const JsonbValue*> parents;
2050
2051
83
                bool replace = false;
2052
83
                parents.emplace_back(json_documents[row_idx]->getValue());
2053
83
                if (find_result.value) {
2054
                    // find target path, replace it with the new value.
2055
58
                    replace = true;
2056
58
                    if (!build_parents_by_path(json_documents[row_idx]->getValue(),
2057
58
                                               json_path[path_index], parents)) {
2058
0
                        DCHECK(false);
2059
0
                        continue;
2060
0
                    }
2061
58
                } else {
2062
                    // does not find target path, insert the new value.
2063
25
                    JsonbPath new_path;
2064
46
                    for (size_t j = 0; j < json_path[path_index].get_leg_vector_size() - 1; ++j) {
2065
21
                        auto* current_leg = json_path[path_index].get_leg_from_leg_vector(j);
2066
21
                        std::unique_ptr<leg_info> leg = std::make_unique<leg_info>(
2067
21
                                current_leg->leg_ptr, current_leg->leg_len,
2068
21
                                current_leg->array_index, current_leg->type);
2069
21
                        new_path.add_leg_to_leg_vector(std::move(leg));
2070
21
                    }
2071
2072
25
                    if (!build_parents_by_path(json_documents[row_idx]->getValue(), new_path,
2073
25
                                               parents)) {
2074
11
                        continue;
2075
11
                    }
2076
25
                }
2077
2078
72
                const auto legs_count = json_path[path_index].get_leg_vector_size();
2079
72
                leg_info* last_leg =
2080
72
                        legs_count > 0
2081
72
                                ? json_path[path_index].get_leg_from_leg_vector(legs_count - 1)
2082
72
                                : nullptr;
2083
72
                RETURN_IF_ERROR(write_json_value(json_documents[row_idx]->getValue(), parents, 0,
2084
72
                                                 json_value[value_index], replace, last_leg,
2085
72
                                                 writer));
2086
2087
72
                auto* writer_output = writer.getOutput();
2088
72
                if (writer_output->getSize() > tmp_buffer.capacity) {
2089
26
                    tmp_buffer.capacity =
2090
26
                            ((size_t(writer_output->getSize()) + 1024 - 1) / 1024) * 1024;
2091
26
                    tmp_buffer.ptr = make_unique_buffer<char>(tmp_buffer.capacity);
2092
26
                    DCHECK_LE(writer_output->getSize(), tmp_buffer.capacity);
2093
26
                }
2094
2095
72
                memcpy(tmp_buffer.ptr.get(), writer_output->getBuffer(), writer_output->getSize());
2096
72
                tmp_buffer.size = writer_output->getSize();
2097
2098
72
                writer.reset();
2099
2100
72
                RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(
2101
72
                        tmp_buffer.ptr.get(), tmp_buffer.size, &json_documents[row_idx]));
2102
72
            }
2103
2104
42
            if (!null_map[row_idx]) {
2105
41
                const auto* jsonb_document = json_documents[row_idx];
2106
41
                const auto size = jsonb_document->numPackedBytes();
2107
41
                res_chars.insert(reinterpret_cast<const char*>(jsonb_document),
2108
41
                                 reinterpret_cast<const char*>(jsonb_document) + size);
2109
41
            }
2110
2111
42
            res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size());
2112
2113
42
            if (!null_map[row_idx]) {
2114
41
                auto* ptr = res_chars.data() + res_offsets[row_idx - 1];
2115
41
                auto size = res_offsets[row_idx] - res_offsets[row_idx - 1];
2116
41
                const JsonbDocument* doc = nullptr;
2117
41
                THROW_IF_ERROR(JsonbDocument::checkAndCreateDocument(
2118
41
                        reinterpret_cast<const char*>(ptr), size, &doc));
2119
41
            }
2120
42
        }
2121
2122
28
        block.get_by_position(result).column = std::move(result_column);
2123
28
        return Status::OK();
2124
28
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
1873
31
                        uint32_t result, size_t input_rows_count) const override {
1874
31
        if (arguments.size() % 2 != 1 || arguments.size() < 3) {
1875
0
            return Status::InvalidArgument(
1876
0
                    "Function {} must have an odd number of arguments and more than 2 arguments, "
1877
0
                    "but got: {}",
1878
0
                    name, arguments.size());
1879
0
        }
1880
1881
31
        const size_t keys_count = (arguments.size() - 1) / 2;
1882
1883
31
        auto return_data_type = make_nullable(std::make_shared<DataTypeJsonb>());
1884
1885
31
        auto result_column = return_data_type->create_column();
1886
31
        auto& result_nullable_col = assert_cast<ColumnNullable&>(*result_column);
1887
31
        auto& null_map = result_nullable_col.get_null_map_data();
1888
31
        auto& res_string_column =
1889
31
                assert_cast<ColumnString&>(result_nullable_col.get_nested_column());
1890
31
        auto& res_chars = res_string_column.get_chars();
1891
31
        auto& res_offsets = res_string_column.get_offsets();
1892
1893
31
        null_map.resize_fill(input_rows_count, 0);
1894
31
        res_offsets.resize(input_rows_count);
1895
31
        auto&& [json_data_arg_column, json_data_const] =
1896
31
                unpack_if_const(block.get_by_position(arguments[0]).column);
1897
1898
31
        if (json_data_const) {
1899
4
            if (json_data_arg_column->is_null_at(0)) {
1900
0
                return create_all_null_result(return_data_type, block, result, input_rows_count);
1901
0
            }
1902
4
        }
1903
1904
31
        std::vector<const ColumnString*> json_path_columns(keys_count);
1905
31
        std::vector<bool> json_path_constant(keys_count);
1906
31
        std::vector<const NullMap*> json_path_null_maps(keys_count, nullptr);
1907
1908
31
        std::vector<const ColumnString*> json_value_columns(keys_count);
1909
31
        std::vector<bool> json_value_constant(keys_count);
1910
31
        std::vector<const NullMap*> json_value_null_maps(keys_count, nullptr);
1911
1912
31
        const NullMap* json_data_null_map = nullptr;
1913
31
        const ColumnString* json_data_column;
1914
31
        if (json_data_arg_column->is_nullable()) {
1915
31
            const auto& nullable_column = assert_cast<const ColumnNullable&>(*json_data_arg_column);
1916
31
            json_data_null_map = &nullable_column.get_null_map_data();
1917
31
            const auto& nested_column = nullable_column.get_nested_column();
1918
31
            json_data_column = assert_cast<const ColumnString*>(&nested_column);
1919
31
        } else {
1920
0
            json_data_column = assert_cast<const ColumnString*>(json_data_arg_column.get());
1921
0
        }
1922
1923
76
        for (size_t i = 1; i < arguments.size(); i += 2) {
1924
45
            auto&& [path_column, path_const] =
1925
45
                    unpack_if_const(block.get_by_position(arguments[i]).column);
1926
45
            auto&& [value_column, value_const] =
1927
45
                    unpack_if_const(block.get_by_position(arguments[i + 1]).column);
1928
1929
45
            if (path_const) {
1930
15
                if (path_column->is_null_at(0)) {
1931
0
                    return create_all_null_result(return_data_type, block, result,
1932
0
                                                  input_rows_count);
1933
0
                }
1934
15
            }
1935
1936
45
            json_path_constant[i / 2] = path_const;
1937
45
            if (path_column->is_nullable()) {
1938
1
                const auto& nullable_column = assert_cast<const ColumnNullable&>(*path_column);
1939
1
                json_path_null_maps[i / 2] = &nullable_column.get_null_map_data();
1940
1
                const auto& nested_column = nullable_column.get_nested_column();
1941
1
                json_path_columns[i / 2] = assert_cast<const ColumnString*>(&nested_column);
1942
44
            } else {
1943
44
                json_path_columns[i / 2] = assert_cast<const ColumnString*>(path_column.get());
1944
44
            }
1945
1946
45
            json_value_constant[i / 2] = value_const;
1947
45
            if (value_column->is_nullable()) {
1948
25
                const auto& nullable_column = assert_cast<const ColumnNullable&>(*value_column);
1949
25
                json_value_null_maps[i / 2] = &nullable_column.get_null_map_data();
1950
25
                const auto& nested_column = nullable_column.get_nested_column();
1951
25
                json_value_columns[i / 2] = assert_cast<const ColumnString*>(&nested_column);
1952
25
            } else {
1953
20
                json_value_columns[i / 2] = assert_cast<const ColumnString*>(value_column.get());
1954
20
            }
1955
45
        }
1956
1957
31
        DorisVector<const JsonbDocument*> json_documents(input_rows_count);
1958
31
        if (json_data_const) {
1959
4
            auto json_data_string = json_data_column->get_data_at(0);
1960
4
            const JsonbDocument* doc = nullptr;
1961
4
            RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(json_data_string.data,
1962
4
                                                                  json_data_string.size, &doc));
1963
4
            if (!doc || !doc->getValue()) [[unlikely]] {
1964
0
                return create_all_null_result(return_data_type, block, result, input_rows_count);
1965
0
            }
1966
20
            for (size_t i = 0; i != input_rows_count; ++i) {
1967
16
                json_documents[i] = doc;
1968
16
            }
1969
27
        } else {
1970
54
            for (size_t i = 0; i != input_rows_count; ++i) {
1971
27
                if (json_data_null_map && (*json_data_null_map)[i]) {
1972
0
                    null_map[i] = 1;
1973
0
                    json_documents[i] = nullptr;
1974
0
                    continue;
1975
0
                }
1976
1977
27
                auto json_data_string = json_data_column->get_data_at(i);
1978
27
                const JsonbDocument* doc = nullptr;
1979
27
                RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(json_data_string.data,
1980
27
                                                                      json_data_string.size, &doc));
1981
27
                if (!doc || !doc->getValue()) [[unlikely]] {
1982
0
                    null_map[i] = 1;
1983
0
                    continue;
1984
0
                }
1985
27
                json_documents[i] = doc;
1986
27
            }
1987
27
        }
1988
1989
31
        DorisVector<DorisVector<JsonbPath>> json_paths(keys_count);
1990
31
        DorisVector<DorisVector<const JsonbValue*>> json_values(keys_count);
1991
1992
31
        RETURN_IF_ERROR(parse_paths_and_values(json_paths, json_values, arguments, input_rows_count,
1993
31
                                               json_path_columns, json_path_constant,
1994
31
                                               json_path_null_maps, json_value_columns,
1995
31
                                               json_value_constant, json_value_null_maps));
1996
1997
29
        JsonbWriter writer;
1998
29
        struct DocumentBuffer {
1999
29
            DorisUniqueBufferPtr<char> ptr;
2000
29
            size_t size = 0;
2001
29
            size_t capacity = 0;
2002
29
        };
2003
2004
29
        DocumentBuffer tmp_buffer;
2005
2006
86
        for (size_t row_idx = 0; row_idx != input_rows_count; ++row_idx) {
2007
144
            for (size_t i = 1; i < arguments.size(); i += 2) {
2008
87
                const size_t index = i / 2;
2009
87
                auto& json_path = json_paths[index];
2010
87
                auto& json_value = json_values[index];
2011
2012
87
                const auto path_index = index_check_const(row_idx, json_path_constant[index]);
2013
87
                const auto value_index = index_check_const(row_idx, json_value_constant[index]);
2014
2015
87
                if (null_map[row_idx]) {
2016
0
                    continue;
2017
0
                }
2018
2019
87
                if (json_documents[row_idx] == nullptr) {
2020
0
                    null_map[row_idx] = 1;
2021
0
                    continue;
2022
0
                }
2023
2024
87
                if (json_path_null_maps[index] && (*json_path_null_maps[index])[path_index]) {
2025
1
                    null_map[row_idx] = 1;
2026
1
                    continue;
2027
1
                }
2028
2029
86
                auto find_result =
2030
86
                        json_documents[row_idx]->getValue()->findValue(json_path[path_index]);
2031
2032
86
                if (find_result.is_wildcard) {
2033
0
                    return Status::InvalidArgument(
2034
0
                            " In this situation, path expressions may not contain the * and ** "
2035
0
                            "tokens or an array range, argument index: {}, row index: {}",
2036
0
                            i, row_idx);
2037
0
                }
2038
2039
                if constexpr (modify_type == JsonbModifyType::Insert) {
2040
                    if (find_result.value) {
2041
                        continue;
2042
                    }
2043
86
                } else if constexpr (modify_type == JsonbModifyType::Replace) {
2044
86
                    if (!find_result.value) {
2045
16
                        continue;
2046
16
                    }
2047
86
                }
2048
2049
70
                std::vector<const JsonbValue*> parents;
2050
2051
86
                bool replace = false;
2052
86
                parents.emplace_back(json_documents[row_idx]->getValue());
2053
86
                if (find_result.value) {
2054
                    // find target path, replace it with the new value.
2055
70
                    replace = true;
2056
70
                    if (!build_parents_by_path(json_documents[row_idx]->getValue(),
2057
70
                                               json_path[path_index], parents)) {
2058
0
                        DCHECK(false);
2059
0
                        continue;
2060
0
                    }
2061
70
                } else {
2062
                    // does not find target path, insert the new value.
2063
16
                    JsonbPath new_path;
2064
16
                    for (size_t j = 0; j < json_path[path_index].get_leg_vector_size() - 1; ++j) {
2065
0
                        auto* current_leg = json_path[path_index].get_leg_from_leg_vector(j);
2066
0
                        std::unique_ptr<leg_info> leg = std::make_unique<leg_info>(
2067
0
                                current_leg->leg_ptr, current_leg->leg_len,
2068
0
                                current_leg->array_index, current_leg->type);
2069
0
                        new_path.add_leg_to_leg_vector(std::move(leg));
2070
0
                    }
2071
2072
16
                    if (!build_parents_by_path(json_documents[row_idx]->getValue(), new_path,
2073
16
                                               parents)) {
2074
0
                        continue;
2075
0
                    }
2076
16
                }
2077
2078
86
                const auto legs_count = json_path[path_index].get_leg_vector_size();
2079
86
                leg_info* last_leg =
2080
86
                        legs_count > 0
2081
86
                                ? json_path[path_index].get_leg_from_leg_vector(legs_count - 1)
2082
86
                                : nullptr;
2083
86
                RETURN_IF_ERROR(write_json_value(json_documents[row_idx]->getValue(), parents, 0,
2084
86
                                                 json_value[value_index], replace, last_leg,
2085
86
                                                 writer));
2086
2087
86
                auto* writer_output = writer.getOutput();
2088
86
                if (writer_output->getSize() > tmp_buffer.capacity) {
2089
26
                    tmp_buffer.capacity =
2090
26
                            ((size_t(writer_output->getSize()) + 1024 - 1) / 1024) * 1024;
2091
26
                    tmp_buffer.ptr = make_unique_buffer<char>(tmp_buffer.capacity);
2092
26
                    DCHECK_LE(writer_output->getSize(), tmp_buffer.capacity);
2093
26
                }
2094
2095
86
                memcpy(tmp_buffer.ptr.get(), writer_output->getBuffer(), writer_output->getSize());
2096
86
                tmp_buffer.size = writer_output->getSize();
2097
2098
86
                writer.reset();
2099
2100
86
                RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(
2101
86
                        tmp_buffer.ptr.get(), tmp_buffer.size, &json_documents[row_idx]));
2102
86
            }
2103
2104
57
            if (!null_map[row_idx]) {
2105
40
                const auto* jsonb_document = json_documents[row_idx];
2106
40
                const auto size = jsonb_document->numPackedBytes();
2107
40
                res_chars.insert(reinterpret_cast<const char*>(jsonb_document),
2108
40
                                 reinterpret_cast<const char*>(jsonb_document) + size);
2109
40
            }
2110
2111
57
            res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size());
2112
2113
57
            if (!null_map[row_idx]) {
2114
40
                auto* ptr = res_chars.data() + res_offsets[row_idx - 1];
2115
40
                auto size = res_offsets[row_idx] - res_offsets[row_idx - 1];
2116
40
                const JsonbDocument* doc = nullptr;
2117
40
                THROW_IF_ERROR(JsonbDocument::checkAndCreateDocument(
2118
40
                        reinterpret_cast<const char*>(ptr), size, &doc));
2119
40
            }
2120
57
        }
2121
2122
45
        block.get_by_position(result).column = std::move(result_column);
2123
45
        return Status::OK();
2124
29
    }
2125
2126
    bool build_parents_by_path(const JsonbValue* root, const JsonbPath& path,
2127
424
                               std::vector<const JsonbValue*>& parents) const {
2128
424
        const size_t index = parents.size() - 1;
2129
424
        if (index == path.get_leg_vector_size()) {
2130
181
            return true;
2131
181
        }
2132
2133
243
        JsonbPath current;
2134
243
        auto* current_leg = path.get_leg_from_leg_vector(index);
2135
243
        std::unique_ptr<leg_info> leg =
2136
243
                std::make_unique<leg_info>(current_leg->leg_ptr, current_leg->leg_len,
2137
243
                                           current_leg->array_index, current_leg->type);
2138
243
        current.add_leg_to_leg_vector(std::move(leg));
2139
2140
243
        auto find_result = root->findValue(current);
2141
243
        if (!find_result.value) {
2142
12
            std::string path_string;
2143
12
            current.to_string(&path_string);
2144
12
            return false;
2145
231
        } else if (find_result.value == root) {
2146
6
            return true;
2147
225
        } else {
2148
225
            parents.emplace_back(find_result.value);
2149
225
        }
2150
2151
225
        return build_parents_by_path(find_result.value, path, parents);
2152
243
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE21build_parents_by_pathEPKNS_10JsonbValueERKNS_9JsonbPathERSt6vectorIS5_SaIS5_EE
Line
Count
Source
2127
88
                               std::vector<const JsonbValue*>& parents) const {
2128
88
        const size_t index = parents.size() - 1;
2129
88
        if (index == path.get_leg_vector_size()) {
2130
45
            return true;
2131
45
        }
2132
2133
43
        JsonbPath current;
2134
43
        auto* current_leg = path.get_leg_from_leg_vector(index);
2135
43
        std::unique_ptr<leg_info> leg =
2136
43
                std::make_unique<leg_info>(current_leg->leg_ptr, current_leg->leg_len,
2137
43
                                           current_leg->array_index, current_leg->type);
2138
43
        current.add_leg_to_leg_vector(std::move(leg));
2139
2140
43
        auto find_result = root->findValue(current);
2141
43
        if (!find_result.value) {
2142
1
            std::string path_string;
2143
1
            current.to_string(&path_string);
2144
1
            return false;
2145
42
        } else if (find_result.value == root) {
2146
0
            return true;
2147
42
        } else {
2148
42
            parents.emplace_back(find_result.value);
2149
42
        }
2150
2151
42
        return build_parents_by_path(find_result.value, path, parents);
2152
43
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE21build_parents_by_pathEPKNS_10JsonbValueERKNS_9JsonbPathERSt6vectorIS5_SaIS5_EE
Line
Count
Source
2127
173
                               std::vector<const JsonbValue*>& parents) const {
2128
173
        const size_t index = parents.size() - 1;
2129
173
        if (index == path.get_leg_vector_size()) {
2130
69
            return true;
2131
69
        }
2132
2133
104
        JsonbPath current;
2134
104
        auto* current_leg = path.get_leg_from_leg_vector(index);
2135
104
        std::unique_ptr<leg_info> leg =
2136
104
                std::make_unique<leg_info>(current_leg->leg_ptr, current_leg->leg_len,
2137
104
                                           current_leg->array_index, current_leg->type);
2138
104
        current.add_leg_to_leg_vector(std::move(leg));
2139
2140
104
        auto find_result = root->findValue(current);
2141
104
        if (!find_result.value) {
2142
11
            std::string path_string;
2143
11
            current.to_string(&path_string);
2144
11
            return false;
2145
93
        } else if (find_result.value == root) {
2146
3
            return true;
2147
90
        } else {
2148
90
            parents.emplace_back(find_result.value);
2149
90
        }
2150
2151
90
        return build_parents_by_path(find_result.value, path, parents);
2152
104
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE21build_parents_by_pathEPKNS_10JsonbValueERKNS_9JsonbPathERSt6vectorIS5_SaIS5_EE
Line
Count
Source
2127
163
                               std::vector<const JsonbValue*>& parents) const {
2128
163
        const size_t index = parents.size() - 1;
2129
163
        if (index == path.get_leg_vector_size()) {
2130
67
            return true;
2131
67
        }
2132
2133
96
        JsonbPath current;
2134
96
        auto* current_leg = path.get_leg_from_leg_vector(index);
2135
96
        std::unique_ptr<leg_info> leg =
2136
96
                std::make_unique<leg_info>(current_leg->leg_ptr, current_leg->leg_len,
2137
96
                                           current_leg->array_index, current_leg->type);
2138
96
        current.add_leg_to_leg_vector(std::move(leg));
2139
2140
96
        auto find_result = root->findValue(current);
2141
96
        if (!find_result.value) {
2142
0
            std::string path_string;
2143
0
            current.to_string(&path_string);
2144
0
            return false;
2145
96
        } else if (find_result.value == root) {
2146
3
            return true;
2147
93
        } else {
2148
93
            parents.emplace_back(find_result.value);
2149
93
        }
2150
2151
93
        return build_parents_by_path(find_result.value, path, parents);
2152
96
    }
2153
2154
    Status write_json_value(const JsonbValue* root, const std::vector<const JsonbValue*>& parents,
2155
                            const size_t parent_index, const JsonbValue* value, const bool replace,
2156
412
                            const leg_info* last_leg, JsonbWriter& writer) const {
2157
412
        if (parent_index >= parents.size()) {
2158
0
            return Status::InvalidArgument(
2159
0
                    "JsonbModify: parent_index {} is out of bounds for parents size {}",
2160
0
                    parent_index, parents.size());
2161
0
        }
2162
2163
412
        if (parents[parent_index] != root) {
2164
0
            return Status::InvalidArgument(
2165
0
                    "JsonbModify: parent value does not match root value, parent_index: {}, "
2166
0
                    "parents size: {}",
2167
0
                    parent_index, parents.size());
2168
0
        }
2169
2170
412
        if (parent_index == parents.size() - 1 && replace) {
2171
            // We are at the last parent, write the value directly
2172
128
            if (value == nullptr) {
2173
36
                writer.writeNull();
2174
92
            } else {
2175
92
                writer.writeValue(value);
2176
92
            }
2177
128
            return Status::OK();
2178
128
        }
2179
2180
284
        bool value_written = false;
2181
284
        bool is_last_parent = (parent_index == parents.size() - 1);
2182
284
        const auto* next_parent = is_last_parent ? nullptr : parents[parent_index + 1];
2183
284
        if (root->isArray()) {
2184
39
            writer.writeStartArray();
2185
39
            const auto* array_val = root->unpack<ArrayVal>();
2186
117
            for (int i = 0; i != array_val->numElem(); ++i) {
2187
78
                auto* it = array_val->get(i);
2188
2189
78
                if (is_last_parent && last_leg->array_index == i) {
2190
0
                    value_written = true;
2191
0
                    writer.writeValue(value);
2192
78
                } else if (it == next_parent) {
2193
23
                    value_written = true;
2194
23
                    RETURN_IF_ERROR(write_json_value(it, parents, parent_index + 1, value, replace,
2195
23
                                                     last_leg, writer));
2196
55
                } else {
2197
55
                    writer.writeValue(it);
2198
55
                }
2199
78
            }
2200
39
            if (is_last_parent && !value_written) {
2201
16
                value_written = true;
2202
16
                writer.writeValue(value);
2203
16
            }
2204
2205
39
            writer.writeEndArray();
2206
2207
245
        } else {
2208
            /**
2209
                Because even for a non-array object, `$[0]` can still point to that object:
2210
                ```
2211
                select json_extract('{"key": "value"}', '$[0]');
2212
                +------------------------------------------+
2213
                | json_extract('{"key": "value"}', '$[0]') |
2214
                +------------------------------------------+
2215
                | {"key": "value"}                         |
2216
                +------------------------------------------+
2217
                ```
2218
                So when inserting an element into `$[1]`, even if '$' does not represent an array,
2219
                it should be converted to an array before insertion:
2220
                ```
2221
                select json_insert('123','$[1]', null);
2222
                +---------------------------------+
2223
                | json_insert('123','$[1]', null) |
2224
                +---------------------------------+
2225
                | [123, null]                     |
2226
                +---------------------------------+
2227
                ```
2228
             */
2229
245
            if (is_last_parent && last_leg && last_leg->type == ARRAY_CODE) {
2230
10
                writer.writeStartArray();
2231
10
                writer.writeValue(root);
2232
10
                writer.writeValue(value);
2233
10
                writer.writeEndArray();
2234
10
                return Status::OK();
2235
235
            } else if (root->isObject()) {
2236
235
                writer.writeStartObject();
2237
235
                const auto* object_val = root->unpack<ObjectVal>();
2238
529
                for (const auto& it : *object_val) {
2239
529
                    writer.writeKey(it.getKeyStr(), it.klen());
2240
529
                    if (it.value() == next_parent) {
2241
202
                        value_written = true;
2242
202
                        RETURN_IF_ERROR(write_json_value(it.value(), parents, parent_index + 1,
2243
202
                                                         value, replace, last_leg, writer));
2244
327
                    } else {
2245
327
                        writer.writeValue(it.value());
2246
327
                    }
2247
529
                }
2248
2249
235
                if (is_last_parent && !value_written) {
2250
33
                    value_written = true;
2251
33
                    writer.writeStartObject();
2252
33
                    writer.writeKey(last_leg->leg_ptr, static_cast<uint8_t>(last_leg->leg_len));
2253
33
                    writer.writeValue(value);
2254
33
                    writer.writeEndObject();
2255
33
                }
2256
235
                writer.writeEndObject();
2257
2258
235
            } else {
2259
0
                return Status::InvalidArgument("Cannot insert value into this type");
2260
0
            }
2261
245
        }
2262
2263
274
        if (!value_written) {
2264
0
            return Status::InvalidArgument(
2265
0
                    "JsonbModify: value not written, parent_index: {}, parents size: {}",
2266
0
                    parent_index, parents.size());
2267
0
        }
2268
2269
274
        return Status::OK();
2270
274
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE16write_json_valueEPKNS_10JsonbValueERKSt6vectorIS5_SaIS5_EEmS5_bPKNS_8leg_infoERNS_12JsonbWriterTINS_14JsonbOutStreamEEE
Line
Count
Source
2156
87
                            const leg_info* last_leg, JsonbWriter& writer) const {
2157
87
        if (parent_index >= parents.size()) {
2158
0
            return Status::InvalidArgument(
2159
0
                    "JsonbModify: parent_index {} is out of bounds for parents size {}",
2160
0
                    parent_index, parents.size());
2161
0
        }
2162
2163
87
        if (parents[parent_index] != root) {
2164
0
            return Status::InvalidArgument(
2165
0
                    "JsonbModify: parent value does not match root value, parent_index: {}, "
2166
0
                    "parents size: {}",
2167
0
                    parent_index, parents.size());
2168
0
        }
2169
2170
87
        if (parent_index == parents.size() - 1 && replace) {
2171
            // We are at the last parent, write the value directly
2172
0
            if (value == nullptr) {
2173
0
                writer.writeNull();
2174
0
            } else {
2175
0
                writer.writeValue(value);
2176
0
            }
2177
0
            return Status::OK();
2178
0
        }
2179
2180
87
        bool value_written = false;
2181
87
        bool is_last_parent = (parent_index == parents.size() - 1);
2182
87
        const auto* next_parent = is_last_parent ? nullptr : parents[parent_index + 1];
2183
87
        if (root->isArray()) {
2184
8
            writer.writeStartArray();
2185
8
            const auto* array_val = root->unpack<ArrayVal>();
2186
24
            for (int i = 0; i != array_val->numElem(); ++i) {
2187
16
                auto* it = array_val->get(i);
2188
2189
16
                if (is_last_parent && last_leg->array_index == i) {
2190
0
                    value_written = true;
2191
0
                    writer.writeValue(value);
2192
16
                } else if (it == next_parent) {
2193
0
                    value_written = true;
2194
0
                    RETURN_IF_ERROR(write_json_value(it, parents, parent_index + 1, value, replace,
2195
0
                                                     last_leg, writer));
2196
16
                } else {
2197
16
                    writer.writeValue(it);
2198
16
                }
2199
16
            }
2200
8
            if (is_last_parent && !value_written) {
2201
8
                value_written = true;
2202
8
                writer.writeValue(value);
2203
8
            }
2204
2205
8
            writer.writeEndArray();
2206
2207
79
        } else {
2208
            /**
2209
                Because even for a non-array object, `$[0]` can still point to that object:
2210
                ```
2211
                select json_extract('{"key": "value"}', '$[0]');
2212
                +------------------------------------------+
2213
                | json_extract('{"key": "value"}', '$[0]') |
2214
                +------------------------------------------+
2215
                | {"key": "value"}                         |
2216
                +------------------------------------------+
2217
                ```
2218
                So when inserting an element into `$[1]`, even if '$' does not represent an array,
2219
                it should be converted to an array before insertion:
2220
                ```
2221
                select json_insert('123','$[1]', null);
2222
                +---------------------------------+
2223
                | json_insert('123','$[1]', null) |
2224
                +---------------------------------+
2225
                | [123, null]                     |
2226
                +---------------------------------+
2227
                ```
2228
             */
2229
79
            if (is_last_parent && last_leg && last_leg->type == ARRAY_CODE) {
2230
5
                writer.writeStartArray();
2231
5
                writer.writeValue(root);
2232
5
                writer.writeValue(value);
2233
5
                writer.writeEndArray();
2234
5
                return Status::OK();
2235
74
            } else if (root->isObject()) {
2236
74
                writer.writeStartObject();
2237
74
                const auto* object_val = root->unpack<ObjectVal>();
2238
84
                for (const auto& it : *object_val) {
2239
84
                    writer.writeKey(it.getKeyStr(), it.klen());
2240
84
                    if (it.value() == next_parent) {
2241
42
                        value_written = true;
2242
42
                        RETURN_IF_ERROR(write_json_value(it.value(), parents, parent_index + 1,
2243
42
                                                         value, replace, last_leg, writer));
2244
42
                    } else {
2245
42
                        writer.writeValue(it.value());
2246
42
                    }
2247
84
                }
2248
2249
74
                if (is_last_parent && !value_written) {
2250
32
                    value_written = true;
2251
32
                    writer.writeStartObject();
2252
32
                    writer.writeKey(last_leg->leg_ptr, static_cast<uint8_t>(last_leg->leg_len));
2253
32
                    writer.writeValue(value);
2254
32
                    writer.writeEndObject();
2255
32
                }
2256
74
                writer.writeEndObject();
2257
2258
74
            } else {
2259
0
                return Status::InvalidArgument("Cannot insert value into this type");
2260
0
            }
2261
79
        }
2262
2263
82
        if (!value_written) {
2264
0
            return Status::InvalidArgument(
2265
0
                    "JsonbModify: value not written, parent_index: {}, parents size: {}",
2266
0
                    parent_index, parents.size());
2267
0
        }
2268
2269
82
        return Status::OK();
2270
82
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE16write_json_valueEPKNS_10JsonbValueERKSt6vectorIS5_SaIS5_EEmS5_bPKNS_8leg_infoERNS_12JsonbWriterTINS_14JsonbOutStreamEEE
Line
Count
Source
2156
162
                            const leg_info* last_leg, JsonbWriter& writer) const {
2157
162
        if (parent_index >= parents.size()) {
2158
0
            return Status::InvalidArgument(
2159
0
                    "JsonbModify: parent_index {} is out of bounds for parents size {}",
2160
0
                    parent_index, parents.size());
2161
0
        }
2162
2163
162
        if (parents[parent_index] != root) {
2164
0
            return Status::InvalidArgument(
2165
0
                    "JsonbModify: parent value does not match root value, parent_index: {}, "
2166
0
                    "parents size: {}",
2167
0
                    parent_index, parents.size());
2168
0
        }
2169
2170
162
        if (parent_index == parents.size() - 1 && replace) {
2171
            // We are at the last parent, write the value directly
2172
58
            if (value == nullptr) {
2173
16
                writer.writeNull();
2174
42
            } else {
2175
42
                writer.writeValue(value);
2176
42
            }
2177
58
            return Status::OK();
2178
58
        }
2179
2180
104
        bool value_written = false;
2181
104
        bool is_last_parent = (parent_index == parents.size() - 1);
2182
104
        const auto* next_parent = is_last_parent ? nullptr : parents[parent_index + 1];
2183
104
        if (root->isArray()) {
2184
18
            writer.writeStartArray();
2185
18
            const auto* array_val = root->unpack<ArrayVal>();
2186
54
            for (int i = 0; i != array_val->numElem(); ++i) {
2187
36
                auto* it = array_val->get(i);
2188
2189
36
                if (is_last_parent && last_leg->array_index == i) {
2190
0
                    value_written = true;
2191
0
                    writer.writeValue(value);
2192
36
                } else if (it == next_parent) {
2193
10
                    value_written = true;
2194
10
                    RETURN_IF_ERROR(write_json_value(it, parents, parent_index + 1, value, replace,
2195
10
                                                     last_leg, writer));
2196
26
                } else {
2197
26
                    writer.writeValue(it);
2198
26
                }
2199
36
            }
2200
18
            if (is_last_parent && !value_written) {
2201
8
                value_written = true;
2202
8
                writer.writeValue(value);
2203
8
            }
2204
2205
18
            writer.writeEndArray();
2206
2207
86
        } else {
2208
            /**
2209
                Because even for a non-array object, `$[0]` can still point to that object:
2210
                ```
2211
                select json_extract('{"key": "value"}', '$[0]');
2212
                +------------------------------------------+
2213
                | json_extract('{"key": "value"}', '$[0]') |
2214
                +------------------------------------------+
2215
                | {"key": "value"}                         |
2216
                +------------------------------------------+
2217
                ```
2218
                So when inserting an element into `$[1]`, even if '$' does not represent an array,
2219
                it should be converted to an array before insertion:
2220
                ```
2221
                select json_insert('123','$[1]', null);
2222
                +---------------------------------+
2223
                | json_insert('123','$[1]', null) |
2224
                +---------------------------------+
2225
                | [123, null]                     |
2226
                +---------------------------------+
2227
                ```
2228
             */
2229
86
            if (is_last_parent && last_leg && last_leg->type == ARRAY_CODE) {
2230
5
                writer.writeStartArray();
2231
5
                writer.writeValue(root);
2232
5
                writer.writeValue(value);
2233
5
                writer.writeEndArray();
2234
5
                return Status::OK();
2235
81
            } else if (root->isObject()) {
2236
81
                writer.writeStartObject();
2237
81
                const auto* object_val = root->unpack<ObjectVal>();
2238
221
                for (const auto& it : *object_val) {
2239
221
                    writer.writeKey(it.getKeyStr(), it.klen());
2240
221
                    if (it.value() == next_parent) {
2241
80
                        value_written = true;
2242
80
                        RETURN_IF_ERROR(write_json_value(it.value(), parents, parent_index + 1,
2243
80
                                                         value, replace, last_leg, writer));
2244
141
                    } else {
2245
141
                        writer.writeValue(it.value());
2246
141
                    }
2247
221
                }
2248
2249
81
                if (is_last_parent && !value_written) {
2250
1
                    value_written = true;
2251
1
                    writer.writeStartObject();
2252
1
                    writer.writeKey(last_leg->leg_ptr, static_cast<uint8_t>(last_leg->leg_len));
2253
1
                    writer.writeValue(value);
2254
1
                    writer.writeEndObject();
2255
1
                }
2256
81
                writer.writeEndObject();
2257
2258
81
            } else {
2259
0
                return Status::InvalidArgument("Cannot insert value into this type");
2260
0
            }
2261
86
        }
2262
2263
99
        if (!value_written) {
2264
0
            return Status::InvalidArgument(
2265
0
                    "JsonbModify: value not written, parent_index: {}, parents size: {}",
2266
0
                    parent_index, parents.size());
2267
0
        }
2268
2269
99
        return Status::OK();
2270
99
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE16write_json_valueEPKNS_10JsonbValueERKSt6vectorIS5_SaIS5_EEmS5_bPKNS_8leg_infoERNS_12JsonbWriterTINS_14JsonbOutStreamEEE
Line
Count
Source
2156
163
                            const leg_info* last_leg, JsonbWriter& writer) const {
2157
163
        if (parent_index >= parents.size()) {
2158
0
            return Status::InvalidArgument(
2159
0
                    "JsonbModify: parent_index {} is out of bounds for parents size {}",
2160
0
                    parent_index, parents.size());
2161
0
        }
2162
2163
163
        if (parents[parent_index] != root) {
2164
0
            return Status::InvalidArgument(
2165
0
                    "JsonbModify: parent value does not match root value, parent_index: {}, "
2166
0
                    "parents size: {}",
2167
0
                    parent_index, parents.size());
2168
0
        }
2169
2170
163
        if (parent_index == parents.size() - 1 && replace) {
2171
            // We are at the last parent, write the value directly
2172
70
            if (value == nullptr) {
2173
20
                writer.writeNull();
2174
50
            } else {
2175
50
                writer.writeValue(value);
2176
50
            }
2177
70
            return Status::OK();
2178
70
        }
2179
2180
93
        bool value_written = false;
2181
93
        bool is_last_parent = (parent_index == parents.size() - 1);
2182
93
        const auto* next_parent = is_last_parent ? nullptr : parents[parent_index + 1];
2183
93
        if (root->isArray()) {
2184
13
            writer.writeStartArray();
2185
13
            const auto* array_val = root->unpack<ArrayVal>();
2186
39
            for (int i = 0; i != array_val->numElem(); ++i) {
2187
26
                auto* it = array_val->get(i);
2188
2189
26
                if (is_last_parent && last_leg->array_index == i) {
2190
0
                    value_written = true;
2191
0
                    writer.writeValue(value);
2192
26
                } else if (it == next_parent) {
2193
13
                    value_written = true;
2194
13
                    RETURN_IF_ERROR(write_json_value(it, parents, parent_index + 1, value, replace,
2195
13
                                                     last_leg, writer));
2196
13
                } else {
2197
13
                    writer.writeValue(it);
2198
13
                }
2199
26
            }
2200
13
            if (is_last_parent && !value_written) {
2201
0
                value_written = true;
2202
0
                writer.writeValue(value);
2203
0
            }
2204
2205
13
            writer.writeEndArray();
2206
2207
80
        } else {
2208
            /**
2209
                Because even for a non-array object, `$[0]` can still point to that object:
2210
                ```
2211
                select json_extract('{"key": "value"}', '$[0]');
2212
                +------------------------------------------+
2213
                | json_extract('{"key": "value"}', '$[0]') |
2214
                +------------------------------------------+
2215
                | {"key": "value"}                         |
2216
                +------------------------------------------+
2217
                ```
2218
                So when inserting an element into `$[1]`, even if '$' does not represent an array,
2219
                it should be converted to an array before insertion:
2220
                ```
2221
                select json_insert('123','$[1]', null);
2222
                +---------------------------------+
2223
                | json_insert('123','$[1]', null) |
2224
                +---------------------------------+
2225
                | [123, null]                     |
2226
                +---------------------------------+
2227
                ```
2228
             */
2229
80
            if (is_last_parent && last_leg && last_leg->type == ARRAY_CODE) {
2230
0
                writer.writeStartArray();
2231
0
                writer.writeValue(root);
2232
0
                writer.writeValue(value);
2233
0
                writer.writeEndArray();
2234
0
                return Status::OK();
2235
80
            } else if (root->isObject()) {
2236
80
                writer.writeStartObject();
2237
80
                const auto* object_val = root->unpack<ObjectVal>();
2238
224
                for (const auto& it : *object_val) {
2239
224
                    writer.writeKey(it.getKeyStr(), it.klen());
2240
224
                    if (it.value() == next_parent) {
2241
80
                        value_written = true;
2242
80
                        RETURN_IF_ERROR(write_json_value(it.value(), parents, parent_index + 1,
2243
80
                                                         value, replace, last_leg, writer));
2244
144
                    } else {
2245
144
                        writer.writeValue(it.value());
2246
144
                    }
2247
224
                }
2248
2249
80
                if (is_last_parent && !value_written) {
2250
0
                    value_written = true;
2251
0
                    writer.writeStartObject();
2252
0
                    writer.writeKey(last_leg->leg_ptr, static_cast<uint8_t>(last_leg->leg_len));
2253
0
                    writer.writeValue(value);
2254
0
                    writer.writeEndObject();
2255
0
                }
2256
80
                writer.writeEndObject();
2257
2258
80
            } else {
2259
0
                return Status::InvalidArgument("Cannot insert value into this type");
2260
0
            }
2261
80
        }
2262
2263
93
        if (!value_written) {
2264
0
            return Status::InvalidArgument(
2265
0
                    "JsonbModify: value not written, parent_index: {}, parents size: {}",
2266
0
                    parent_index, parents.size());
2267
0
        }
2268
2269
93
        return Status::OK();
2270
93
    }
2271
2272
    Status parse_paths_and_values(DorisVector<DorisVector<JsonbPath>>& json_paths,
2273
                                  DorisVector<DorisVector<const JsonbValue*>>& json_values,
2274
                                  const ColumnNumbers& arguments, const size_t input_rows_count,
2275
                                  const std::vector<const ColumnString*>& json_path_columns,
2276
                                  const std::vector<bool>& json_path_constant,
2277
                                  const std::vector<const NullMap*>& json_path_null_maps,
2278
                                  const std::vector<const ColumnString*>& json_value_columns,
2279
                                  const std::vector<bool>& json_value_constant,
2280
92
                                  const std::vector<const NullMap*>& json_value_null_maps) const {
2281
213
        for (size_t i = 1; i < arguments.size(); i += 2) {
2282
129
            const size_t index = i / 2;
2283
129
            const auto* json_path_column = json_path_columns[index];
2284
129
            const auto* value_column = json_value_columns[index];
2285
2286
129
            json_paths[index].resize(json_path_constant[index] ? 1 : input_rows_count);
2287
129
            json_values[index].resize(json_value_constant[index] ? 1 : input_rows_count, nullptr);
2288
2289
259
            for (size_t row_idx = 0; row_idx != json_paths[index].size(); ++row_idx) {
2290
138
                if (json_path_null_maps[index] && (*json_path_null_maps[index])[row_idx]) {
2291
6
                    continue;
2292
6
                }
2293
2294
132
                auto path_string = json_path_column->get_data_at(row_idx);
2295
132
                if (!json_paths[index][row_idx].seek(path_string.data, path_string.size)) {
2296
3
                    return Status::InvalidArgument(
2297
3
                            "Json path error: Invalid Json Path for value: {}, "
2298
3
                            "argument "
2299
3
                            "index: {}, row index: {}",
2300
3
                            std::string_view(path_string.data, path_string.size), i, row_idx);
2301
3
                }
2302
2303
129
                if (json_paths[index][row_idx].is_wildcard()) {
2304
5
                    return Status::InvalidArgument(
2305
5
                            "In this situation, path expressions may not contain the * and ** "
2306
5
                            "tokens, argument index: {}, row index: {}",
2307
5
                            i, row_idx);
2308
5
                }
2309
129
            }
2310
2311
372
            for (size_t row_idx = 0; row_idx != json_values[index].size(); ++row_idx) {
2312
251
                if (json_value_null_maps[index] && (*json_value_null_maps[index])[row_idx]) {
2313
75
                    continue;
2314
75
                }
2315
2316
176
                auto value_string = value_column->get_data_at(row_idx);
2317
176
                const JsonbDocument* doc = nullptr;
2318
176
                RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(value_string.data,
2319
176
                                                                      value_string.size, &doc));
2320
176
                if (doc) {
2321
176
                    json_values[index][row_idx] = doc->getValue();
2322
176
                }
2323
176
            }
2324
121
        }
2325
2326
84
        return Status::OK();
2327
92
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE22parse_paths_and_valuesERSt6vectorIS3_INS_9JsonbPathENS_18CustomStdAllocatorIS4_NS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEEEEENS5_ISA_S8_EEERS3_IS3_IPKNS_10JsonbValueENS5_ISG_S8_EEENS5_ISI_S8_EEERKS3_IjSaIjEEmRKS3_IPKNS_9ColumnStrIjEESaIST_EERKS3_IbSaIbEERKS3_IPKNS_8PODArrayIhLm4096ENS6_ILb0ELb0ELb0ES7_Lb0EEELm16ELm15EEESaIS16_EESX_S11_S1A_
Line
Count
Source
2280
31
                                  const std::vector<const NullMap*>& json_value_null_maps) const {
2281
69
        for (size_t i = 1; i < arguments.size(); i += 2) {
2282
42
            const size_t index = i / 2;
2283
42
            const auto* json_path_column = json_path_columns[index];
2284
42
            const auto* value_column = json_value_columns[index];
2285
2286
42
            json_paths[index].resize(json_path_constant[index] ? 1 : input_rows_count);
2287
42
            json_values[index].resize(json_value_constant[index] ? 1 : input_rows_count, nullptr);
2288
2289
89
            for (size_t row_idx = 0; row_idx != json_paths[index].size(); ++row_idx) {
2290
51
                if (json_path_null_maps[index] && (*json_path_null_maps[index])[row_idx]) {
2291
4
                    continue;
2292
4
                }
2293
2294
47
                auto path_string = json_path_column->get_data_at(row_idx);
2295
47
                if (!json_paths[index][row_idx].seek(path_string.data, path_string.size)) {
2296
1
                    return Status::InvalidArgument(
2297
1
                            "Json path error: Invalid Json Path for value: {}, "
2298
1
                            "argument "
2299
1
                            "index: {}, row index: {}",
2300
1
                            std::string_view(path_string.data, path_string.size), i, row_idx);
2301
1
                }
2302
2303
46
                if (json_paths[index][row_idx].is_wildcard()) {
2304
3
                    return Status::InvalidArgument(
2305
3
                            "In this situation, path expressions may not contain the * and ** "
2306
3
                            "tokens, argument index: {}, row index: {}",
2307
3
                            i, row_idx);
2308
3
                }
2309
46
            }
2310
2311
118
            for (size_t row_idx = 0; row_idx != json_values[index].size(); ++row_idx) {
2312
80
                if (json_value_null_maps[index] && (*json_value_null_maps[index])[row_idx]) {
2313
23
                    continue;
2314
23
                }
2315
2316
57
                auto value_string = value_column->get_data_at(row_idx);
2317
57
                const JsonbDocument* doc = nullptr;
2318
57
                RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(value_string.data,
2319
57
                                                                      value_string.size, &doc));
2320
57
                if (doc) {
2321
57
                    json_values[index][row_idx] = doc->getValue();
2322
57
                }
2323
57
            }
2324
38
        }
2325
2326
27
        return Status::OK();
2327
31
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE22parse_paths_and_valuesERSt6vectorIS3_INS_9JsonbPathENS_18CustomStdAllocatorIS4_NS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEEEEENS5_ISA_S8_EEERS3_IS3_IPKNS_10JsonbValueENS5_ISG_S8_EEENS5_ISI_S8_EEERKS3_IjSaIjEEmRKS3_IPKNS_9ColumnStrIjEESaIST_EERKS3_IbSaIbEERKS3_IPKNS_8PODArrayIhLm4096ENS6_ILb0ELb0ELb0ES7_Lb0EEELm16ELm15EEESaIS16_EESX_S11_S1A_
Line
Count
Source
2280
30
                                  const std::vector<const NullMap*>& json_value_null_maps) const {
2281
70
        for (size_t i = 1; i < arguments.size(); i += 2) {
2282
42
            const size_t index = i / 2;
2283
42
            const auto* json_path_column = json_path_columns[index];
2284
42
            const auto* value_column = json_value_columns[index];
2285
2286
42
            json_paths[index].resize(json_path_constant[index] ? 1 : input_rows_count);
2287
42
            json_values[index].resize(json_value_constant[index] ? 1 : input_rows_count, nullptr);
2288
2289
82
            for (size_t row_idx = 0; row_idx != json_paths[index].size(); ++row_idx) {
2290
42
                if (json_path_null_maps[index] && (*json_path_null_maps[index])[row_idx]) {
2291
1
                    continue;
2292
1
                }
2293
2294
41
                auto path_string = json_path_column->get_data_at(row_idx);
2295
41
                if (!json_paths[index][row_idx].seek(path_string.data, path_string.size)) {
2296
1
                    return Status::InvalidArgument(
2297
1
                            "Json path error: Invalid Json Path for value: {}, "
2298
1
                            "argument "
2299
1
                            "index: {}, row index: {}",
2300
1
                            std::string_view(path_string.data, path_string.size), i, row_idx);
2301
1
                }
2302
2303
40
                if (json_paths[index][row_idx].is_wildcard()) {
2304
1
                    return Status::InvalidArgument(
2305
1
                            "In this situation, path expressions may not contain the * and ** "
2306
1
                            "tokens, argument index: {}, row index: {}",
2307
1
                            i, row_idx);
2308
1
                }
2309
40
            }
2310
2311
124
            for (size_t row_idx = 0; row_idx != json_values[index].size(); ++row_idx) {
2312
84
                if (json_value_null_maps[index] && (*json_value_null_maps[index])[row_idx]) {
2313
25
                    continue;
2314
25
                }
2315
2316
59
                auto value_string = value_column->get_data_at(row_idx);
2317
59
                const JsonbDocument* doc = nullptr;
2318
59
                RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(value_string.data,
2319
59
                                                                      value_string.size, &doc));
2320
59
                if (doc) {
2321
59
                    json_values[index][row_idx] = doc->getValue();
2322
59
                }
2323
59
            }
2324
40
        }
2325
2326
28
        return Status::OK();
2327
30
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE22parse_paths_and_valuesERSt6vectorIS3_INS_9JsonbPathENS_18CustomStdAllocatorIS4_NS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEEEEENS5_ISA_S8_EEERS3_IS3_IPKNS_10JsonbValueENS5_ISG_S8_EEENS5_ISI_S8_EEERKS3_IjSaIjEEmRKS3_IPKNS_9ColumnStrIjEESaIST_EERKS3_IbSaIbEERKS3_IPKNS_8PODArrayIhLm4096ENS6_ILb0ELb0ELb0ES7_Lb0EEELm16ELm15EEESaIS16_EESX_S11_S1A_
Line
Count
Source
2280
31
                                  const std::vector<const NullMap*>& json_value_null_maps) const {
2281
74
        for (size_t i = 1; i < arguments.size(); i += 2) {
2282
45
            const size_t index = i / 2;
2283
45
            const auto* json_path_column = json_path_columns[index];
2284
45
            const auto* value_column = json_value_columns[index];
2285
2286
45
            json_paths[index].resize(json_path_constant[index] ? 1 : input_rows_count);
2287
45
            json_values[index].resize(json_value_constant[index] ? 1 : input_rows_count, nullptr);
2288
2289
88
            for (size_t row_idx = 0; row_idx != json_paths[index].size(); ++row_idx) {
2290
45
                if (json_path_null_maps[index] && (*json_path_null_maps[index])[row_idx]) {
2291
1
                    continue;
2292
1
                }
2293
2294
44
                auto path_string = json_path_column->get_data_at(row_idx);
2295
44
                if (!json_paths[index][row_idx].seek(path_string.data, path_string.size)) {
2296
1
                    return Status::InvalidArgument(
2297
1
                            "Json path error: Invalid Json Path for value: {}, "
2298
1
                            "argument "
2299
1
                            "index: {}, row index: {}",
2300
1
                            std::string_view(path_string.data, path_string.size), i, row_idx);
2301
1
                }
2302
2303
43
                if (json_paths[index][row_idx].is_wildcard()) {
2304
1
                    return Status::InvalidArgument(
2305
1
                            "In this situation, path expressions may not contain the * and ** "
2306
1
                            "tokens, argument index: {}, row index: {}",
2307
1
                            i, row_idx);
2308
1
                }
2309
43
            }
2310
2311
130
            for (size_t row_idx = 0; row_idx != json_values[index].size(); ++row_idx) {
2312
87
                if (json_value_null_maps[index] && (*json_value_null_maps[index])[row_idx]) {
2313
27
                    continue;
2314
27
                }
2315
2316
60
                auto value_string = value_column->get_data_at(row_idx);
2317
60
                const JsonbDocument* doc = nullptr;
2318
60
                RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(value_string.data,
2319
60
                                                                      value_string.size, &doc));
2320
60
                if (doc) {
2321
60
                    json_values[index][row_idx] = doc->getValue();
2322
60
                }
2323
60
            }
2324
43
        }
2325
2326
29
        return Status::OK();
2327
31
    }
2328
};
2329
2330
struct JsonbContainsAndPathImpl {
2331
57
    static DataTypes get_variadic_argument_types() {
2332
57
        return {std::make_shared<DataTypeJsonb>(), std::make_shared<DataTypeJsonb>(),
2333
57
                std::make_shared<DataTypeString>()};
2334
57
    }
2335
2336
    static Status execute_impl(FunctionContext* context, Block& block,
2337
                               const ColumnNumbers& arguments, uint32_t result,
2338
126
                               size_t input_rows_count) {
2339
126
        return JsonbContainsUtil::jsonb_contains_execute(context, block, arguments, result,
2340
126
                                                         input_rows_count);
2341
126
    }
2342
};
2343
2344
class FunctionJsonSearch : public IFunction {
2345
private:
2346
    using OneFun = std::function<Status(size_t, bool*)>;
2347
65
    static Status always_one(size_t i, bool* res) {
2348
65
        *res = true;
2349
65
        return Status::OK();
2350
65
    }
2351
48
    static Status always_all(size_t i, bool* res) {
2352
48
        *res = false;
2353
48
        return Status::OK();
2354
48
    }
2355
2356
    using CheckNullFun = std::function<bool(size_t)>;
2357
294
    static bool always_not_null(size_t) { return false; }
2358
2359
    using GetJsonStringRefFun = std::function<StringRef(size_t)>;
2360
2361
329
    Status matched(const std::string_view& str, LikeState* state, unsigned char* res) const {
2362
329
        StringRef pattern; // not used
2363
329
        StringRef value_val(str.data(), str.size());
2364
329
        return (state->scalar_function)(&state->search_state, value_val, pattern, res);
2365
329
    }
2366
2367
    /**
2368
     * Recursive search for matching string, if found, the result will be added to a vector
2369
     * @param element json element
2370
     * @param one_match
2371
     * @param search_str
2372
     * @param cur_path
2373
     * @param matches The path that has already been matched
2374
     * @return true if matched else false
2375
     */
2376
    bool find_matches(const JsonbValue* element, const bool& one_match, LikeState* state,
2377
717
                      JsonbPath* cur_path, std::unordered_set<std::string>* matches) const {
2378
717
        if (element->isString()) {
2379
329
            const auto* json_string = element->unpack<JsonbStringVal>();
2380
329
            const std::string_view element_str(json_string->getBlob(), json_string->length());
2381
329
            unsigned char res;
2382
329
            RETURN_IF_ERROR(matched(element_str, state, &res));
2383
329
            if (res) {
2384
206
                std::string str;
2385
206
                auto valid = cur_path->to_string(&str);
2386
206
                if (!valid) {
2387
0
                    return false;
2388
0
                }
2389
206
                return matches->insert(str).second;
2390
206
            } else {
2391
123
                return false;
2392
123
            }
2393
388
        } else if (element->isObject()) {
2394
195
            const auto* object = element->unpack<ObjectVal>();
2395
195
            bool find = false;
2396
201
            for (const auto& item : *object) {
2397
201
                Slice key(item.getKeyStr(), item.klen());
2398
201
                const auto* child_element = item.value();
2399
                // construct an object member path leg.
2400
201
                auto leg = std::make_unique<leg_info>(key.data, key.size, 0, MEMBER_CODE);
2401
201
                cur_path->add_leg_to_leg_vector(std::move(leg));
2402
201
                find |= find_matches(child_element, one_match, state, cur_path, matches);
2403
201
                cur_path->pop_leg_from_leg_vector();
2404
201
                if (one_match && find) {
2405
3
                    return true;
2406
3
                }
2407
201
            }
2408
192
            return find;
2409
195
        } else if (element->isArray()) {
2410
193
            const auto* array = element->unpack<ArrayVal>();
2411
193
            bool find = false;
2412
512
            for (int i = 0; i < array->numElem(); ++i) {
2413
385
                auto leg = std::make_unique<leg_info>(nullptr, 0, i, ARRAY_CODE);
2414
385
                cur_path->add_leg_to_leg_vector(std::move(leg));
2415
385
                const auto* child_element = array->get(i);
2416
                // construct an array cell path leg.
2417
385
                find |= find_matches(child_element, one_match, state, cur_path, matches);
2418
385
                cur_path->pop_leg_from_leg_vector();
2419
385
                if (one_match && find) {
2420
66
                    return true;
2421
66
                }
2422
385
            }
2423
127
            return find;
2424
193
        } else {
2425
0
            return false;
2426
0
        }
2427
717
    }
2428
2429
    void make_result_str(JsonbWriter& writer, std::unordered_set<std::string>& matches,
2430
117
                         ColumnString* result_col) const {
2431
117
        if (matches.size() == 1) {
2432
86
            for (const auto& str_ref : matches) {
2433
86
                writer.writeStartString();
2434
86
                writer.writeString(str_ref);
2435
86
                writer.writeEndString();
2436
86
            }
2437
86
        } else {
2438
31
            writer.writeStartArray();
2439
120
            for (const auto& str_ref : matches) {
2440
120
                writer.writeStartString();
2441
120
                writer.writeString(str_ref);
2442
120
                writer.writeEndString();
2443
120
            }
2444
31
            writer.writeEndArray();
2445
31
        }
2446
2447
117
        result_col->insert_data(writer.getOutput()->getBuffer(),
2448
117
                                (size_t)writer.getOutput()->getSize());
2449
117
    }
2450
2451
    template <bool search_is_const>
2452
    Status execute_vector(Block& block, size_t input_rows_count, CheckNullFun json_null_check,
2453
                          GetJsonStringRefFun col_json_string, CheckNullFun one_null_check,
2454
                          OneFun one_check, CheckNullFun search_null_check,
2455
                          const ColumnString* col_search_string, FunctionContext* context,
2456
47
                          size_t result) const {
2457
47
        auto result_col = ColumnString::create();
2458
47
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
2459
2460
47
        std::shared_ptr<LikeState> state_ptr;
2461
47
        LikeState* state = nullptr;
2462
47
        if (search_is_const) {
2463
8
            state = reinterpret_cast<LikeState*>(
2464
8
                    context->get_function_state(FunctionContext::THREAD_LOCAL));
2465
8
        }
2466
2467
47
        bool is_one = false;
2468
2469
47
        JsonbWriter writer;
2470
204
        for (size_t i = 0; i < input_rows_count; ++i) {
2471
            // an error occurs if the json_doc argument is not a valid json document.
2472
159
            if (json_null_check(i)) {
2473
12
                null_map->get_data()[i] = 1;
2474
12
                result_col->insert_data("", 0);
2475
12
                continue;
2476
12
            }
2477
147
            const auto& json_doc_str = col_json_string(i);
2478
147
            const JsonbDocument* json_doc = nullptr;
2479
147
            auto st = JsonbDocument::checkAndCreateDocument(json_doc_str.data, json_doc_str.size,
2480
147
                                                            &json_doc);
2481
147
            if (!st.ok()) {
2482
0
                return Status::InvalidArgument(
2483
0
                        "the json_doc argument at row {} is not a valid json document: {}", i,
2484
0
                        st.to_string());
2485
0
            }
2486
2487
147
            if (!one_null_check(i)) {
2488
145
                RETURN_IF_ERROR(one_check(i, &is_one));
2489
145
            }
2490
2491
145
            if (one_null_check(i) || search_null_check(i)) {
2492
14
                null_map->get_data()[i] = 1;
2493
14
                result_col->insert_data("", 0);
2494
14
                continue;
2495
14
            }
2496
2497
            // an error occurs if any path argument is not a valid path expression.
2498
131
            std::string root_path_str = "$";
2499
131
            JsonbPath root_path;
2500
131
            root_path.seek(root_path_str.c_str(), root_path_str.size());
2501
131
            std::vector<JsonbPath*> paths;
2502
131
            paths.push_back(&root_path);
2503
2504
131
            if (!search_is_const) {
2505
99
                state_ptr = std::make_shared<LikeState>();
2506
99
                state_ptr->is_like_pattern = true;
2507
99
                const auto& search_str = col_search_string->get_data_at(i);
2508
99
                RETURN_IF_ERROR(FunctionLike::construct_like_const_state(context, search_str,
2509
99
                                                                         state_ptr, false));
2510
99
                state = state_ptr.get();
2511
99
            }
2512
2513
            // maintain a hashset to deduplicate matches.
2514
131
            std::unordered_set<std::string> matches;
2515
131
            for (const auto& item : paths) {
2516
131
                auto* cur_path = item;
2517
131
                auto find = find_matches(json_doc->getValue(), is_one, state, cur_path, &matches);
2518
131
                if (is_one && find) {
2519
66
                    break;
2520
66
                }
2521
131
            }
2522
131
            if (matches.empty()) {
2523
                // returns NULL if the search_str is not found in the document.
2524
14
                null_map->get_data()[i] = 1;
2525
14
                result_col->insert_data("", 0);
2526
14
                continue;
2527
14
            }
2528
2529
117
            writer.reset();
2530
117
            make_result_str(writer, matches, result_col.get());
2531
117
        }
2532
45
        auto result_col_nullable =
2533
45
                ColumnNullable::create(std::move(result_col), std::move(null_map));
2534
45
        block.replace_by_position(result, std::move(result_col_nullable));
2535
45
        return Status::OK();
2536
47
    }
_ZNK5doris18FunctionJsonSearch14execute_vectorILb1EEENS_6StatusERNS_5BlockEmSt8functionIFbmEES5_IFNS_9StringRefEmEES7_S5_IFS2_mPbEES7_PKNS_9ColumnStrIjEEPNS_15FunctionContextEm
Line
Count
Source
2456
8
                          size_t result) const {
2457
8
        auto result_col = ColumnString::create();
2458
8
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
2459
2460
8
        std::shared_ptr<LikeState> state_ptr;
2461
8
        LikeState* state = nullptr;
2462
8
        if (search_is_const) {
2463
8
            state = reinterpret_cast<LikeState*>(
2464
8
                    context->get_function_state(FunctionContext::THREAD_LOCAL));
2465
8
        }
2466
2467
8
        bool is_one = false;
2468
2469
8
        JsonbWriter writer;
2470
44
        for (size_t i = 0; i < input_rows_count; ++i) {
2471
            // an error occurs if the json_doc argument is not a valid json document.
2472
36
            if (json_null_check(i)) {
2473
4
                null_map->get_data()[i] = 1;
2474
4
                result_col->insert_data("", 0);
2475
4
                continue;
2476
4
            }
2477
32
            const auto& json_doc_str = col_json_string(i);
2478
32
            const JsonbDocument* json_doc = nullptr;
2479
32
            auto st = JsonbDocument::checkAndCreateDocument(json_doc_str.data, json_doc_str.size,
2480
32
                                                            &json_doc);
2481
32
            if (!st.ok()) {
2482
0
                return Status::InvalidArgument(
2483
0
                        "the json_doc argument at row {} is not a valid json document: {}", i,
2484
0
                        st.to_string());
2485
0
            }
2486
2487
32
            if (!one_null_check(i)) {
2488
32
                RETURN_IF_ERROR(one_check(i, &is_one));
2489
32
            }
2490
2491
32
            if (one_null_check(i) || search_null_check(i)) {
2492
0
                null_map->get_data()[i] = 1;
2493
0
                result_col->insert_data("", 0);
2494
0
                continue;
2495
0
            }
2496
2497
            // an error occurs if any path argument is not a valid path expression.
2498
32
            std::string root_path_str = "$";
2499
32
            JsonbPath root_path;
2500
32
            root_path.seek(root_path_str.c_str(), root_path_str.size());
2501
32
            std::vector<JsonbPath*> paths;
2502
32
            paths.push_back(&root_path);
2503
2504
32
            if (!search_is_const) {
2505
0
                state_ptr = std::make_shared<LikeState>();
2506
0
                state_ptr->is_like_pattern = true;
2507
0
                const auto& search_str = col_search_string->get_data_at(i);
2508
0
                RETURN_IF_ERROR(FunctionLike::construct_like_const_state(context, search_str,
2509
0
                                                                         state_ptr, false));
2510
0
                state = state_ptr.get();
2511
0
            }
2512
2513
            // maintain a hashset to deduplicate matches.
2514
32
            std::unordered_set<std::string> matches;
2515
32
            for (const auto& item : paths) {
2516
32
                auto* cur_path = item;
2517
32
                auto find = find_matches(json_doc->getValue(), is_one, state, cur_path, &matches);
2518
32
                if (is_one && find) {
2519
16
                    break;
2520
16
                }
2521
32
            }
2522
32
            if (matches.empty()) {
2523
                // returns NULL if the search_str is not found in the document.
2524
0
                null_map->get_data()[i] = 1;
2525
0
                result_col->insert_data("", 0);
2526
0
                continue;
2527
0
            }
2528
2529
32
            writer.reset();
2530
32
            make_result_str(writer, matches, result_col.get());
2531
32
        }
2532
8
        auto result_col_nullable =
2533
8
                ColumnNullable::create(std::move(result_col), std::move(null_map));
2534
8
        block.replace_by_position(result, std::move(result_col_nullable));
2535
8
        return Status::OK();
2536
8
    }
_ZNK5doris18FunctionJsonSearch14execute_vectorILb0EEENS_6StatusERNS_5BlockEmSt8functionIFbmEES5_IFNS_9StringRefEmEES7_S5_IFS2_mPbEES7_PKNS_9ColumnStrIjEEPNS_15FunctionContextEm
Line
Count
Source
2456
39
                          size_t result) const {
2457
39
        auto result_col = ColumnString::create();
2458
39
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
2459
2460
39
        std::shared_ptr<LikeState> state_ptr;
2461
39
        LikeState* state = nullptr;
2462
39
        if (search_is_const) {
2463
0
            state = reinterpret_cast<LikeState*>(
2464
0
                    context->get_function_state(FunctionContext::THREAD_LOCAL));
2465
0
        }
2466
2467
39
        bool is_one = false;
2468
2469
39
        JsonbWriter writer;
2470
160
        for (size_t i = 0; i < input_rows_count; ++i) {
2471
            // an error occurs if the json_doc argument is not a valid json document.
2472
123
            if (json_null_check(i)) {
2473
8
                null_map->get_data()[i] = 1;
2474
8
                result_col->insert_data("", 0);
2475
8
                continue;
2476
8
            }
2477
115
            const auto& json_doc_str = col_json_string(i);
2478
115
            const JsonbDocument* json_doc = nullptr;
2479
115
            auto st = JsonbDocument::checkAndCreateDocument(json_doc_str.data, json_doc_str.size,
2480
115
                                                            &json_doc);
2481
115
            if (!st.ok()) {
2482
0
                return Status::InvalidArgument(
2483
0
                        "the json_doc argument at row {} is not a valid json document: {}", i,
2484
0
                        st.to_string());
2485
0
            }
2486
2487
115
            if (!one_null_check(i)) {
2488
113
                RETURN_IF_ERROR(one_check(i, &is_one));
2489
113
            }
2490
2491
113
            if (one_null_check(i) || search_null_check(i)) {
2492
14
                null_map->get_data()[i] = 1;
2493
14
                result_col->insert_data("", 0);
2494
14
                continue;
2495
14
            }
2496
2497
            // an error occurs if any path argument is not a valid path expression.
2498
99
            std::string root_path_str = "$";
2499
99
            JsonbPath root_path;
2500
99
            root_path.seek(root_path_str.c_str(), root_path_str.size());
2501
99
            std::vector<JsonbPath*> paths;
2502
99
            paths.push_back(&root_path);
2503
2504
99
            if (!search_is_const) {
2505
99
                state_ptr = std::make_shared<LikeState>();
2506
99
                state_ptr->is_like_pattern = true;
2507
99
                const auto& search_str = col_search_string->get_data_at(i);
2508
99
                RETURN_IF_ERROR(FunctionLike::construct_like_const_state(context, search_str,
2509
99
                                                                         state_ptr, false));
2510
99
                state = state_ptr.get();
2511
99
            }
2512
2513
            // maintain a hashset to deduplicate matches.
2514
99
            std::unordered_set<std::string> matches;
2515
99
            for (const auto& item : paths) {
2516
99
                auto* cur_path = item;
2517
99
                auto find = find_matches(json_doc->getValue(), is_one, state, cur_path, &matches);
2518
99
                if (is_one && find) {
2519
50
                    break;
2520
50
                }
2521
99
            }
2522
99
            if (matches.empty()) {
2523
                // returns NULL if the search_str is not found in the document.
2524
14
                null_map->get_data()[i] = 1;
2525
14
                result_col->insert_data("", 0);
2526
14
                continue;
2527
14
            }
2528
2529
85
            writer.reset();
2530
85
            make_result_str(writer, matches, result_col.get());
2531
85
        }
2532
37
        auto result_col_nullable =
2533
37
                ColumnNullable::create(std::move(result_col), std::move(null_map));
2534
37
        block.replace_by_position(result, std::move(result_col_nullable));
2535
37
        return Status::OK();
2536
39
    }
2537
2538
    static constexpr auto one = "one";
2539
    static constexpr auto all = "all";
2540
2541
public:
2542
    static constexpr auto name = "json_search";
2543
53
    // Factory hook: builds a fresh, shared FunctionJsonSearch instance.
    static FunctionPtr create() {
        auto instance = std::make_shared<FunctionJsonSearch>();
        return instance;
    }
2544
2545
1
    // Reports the SQL-visible function name ("json_search").
    String get_name() const override {
        return name;
    }
2546
45
    // json_search takes a fixed argument list (see get_number_of_arguments()).
    bool is_variadic() const override {
        return false;
    }
2547
44
    // Arguments are: json_doc, one_or_all, search_str.
    size_t get_number_of_arguments() const override {
        return 3;
    }
2548
2549
44
    // The result is always Nullable(JSONB), independent of the argument types.
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
        auto jsonb_type = std::make_shared<DataTypeJsonb>();
        return make_nullable(jsonb_type);
    }
2552
2553
102
    // NULL arguments are handled explicitly inside execute_impl(), so the framework's default
    // NULL handling is switched off.
    bool use_default_implementation_for_nulls() const override {
        return false;
    }
2554
2555
191
    // Prepares the LIKE matcher once when the search_str argument (index 2) is a constant.
    //
    // Nothing is done for scopes other than THREAD_LOCAL, or when the argument is not constant.
    // Otherwise a LikeState flagged as a LIKE pattern is built from the constant's first value
    // and stored as the function state for `scope`. Any error reported by
    // FunctionLike::construct_like_const_state is returned to the caller.
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
        const bool wants_const_state =
                scope == FunctionContext::THREAD_LOCAL && context->is_col_constant(2);
        if (!wants_const_state) {
            return Status::OK();
        }

        auto like_state = std::make_shared<LikeState>();
        like_state->is_like_pattern = true;

        const auto constant_column = context->get_constant_col(2)->column_ptr;
        const auto& like_pattern = constant_column->get_data_at(0);
        RETURN_IF_ERROR(
                FunctionLike::construct_like_const_state(context, like_pattern, like_state, false));

        context->set_function_state(scope, like_state);
        return Status::OK();
    }
2570
2571
    // Evaluates json_search(json_doc, one_or_all, search_str) for every row of `block` and
    // stores the Nullable(JSONB) result at position `result`.
    //
    // The three arguments are unpacked into small accessors (null check / value getter) so that
    // execute_vector() can treat constant and non-constant inputs uniformly. Steps, in order:
    //   1. reject argument counts other than 3;
    //   2. json_doc: a constant NULL short-circuits to an all-NULL result;
    //   3. one_or_all: a constant NULL short-circuits to an all-NULL result, a constant that is
    //      neither 'one' nor 'all' (case-insensitive) fails with InvalidArgument; a
    //      non-constant column is validated row by row through `one_check`;
    //   4. search_str: a constant NULL short-circuits to an all-NULL result, otherwise the work
    //      is delegated to execute_vector<search_is_const>().
    // An argument that is not a (possibly nullable) ColumnString fails with RuntimeError.
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                        uint32_t result, size_t input_rows_count) const override {
        // the json_doc, one_or_all, and search_str must be given.
        // and we require the positions are static.
        const size_t arg_count = arguments.size();
        if (arg_count < 3) {
            return Status::InvalidArgument("too few arguments for function {}", name);
        }
        if (arg_count > 3) {
            return Status::NotSupported("escape and path params are not support now");
        }

        // Returns the ColumnString behind `column`, looking through one ColumnNullable wrapper.
        // Yields nullptr when the (nested) column is not a ColumnString.
        auto as_string_column = [](const auto& column) -> const ColumnString* {
            if (const auto* nullable = check_and_get_column<ColumnNullable>(column.get())) {
                return check_and_get_column<ColumnString>(
                        nullable->get_nested_column_ptr().get());
            }
            return check_and_get_column<ColumnString>(column.get());
        };

        // Writes a result in which every row is NULL: a single NULL value, wrapped in a
        // ColumnConst when more than one row is requested.
        auto create_all_null_result = [&]() {
            auto nested = ColumnString::create();
            nested->insert_default();
            auto single_null =
                    ColumnNullable::create(std::move(nested), ColumnUInt8::create(1, 1));
            auto& result_slot = block.get_by_position(result).column;
            if (input_rows_count > 1) {
                result_slot = ColumnConst::create(std::move(single_null), input_rows_count);
            } else {
                result_slot = std::move(single_null);
            }
            return Status::OK();
        };

        // Maps a one_or_all value onto *is_one ('one' -> true, 'all' -> false), ignoring case.
        // Any other value is an error; *is_one is left untouched in that case.
        auto parse_one_or_all = [](const auto& raw, bool* is_one) -> Status {
            std::string one_or_all_str = raw.to_string();
            if (strcasecmp(one_or_all_str.c_str(), all) == 0) {
                *is_one = false;
                return Status::OK();
            }
            if (strcasecmp(one_or_all_str.c_str(), one) == 0) {
                *is_one = true;
                return Status::OK();
            }
            // an error occurs if the one_or_all argument is not 'one' nor 'all'.
            return Status::InvalidArgument("the one_or_all argument {} is not 'one' not 'all'",
                                           one_or_all_str);
        };

        // ---- json_doc ----
        CheckNullFun json_null_check = always_not_null;
        GetJsonStringRefFun get_json_fun;
        auto&& [col_json, json_is_const] =
                unpack_if_const(block.get_by_position(arguments[0]).column);
        const ColumnString* col_json_string = as_string_column(col_json);
        if (col_json_string == nullptr) {
            return Status::RuntimeError("Illegal arg json {} should be ColumnString",
                                        col_json->get_name());
        }

        if (!json_is_const) {
            json_null_check = [col_json](size_t i) { return col_json->is_null_at(i); };
            get_json_fun = [col_json_string](size_t i) { return col_json_string->get_data_at(i); };
        } else if (col_json->is_null_at(0)) {
            return create_all_null_result();
        } else {
            // Constant document: every row sees the same value.
            const auto& json_str = col_json_string->get_data_at(0);
            get_json_fun = [json_str](size_t i) { return json_str; };
        }

        // ---- one_or_all ----
        CheckNullFun one_null_check = always_not_null;
        OneFun one_check = always_one;
        auto&& [col_one, one_is_const] =
                unpack_if_const(block.get_by_position(arguments[1]).column);
        // A single-row input is handled through the constant path as well.
        one_is_const |= input_rows_count == 1;
        const ColumnString* col_one_string = as_string_column(col_one);
        if (col_one_string == nullptr) {
            return Status::RuntimeError("Illegal arg one {} should be ColumnString",
                                        col_one->get_name());
        }

        if (one_is_const) {
            if (col_one->is_null_at(0)) {
                return create_all_null_result();
            }
            // Validate once up front; 'one' keeps the default always_one checker.
            bool const_is_one = true;
            RETURN_IF_ERROR(parse_one_or_all(col_one_string->get_data_at(0), &const_is_one));
            if (!const_is_one) {
                one_check = always_all;
            }
        } else {
            one_null_check = [col_one](size_t i) { return col_one->is_null_at(i); };
            one_check = [col_one_string, parse_one_or_all](size_t i, bool* is_one) {
                return parse_one_or_all(col_one_string->get_data_at(i), is_one);
            };
        }

        // ---- search_str ----
        auto&& [col_search, search_is_const] =
                unpack_if_const(block.get_by_position(arguments[2]).column);
        const ColumnString* col_search_string = as_string_column(col_search);
        if (col_search_string == nullptr) {
            return Status::RuntimeError("Illegal arg pattern {} should be ColumnString",
                                        col_search->get_name());
        }

        if (!search_is_const) {
            CheckNullFun search_null_check = [col_search](size_t i) {
                return col_search->is_null_at(i);
            };
            RETURN_IF_ERROR(execute_vector<false>(
                    block, input_rows_count, json_null_check, get_json_fun, one_null_check,
                    one_check, search_null_check, col_search_string, context, result));
            return Status::OK();
        }

        if (col_search->is_null_at(0)) {
            return create_all_null_result();
        }
        CheckNullFun search_null_check = always_not_null;
        RETURN_IF_ERROR(execute_vector<true>(
                block, input_rows_count, json_null_check, get_json_fun, one_null_check, one_check,
                search_null_check, col_search_string, context, result));
        return Status::OK();
    }
2702
};
2703
2704
// Heap-backed scratch area for a serialized document.
// A default-constructed buffer owns no storage and reports zero size and capacity.
struct DocumentBuffer {
    // Owned storage; null until something is allocated into it.
    std::unique_ptr<char[]> ptr;
    // Count of meaningful bytes currently held in `ptr`.
    size_t size {0};
    // Count of bytes allocated behind `ptr`.
    size_t capacity {0};
};
2709
2710
class FunctionJsonbRemove : public IFunction {
2711
public:
2712
    static constexpr auto name = "jsonb_remove";
2713
    static constexpr auto alias = "json_remove";
2714
2715
31
    static FunctionPtr create() { return std::make_shared<FunctionJsonbRemove>(); }
2716
2717
0
    String get_name() const override { return name; }
2718
2719
0
    size_t get_number_of_arguments() const override { return 0; }
2720
23
    bool is_variadic() const override { return true; }
2721
2722
44
    bool use_default_implementation_for_nulls() const override { return false; }
2723
2724
22
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2725
22
        return make_nullable(std::make_shared<DataTypeJsonb>());
2726
22
    }
2727
2728
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
2729
22
                        uint32_t result, size_t input_rows_count) const override {
2730
22
        DCHECK_GE(arguments.size(), 2);
2731
2732
        // Check if arguments count is valid (json_doc + at least one path)
2733
22
        if (arguments.size() < 2) {
2734
0
            return Status::InvalidArgument("json_remove requires at least 2 arguments");
2735
0
        }
2736
2737
22
        auto return_data_type = make_nullable(std::make_shared<DataTypeJsonb>());
2738
22
        auto result_column = return_data_type->create_column();
2739
22
        auto& nullable_column = assert_cast<ColumnNullable&>(*result_column);
2740
22
        auto& res_chars =
2741
22
                assert_cast<ColumnString&>(nullable_column.get_nested_column()).get_chars();
2742
22
        auto& res_offsets =
2743
22
                assert_cast<ColumnString&>(nullable_column.get_nested_column()).get_offsets();
2744
22
        auto& null_map = nullable_column.get_null_map_data();
2745
2746
22
        res_chars.reserve(input_rows_count * 64);
2747
22
        res_offsets.resize(input_rows_count);
2748
22
        null_map.resize_fill(input_rows_count, 0);
2749
2750
        // Get JSON document column
2751
22
        auto [json_column, json_const] =
2752
22
                unpack_if_const(block.get_by_position(arguments[0]).column);
2753
22
        const auto* json_nullable = check_and_get_column<ColumnNullable>(json_column.get());
2754
22
        const ColumnString* json_data_column = nullptr;
2755
22
        const NullMap* json_null_map = nullptr;
2756
2757
22
        if (json_nullable) {
2758
22
            json_null_map = &json_nullable->get_null_map_data();
2759
22
            json_data_column =
2760
22
                    check_and_get_column<ColumnString>(&json_nullable->get_nested_column());
2761
22
        } else {
2762
0
            json_data_column = check_and_get_column<ColumnString>(json_column.get());
2763
0
        }
2764
2765
22
        if (!json_data_column) {
2766
0
            return Status::InvalidArgument("First argument must be a JSON document");
2767
0
        }
2768
2769
        // Parse paths
2770
22
        std::vector<const ColumnString*> path_columns;
2771
22
        std::vector<const NullMap*> path_null_maps;
2772
22
        std::vector<bool> path_constants;
2773
2774
51
        for (size_t i = 1; i < arguments.size(); ++i) {
2775
29
            auto [path_column, path_const] =
2776
29
                    unpack_if_const(block.get_by_position(arguments[i]).column);
2777
29
            const auto* path_nullable = check_and_get_column<ColumnNullable>(path_column.get());
2778
2779
29
            if (path_nullable) {
2780
6
                path_null_maps.push_back(&path_nullable->get_null_map_data());
2781
6
                path_columns.push_back(
2782
6
                        check_and_get_column<ColumnString>(&path_nullable->get_nested_column()));
2783
23
            } else {
2784
23
                path_null_maps.push_back(nullptr);
2785
23
                path_columns.push_back(check_and_get_column<ColumnString>(path_column.get()));
2786
23
            }
2787
2788
29
            if (!path_columns.back()) {
2789
0
                return Status::InvalidArgument(
2790
0
                        fmt::format("Argument {} must be a string path", i + 1));
2791
0
            }
2792
2793
29
            path_constants.push_back(path_const);
2794
29
        }
2795
2796
        // Reusable JsonbWriter for performance
2797
22
        JsonbWriter writer;
2798
2799
48
        for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) {
2800
28
            size_t json_idx = index_check_const(row_idx, json_const);
2801
2802
            // Check if JSON document is null
2803
28
            if (json_null_map && (*json_null_map)[json_idx]) {
2804
2
                null_map[row_idx] = 1;
2805
2
                res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size());
2806
2
                continue;
2807
2
            }
2808
2809
            // Parse JSON document
2810
26
            const auto& json_data = json_data_column->get_data_at(json_idx);
2811
26
            const JsonbDocument* json_doc = nullptr;
2812
26
            Status parse_status = JsonbDocument::checkAndCreateDocument(json_data.data,
2813
26
                                                                        json_data.size, &json_doc);
2814
2815
26
            if (!parse_status.ok() || !json_doc) {
2816
0
                null_map[row_idx] = 1;
2817
0
                res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size());
2818
0
                continue;
2819
0
            }
2820
2821
            // Check if any path is null
2822
26
            bool has_null_path = false;
2823
59
            for (size_t path_idx = 0; path_idx < path_columns.size(); ++path_idx) {
2824
35
                size_t idx = index_check_const(row_idx, path_constants[path_idx]);
2825
35
                if (path_null_maps[path_idx] && (*path_null_maps[path_idx])[idx]) {
2826
2
                    has_null_path = true;
2827
2
                    break;
2828
2
                }
2829
35
            }
2830
2831
26
            if (has_null_path) {
2832
2
                null_map[row_idx] = 1;
2833
2
                res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size());
2834
2
                continue;
2835
2
            }
2836
2837
24
            std::vector<JsonbPath> paths;
2838
24
            std::vector<bool> path_constants_vec;
2839
2840
54
            for (size_t path_idx = 0; path_idx < path_columns.size(); ++path_idx) {
2841
32
                size_t idx = index_check_const(row_idx, path_constants[path_idx]);
2842
32
                const auto& path_data = path_columns[path_idx]->get_data_at(idx);
2843
2844
32
                JsonbPath path;
2845
32
                if (!path.seek(path_data.data, path_data.size)) {
2846
1
                    return Status::InvalidArgument(
2847
1
                            "Json path error: Invalid Json Path for value: {} at row: {}",
2848
1
                            std::string_view(path_data.data, path_data.size), row_idx);
2849
1
                }
2850
2851
31
                if (path.is_wildcard() || path.is_supper_wildcard()) {
2852
1
                    return Status::InvalidArgument(
2853
1
                            "In this situation, path expressions may not contain the * and ** "
2854
1
                            "tokens or an array range, argument index: {}, row index: {}",
2855
1
                            path_idx + 1, row_idx);
2856
1
                }
2857
2858
30
                paths.push_back(std::move(path));
2859
30
                path_constants_vec.push_back(path_constants[path_idx]);
2860
30
            }
2861
2862
22
            const JsonbValue* current_value = json_doc->getValue();
2863
2864
22
            DocumentBuffer tmp_buffer;
2865
2866
52
            for (size_t path_idx = 0; path_idx < paths.size(); ++path_idx) {
2867
30
                writer.reset();
2868
2869
30
                auto find_result = current_value->findValue(paths[path_idx]);
2870
2871
30
                if (find_result.is_wildcard) {
2872
0
                    continue;
2873
0
                }
2874
2875
30
                if (find_result.value) {
2876
24
                    RETURN_IF_ERROR(clone_without_path(current_value, paths[path_idx], writer));
2877
2878
24
                    auto* writer_output = writer.getOutput();
2879
24
                    if (writer_output->getSize() > tmp_buffer.capacity) {
2880
17
                        tmp_buffer.capacity =
2881
17
                                ((size_t(writer_output->getSize()) + 1024 - 1) / 1024) * 1024;
2882
17
                        tmp_buffer.ptr = std::make_unique<char[]>(tmp_buffer.capacity);
2883
17
                        DCHECK_LE(writer_output->getSize(), tmp_buffer.capacity);
2884
17
                    }
2885
2886
24
                    memcpy(tmp_buffer.ptr.get(), writer_output->getBuffer(),
2887
24
                           writer_output->getSize());
2888
24
                    tmp_buffer.size = writer_output->getSize();
2889
2890
24
                    const JsonbDocument* new_doc = nullptr;
2891
24
                    RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(
2892
24
                            tmp_buffer.ptr.get(), tmp_buffer.size, &new_doc));
2893
2894
24
                    current_value = new_doc->getValue();
2895
24
                }
2896
30
            }
2897
2898
22
            const JsonbDocument* modified_doc = nullptr;
2899
22
            if (current_value != json_doc->getValue()) {
2900
17
                RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(
2901
17
                        tmp_buffer.ptr.get(), tmp_buffer.size, &modified_doc));
2902
17
            } else {
2903
5
                modified_doc = json_doc;
2904
5
            }
2905
2906
            // Write the final result
2907
22
            const auto size = modified_doc->numPackedBytes();
2908
22
            res_chars.insert(reinterpret_cast<const char*>(modified_doc),
2909
22
                             reinterpret_cast<const char*>(modified_doc) + size);
2910
22
            res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size());
2911
22
        }
2912
2913
20
        block.get_by_position(result).column = std::move(result_column);
2914
20
        return Status::OK();
2915
22
    }
2916
2917
private:
2918
    Status clone_without_path(const JsonbValue* root, const JsonbPath& path,
2919
24
                              JsonbWriter& writer) const {
2920
        // Start writing at the root level
2921
24
        if (root->isObject()) {
2922
15
            writer.writeStartObject();
2923
15
            RETURN_IF_ERROR(clone_object_without_path(root, path, 0, writer));
2924
15
            writer.writeEndObject();
2925
15
        } else if (root->isArray()) {
2926
9
            writer.writeStartArray();
2927
9
            RETURN_IF_ERROR(clone_array_without_path(root, path, 0, writer));
2928
9
            writer.writeEndArray();
2929
9
        } else {
2930
            // Primitive value - can't remove anything from it
2931
0
            writer.writeValue(root);
2932
0
        }
2933
24
        return Status::OK();
2934
24
    }
2935
2936
    Status clone_object_without_path(const JsonbValue* obj_value, const JsonbPath& path,
2937
20
                                     size_t depth, JsonbWriter& writer) const {
2938
20
        const auto* obj = obj_value->unpack<ObjectVal>();
2939
2940
40
        for (const auto& kv : *obj) {
2941
40
            std::string key(kv.getKeyStr(), kv.klen());
2942
2943
40
            if (depth < path.get_leg_vector_size()) {
2944
40
                const auto* leg = path.get_leg_from_leg_vector(depth);
2945
40
                if (leg->type == MEMBER_CODE) {
2946
40
                    std::string target_key(leg->leg_ptr, leg->leg_len);
2947
2948
40
                    if (key == target_key) {
2949
20
                        if (depth == path.get_leg_vector_size() - 1) {
2950
12
                            continue;
2951
12
                        } else {
2952
8
                            writer.writeKey(kv.getKeyStr(), kv.klen());
2953
8
                            if (kv.value()->isObject()) {
2954
3
                                writer.writeStartObject();
2955
3
                                RETURN_IF_ERROR(clone_object_without_path(kv.value(), path,
2956
3
                                                                          depth + 1, writer));
2957
3
                                writer.writeEndObject();
2958
5
                            } else if (kv.value()->isArray()) {
2959
5
                                writer.writeStartArray();
2960
5
                                RETURN_IF_ERROR(clone_array_without_path(kv.value(), path,
2961
5
                                                                         depth + 1, writer));
2962
5
                                writer.writeEndArray();
2963
5
                            } else {
2964
0
                                writer.writeValue(kv.value());
2965
0
                            }
2966
8
                        }
2967
20
                    } else {
2968
20
                        writer.writeKey(kv.getKeyStr(), kv.klen());
2969
20
                        writer.writeValue(kv.value());
2970
20
                    }
2971
40
                } else {
2972
0
                    writer.writeKey(kv.getKeyStr(), kv.klen());
2973
0
                    writer.writeValue(kv.value());
2974
0
                }
2975
40
            } else {
2976
0
                writer.writeKey(kv.getKeyStr(), kv.klen());
2977
0
                writer.writeValue(kv.value());
2978
0
            }
2979
40
        }
2980
2981
20
        return Status::OK();
2982
20
    }
2983
2984
    Status clone_array_without_path(const JsonbValue* arr_value, const JsonbPath& path,
2985
17
                                    size_t depth, JsonbWriter& writer) const {
2986
17
        const auto* arr = arr_value->unpack<ArrayVal>();
2987
2988
17
        int index = 0;
2989
52
        for (const auto& element : *arr) {
2990
52
            if (depth < path.get_leg_vector_size()) {
2991
52
                const auto* leg = path.get_leg_from_leg_vector(depth);
2992
52
                if (leg->type == ARRAY_CODE) {
2993
52
                    int target_index = leg->array_index;
2994
2995
52
                    if (index == target_index) {
2996
17
                        if (depth == path.get_leg_vector_size() - 1) {
2997
                            // This is the target element to remove - skip it
2998
12
                        } else {
2999
5
                            if (element.isObject()) {
3000
2
                                writer.writeStartObject();
3001
2
                                RETURN_IF_ERROR(clone_object_without_path(&element, path, depth + 1,
3002
2
                                                                          writer));
3003
2
                                writer.writeEndObject();
3004
3
                            } else if (element.isArray()) {
3005
3
                                writer.writeStartArray();
3006
3
                                RETURN_IF_ERROR(clone_array_without_path(&element, path, depth + 1,
3007
3
                                                                         writer));
3008
3
                                writer.writeEndArray();
3009
3
                            } else {
3010
0
                                writer.writeValue(&element);
3011
0
                            }
3012
5
                        }
3013
35
                    } else {
3014
35
                        writer.writeValue(&element);
3015
35
                    }
3016
52
                } else {
3017
0
                    writer.writeValue(&element);
3018
0
                }
3019
52
            } else {
3020
0
                writer.writeValue(&element);
3021
0
            }
3022
52
            index++;
3023
52
        }
3024
3025
17
        return Status::OK();
3026
17
    }
3027
};
3028
3029
class FunctionStripNullValue : public IFunction {
3030
public:
3031
    static constexpr auto name = "strip_null_value";
3032
24
    static FunctionPtr create() { return std::make_shared<FunctionStripNullValue>(); }
3033
3034
1
    String get_name() const override { return name; }
3035
16
    bool is_variadic() const override { return false; }
3036
15
    size_t get_number_of_arguments() const override { return 1; }
3037
3038
30
    bool use_default_implementation_for_nulls() const override { return false; }
3039
3040
15
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
3041
15
        return make_nullable(std::make_shared<DataTypeJsonb>());
3042
15
    }
3043
3044
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
3045
15
                        uint32_t result, size_t input_rows_count) const override {
3046
15
        const auto& arg_column = block.get_by_position(arguments[0]).column;
3047
15
        const ColumnString* json_column = nullptr;
3048
15
        const NullMap* json_null_map = nullptr;
3049
15
        if (arg_column->is_nullable()) {
3050
15
            const auto& nullable_col = assert_cast<const ColumnNullable&>(*arg_column);
3051
15
            json_column = assert_cast<const ColumnString*>(&nullable_col.get_nested_column());
3052
15
            json_null_map = &nullable_col.get_null_map_data();
3053
15
        } else {
3054
0
            json_column = assert_cast<const ColumnString*>(arg_column.get());
3055
0
        }
3056
3057
15
        auto return_data_type = make_nullable(std::make_shared<DataTypeJsonb>());
3058
15
        auto result_column = return_data_type->create_column();
3059
3060
15
        auto& result_nullmap = assert_cast<ColumnNullable&>(*result_column).get_null_map_data();
3061
15
        auto& result_data_col = assert_cast<ColumnString&>(
3062
15
                assert_cast<ColumnNullable&>(*result_column).get_nested_column());
3063
3064
15
        result_nullmap.resize_fill(input_rows_count, 0);
3065
60
        for (size_t i = 0; i != input_rows_count; ++i) {
3066
45
            if (json_null_map && (*json_null_map)[i]) {
3067
13
                result_nullmap[i] = 1;
3068
13
                result_data_col.insert_default();
3069
13
                continue;
3070
13
            }
3071
32
            const JsonbDocument* json_doc = nullptr;
3072
32
            const auto& json_str = json_column->get_data_at(i);
3073
32
            RETURN_IF_ERROR(
3074
32
                    JsonbDocument::checkAndCreateDocument(json_str.data, json_str.size, &json_doc));
3075
32
            if (json_doc) [[likely]] {
3076
32
                if (json_doc->getValue()->isNull()) {
3077
9
                    result_nullmap[i] = 1;
3078
9
                    result_data_col.insert_default();
3079
23
                } else {
3080
23
                    result_nullmap[i] = 0;
3081
23
                    result_data_col.insert_data(json_str.data, json_str.size);
3082
23
                }
3083
32
            } else {
3084
0
                result_nullmap[i] = 1;
3085
0
                result_data_col.insert_default();
3086
0
            }
3087
32
        }
3088
3089
15
        block.get_by_position(result).column = std::move(result_column);
3090
15
        return Status::OK();
3091
15
    }
3092
};
3093
3094
8
// Registers the JSON/JSONB function classes of this file with `factory`,
// together with their alternate (alias) names.
//
// Two registration forms are used below:
//   * register_function<T>(name) - registers T under an explicitly given name.
//   * register_function<T>()     - no name is passed; presumably the factory
//                                  falls back to T::name (TODO confirm against
//                                  SimpleFunctionFactory).
// Every register_alias call passes the primary name first and the alternate
// name second, and is always issued after the matching register_function call.
// NOTE(review): whether that ordering is required by the factory is not
// visible from here - keep it as is unless verified.
void register_function_jsonb(SimpleFunctionFactory& factory) {
3095
8
    // FunctionJsonbParse and its two error-handling variants. The variants are
    // registered under literal "json_*" names, each with a "jsonb_*" alias.
    factory.register_function<FunctionJsonbParse>(FunctionJsonbParse::name);
3096
8
    factory.register_alias(FunctionJsonbParse::name, FunctionJsonbParse::alias);
3097
8
    factory.register_function<FunctionJsonbParseErrorNull>("json_parse_error_to_null");
3098
8
    factory.register_alias("json_parse_error_to_null", "jsonb_parse_error_to_null");
3099
8
    factory.register_function<FunctionJsonbParseErrorValue>("json_parse_error_to_value");
3100
8
    factory.register_alias("json_parse_error_to_value", "jsonb_parse_error_to_value");
3101
3102
8
    // Classes that expose both a `name` and an `alias` static member.
    factory.register_function<FunctionJsonbExists>();
3103
8
    factory.register_alias(FunctionJsonbExists::name, FunctionJsonbExists::alias);
3104
8
    factory.register_function<FunctionJsonbType>();
3105
8
    factory.register_alias(FunctionJsonbType::name, FunctionJsonbType::alias);
3106
3107
8
    factory.register_function<FunctionJsonbKeys>();
3108
8
    factory.register_alias(FunctionJsonbKeys::name, FunctionJsonbKeys::alias);
3109
3110
8
    factory.register_function<FunctionJsonbExtractIsnull>();
3111
8
    factory.register_alias(FunctionJsonbExtractIsnull::name, FunctionJsonbExtractIsnull::alias);
3112
3113
8
    factory.register_function<FunctionJsonbExtractJsonb>();
3114
8
    factory.register_alias(FunctionJsonbExtractJsonb::name, FunctionJsonbExtractJsonb::alias);
3115
8
    factory.register_function<FunctionJsonbExtractJsonbNoQuotes>();
3116
8
    factory.register_alias(FunctionJsonbExtractJsonbNoQuotes::name,
3117
8
                           FunctionJsonbExtractJsonbNoQuotes::alias);
3118
3119
8
    // The following three are registered without any alias.
    factory.register_function<FunctionJsonbLength<JsonbLengthAndPathImpl>>();
3120
8
    factory.register_function<FunctionJsonbContains<JsonbContainsAndPathImpl>>();
3121
3122
8
    factory.register_function<FunctionJsonSearch>();
3123
3124
8
    // FunctionJsonbArray is registered twice: the <false> instantiation under
    // its own name/alias, the <true> instantiation under the literal
    // "json_array_ignore_null" name. Presumably the bool parameter selects
    // whether null elements are skipped - TODO confirm in FunctionJsonbArray.
    factory.register_function<FunctionJsonbArray<false>>();
3125
8
    factory.register_alias(FunctionJsonbArray<false>::name, FunctionJsonbArray<false>::alias);
3126
3127
8
    factory.register_function<FunctionJsonbArray<true>>("json_array_ignore_null");
3128
8
    factory.register_alias("json_array_ignore_null", "jsonb_array_ignore_null");
3129
3130
8
    factory.register_function<FunctionJsonbObject>();
3131
8
    factory.register_alias(FunctionJsonbObject::name, FunctionJsonbObject::alias);
3132
3133
8
    // One FunctionJsonbModify instantiation per JsonbModifyType value used
    // here (Insert, Set, Replace), each with its own name/alias pair.
    factory.register_function<FunctionJsonbModify<JsonbModifyType::Insert>>();
3134
8
    factory.register_alias(FunctionJsonbModify<JsonbModifyType::Insert>::name,
3135
8
                           FunctionJsonbModify<JsonbModifyType::Insert>::alias);
3136
8
    factory.register_function<FunctionJsonbModify<JsonbModifyType::Set>>();
3137
8
    factory.register_alias(FunctionJsonbModify<JsonbModifyType::Set>::name,
3138
8
                           FunctionJsonbModify<JsonbModifyType::Set>::alias);
3139
8
    factory.register_function<FunctionJsonbModify<JsonbModifyType::Replace>>();
3140
8
    factory.register_alias(FunctionJsonbModify<JsonbModifyType::Replace>::name,
3141
8
                           FunctionJsonbModify<JsonbModifyType::Replace>::alias);
3142
3143
8
    factory.register_function<FunctionJsonbRemove>();
3144
8
    factory.register_alias(FunctionJsonbRemove::name, FunctionJsonbRemove::alias);
3145
3146
8
    // Registered without an alias.
    factory.register_function<FunctionStripNullValue>();
3147
8
}
3148
3149
} // namespace doris