Coverage Report

Created: 2026-05-14 10:10

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/function_jsonb.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include <glog/logging.h>
19
20
#include <algorithm>
21
#include <cstdlib>
22
#include <memory>
23
#include <string>
24
#include <string_view>
25
#include <tuple>
26
#include <type_traits>
27
#include <utility>
28
#include <variant>
29
30
#include "common/compiler_util.h" // IWYU pragma: keep
31
#include "common/status.h"
32
#include "core/assert_cast.h"
33
#include "core/block/block.h"
34
#include "core/block/column_numbers.h"
35
#include "core/block/column_with_type_and_name.h"
36
#include "core/column/column.h"
37
#include "core/column/column_array.h"
38
#include "core/column/column_const.h"
39
#include "core/column/column_nullable.h"
40
#include "core/column/column_string.h"
41
#include "core/column/column_vector.h"
42
#include "core/custom_allocator.h"
43
#include "core/data_type/data_type.h"
44
#include "core/data_type/data_type_array.h"
45
#include "core/data_type/data_type_jsonb.h"
46
#include "core/data_type/data_type_nullable.h"
47
#include "core/data_type/data_type_string.h"
48
#include "core/data_type/define_primitive_type.h"
49
#include "core/data_type/primitive_type.h"
50
#include "core/string_ref.h"
51
#include "core/types.h"
52
#include "core/value/jsonb_value.h"
53
#include "exec/common/stringop_substring.h"
54
#include "exec/common/template_helpers.hpp"
55
#include "exec/common/util.hpp"
56
#include "exprs/aggregate/aggregate_function.h"
57
#include "exprs/function/function.h"
58
#include "exprs/function/like.h"
59
#include "exprs/function/simple_function_factory.h"
60
#include "exprs/function_context.h"
61
#include "util/jsonb_document.h"
62
#include "util/jsonb_stream.h"
63
#include "util/jsonb_utils.h"
64
#include "util/jsonb_writer.h"
65
#include "util/simd/bits.h"
66
67
namespace doris {
68
69
enum class NullalbeMode { NULLABLE = 0, FOLLOW_INPUT };
70
71
enum class JsonbParseErrorMode { FAIL = 0, RETURN_NULL, RETURN_VALUE };
72
73
// func(string,string) -> json
74
template <NullalbeMode nullable_mode, JsonbParseErrorMode parse_error_handle_mode>
75
class FunctionJsonbParseBase : public IFunction {
76
private:
77
    struct FunctionJsonbParseState {
78
        StringRef default_value;
79
        JsonBinaryValue default_value_parser;
80
        bool has_const_default_value = false;
81
        bool default_is_null = false;
82
    };
83
84
public:
85
    static constexpr auto name = "json_parse";
86
    static constexpr auto alias = "jsonb_parse";
87
87
    static FunctionPtr create() { return std::make_shared<FunctionJsonbParseBase>(); }
_ZN5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE6createEv
Line
Count
Source
87
27
    static FunctionPtr create() { return std::make_shared<FunctionJsonbParseBase>(); }
_ZN5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE6createEv
Line
Count
Source
87
39
    static FunctionPtr create() { return std::make_shared<FunctionJsonbParseBase>(); }
_ZN5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE6createEv
Line
Count
Source
87
21
    static FunctionPtr create() { return std::make_shared<FunctionJsonbParseBase>(); }
88
89
4
    String get_name() const override {
90
4
        String error_mode;
91
4
        switch (parse_error_handle_mode) {
92
1
        case JsonbParseErrorMode::FAIL:
93
1
            break;
94
1
        case JsonbParseErrorMode::RETURN_NULL:
95
1
            error_mode = "_error_to_null";
96
1
            break;
97
2
        case JsonbParseErrorMode::RETURN_VALUE:
98
2
            error_mode = "_error_to_value";
99
2
            break;
100
4
        }
101
102
4
        return name + error_mode;
103
4
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE8get_nameB5cxx11Ev
Line
Count
Source
89
1
    String get_name() const override {
90
1
        String error_mode;
91
1
        switch (parse_error_handle_mode) {
92
1
        case JsonbParseErrorMode::FAIL:
93
1
            break;
94
0
        case JsonbParseErrorMode::RETURN_NULL:
95
0
            error_mode = "_error_to_null";
96
0
            break;
97
0
        case JsonbParseErrorMode::RETURN_VALUE:
98
0
            error_mode = "_error_to_value";
99
0
            break;
100
1
        }
101
102
1
        return name + error_mode;
103
1
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE8get_nameB5cxx11Ev
Line
Count
Source
89
1
    String get_name() const override {
90
1
        String error_mode;
91
1
        switch (parse_error_handle_mode) {
92
0
        case JsonbParseErrorMode::FAIL:
93
0
            break;
94
1
        case JsonbParseErrorMode::RETURN_NULL:
95
1
            error_mode = "_error_to_null";
96
1
            break;
97
0
        case JsonbParseErrorMode::RETURN_VALUE:
98
0
            error_mode = "_error_to_value";
99
0
            break;
100
1
        }
101
102
1
        return name + error_mode;
103
1
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE8get_nameB5cxx11Ev
Line
Count
Source
89
2
    String get_name() const override {
90
2
        String error_mode;
91
2
        switch (parse_error_handle_mode) {
92
0
        case JsonbParseErrorMode::FAIL:
93
0
            break;
94
0
        case JsonbParseErrorMode::RETURN_NULL:
95
0
            error_mode = "_error_to_null";
96
0
            break;
97
2
        case JsonbParseErrorMode::RETURN_VALUE:
98
2
            error_mode = "_error_to_value";
99
2
            break;
100
2
        }
101
102
2
        return name + error_mode;
103
2
    }
104
105
64
    bool is_variadic() const override {
106
64
        return parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE;
107
64
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE11is_variadicEv
Line
Count
Source
105
19
    bool is_variadic() const override {
106
19
        return parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE;
107
19
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE11is_variadicEv
Line
Count
Source
105
31
    bool is_variadic() const override {
106
31
        return parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE;
107
31
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE11is_variadicEv
Line
Count
Source
105
14
    bool is_variadic() const override {
106
14
        return parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE;
107
14
    }
108
109
49
    size_t get_number_of_arguments() const override {
110
49
        switch (parse_error_handle_mode) {
111
18
        case JsonbParseErrorMode::FAIL:
112
18
            return 1;
113
30
        case JsonbParseErrorMode::RETURN_NULL:
114
30
            return 1;
115
1
        case JsonbParseErrorMode::RETURN_VALUE:
116
1
            return 0;
117
49
        }
118
49
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE23get_number_of_argumentsEv
Line
Count
Source
109
18
    size_t get_number_of_arguments() const override {
110
18
        switch (parse_error_handle_mode) {
111
18
        case JsonbParseErrorMode::FAIL:
112
18
            return 1;
113
0
        case JsonbParseErrorMode::RETURN_NULL:
114
0
            return 1;
115
0
        case JsonbParseErrorMode::RETURN_VALUE:
116
0
            return 0;
117
18
        }
118
18
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE23get_number_of_argumentsEv
Line
Count
Source
109
30
    size_t get_number_of_arguments() const override {
110
30
        switch (parse_error_handle_mode) {
111
0
        case JsonbParseErrorMode::FAIL:
112
0
            return 1;
113
30
        case JsonbParseErrorMode::RETURN_NULL:
114
30
            return 1;
115
0
        case JsonbParseErrorMode::RETURN_VALUE:
116
0
            return 0;
117
30
        }
118
30
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE23get_number_of_argumentsEv
Line
Count
Source
109
1
    size_t get_number_of_arguments() const override {
110
1
        switch (parse_error_handle_mode) {
111
0
        case JsonbParseErrorMode::FAIL:
112
0
            return 1;
113
0
        case JsonbParseErrorMode::RETURN_NULL:
114
0
            return 1;
115
1
        case JsonbParseErrorMode::RETURN_VALUE:
116
1
            return 0;
117
1
        }
118
1
    }
119
120
60
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
121
60
        bool is_nullable = false;
122
60
        switch (nullable_mode) {
123
30
        case NullalbeMode::NULLABLE:
124
30
            is_nullable = true;
125
30
            break;
126
30
        case NullalbeMode::FOLLOW_INPUT: {
127
41
            for (auto arg : arguments) {
128
41
                is_nullable |= arg->is_nullable();
129
41
            }
130
30
            break;
131
0
        }
132
60
        }
133
134
60
        return is_nullable ? make_nullable(std::make_shared<DataTypeJsonb>())
135
60
                           : std::make_shared<DataTypeJsonb>();
136
60
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE
Line
Count
Source
120
18
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
121
18
        bool is_nullable = false;
122
18
        switch (nullable_mode) {
123
0
        case NullalbeMode::NULLABLE:
124
0
            is_nullable = true;
125
0
            break;
126
18
        case NullalbeMode::FOLLOW_INPUT: {
127
18
            for (auto arg : arguments) {
128
18
                is_nullable |= arg->is_nullable();
129
18
            }
130
18
            break;
131
0
        }
132
18
        }
133
134
18
        return is_nullable ? make_nullable(std::make_shared<DataTypeJsonb>())
135
18
                           : std::make_shared<DataTypeJsonb>();
136
18
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE
Line
Count
Source
120
30
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
121
30
        bool is_nullable = false;
122
30
        switch (nullable_mode) {
123
30
        case NullalbeMode::NULLABLE:
124
30
            is_nullable = true;
125
30
            break;
126
0
        case NullalbeMode::FOLLOW_INPUT: {
127
0
            for (auto arg : arguments) {
128
0
                is_nullable |= arg->is_nullable();
129
0
            }
130
0
            break;
131
0
        }
132
30
        }
133
134
30
        return is_nullable ? make_nullable(std::make_shared<DataTypeJsonb>())
135
30
                           : std::make_shared<DataTypeJsonb>();
136
30
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE
Line
Count
Source
120
12
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
121
12
        bool is_nullable = false;
122
12
        switch (nullable_mode) {
123
0
        case NullalbeMode::NULLABLE:
124
0
            is_nullable = true;
125
0
            break;
126
12
        case NullalbeMode::FOLLOW_INPUT: {
127
23
            for (auto arg : arguments) {
128
23
                is_nullable |= arg->is_nullable();
129
23
            }
130
12
            break;
131
0
        }
132
12
        }
133
134
12
        return is_nullable ? make_nullable(std::make_shared<DataTypeJsonb>())
135
12
                           : std::make_shared<DataTypeJsonb>();
136
12
    }
137
138
135
    bool use_default_implementation_for_nulls() const override { return false; }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE36use_default_implementation_for_nullsEv
Line
Count
Source
138
44
    bool use_default_implementation_for_nulls() const override { return false; }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE36use_default_implementation_for_nullsEv
Line
Count
Source
138
64
    bool use_default_implementation_for_nulls() const override { return false; }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE36use_default_implementation_for_nullsEv
Line
Count
Source
138
27
    bool use_default_implementation_for_nulls() const override { return false; }
139
140
301
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
141
301
        if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) {
142
59
            std::shared_ptr<FunctionJsonbParseState> state =
143
59
                    std::make_shared<FunctionJsonbParseState>();
144
59
            context->set_function_state(FunctionContext::FRAGMENT_LOCAL, state);
145
59
        }
146
301
        if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) {
147
122
            if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) {
148
11
                auto* state = reinterpret_cast<FunctionJsonbParseState*>(
149
11
                        context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
150
11
                if (state) {
151
11
                    if (context->get_num_args() == 2) {
152
8
                        if (context->is_col_constant(1)) {
153
2
                            const auto default_value_col = context->get_constant_col(1)->column_ptr;
154
2
                            if (default_value_col->is_null_at(0)) {
155
1
                                state->default_is_null = true;
156
1
                            } else {
157
1
                                const auto& default_value = default_value_col->get_data_at(0);
158
159
1
                                state->default_value = default_value;
160
1
                                state->has_const_default_value = true;
161
1
                            }
162
2
                        }
163
8
                    } else if (context->get_num_args() == 1) {
164
2
                        RETURN_IF_ERROR(
165
2
                                state->default_value_parser.from_json_string(std::string("{}")));
166
2
                        state->default_value = StringRef(state->default_value_parser.value(),
167
2
                                                         state->default_value_parser.size());
168
2
                        state->has_const_default_value = true;
169
2
                    }
170
11
                }
171
11
            }
172
173
122
            if (context->get_num_args() != 1 && context->get_num_args() != 2) {
174
1
                return Status::InvalidArgument(
175
1
                        "{} function should have 1 or 2 arguments, "
176
1
                        "but got {}",
177
1
                        get_name(), context->get_num_args());
178
1
            }
179
122
        }
180
121
        return Status::OK();
181
301
    }
_ZN5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE4openEPNS_15FunctionContextENS4_18FunctionStateScopeE
Line
Count
Source
140
69
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
141
69
        if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) {
142
18
            std::shared_ptr<FunctionJsonbParseState> state =
143
18
                    std::make_shared<FunctionJsonbParseState>();
144
18
            context->set_function_state(FunctionContext::FRAGMENT_LOCAL, state);
145
18
        }
146
        if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) {
147
            if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) {
148
                auto* state = reinterpret_cast<FunctionJsonbParseState*>(
149
                        context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
150
                if (state) {
151
                    if (context->get_num_args() == 2) {
152
                        if (context->is_col_constant(1)) {
153
                            const auto default_value_col = context->get_constant_col(1)->column_ptr;
154
                            if (default_value_col->is_null_at(0)) {
155
                                state->default_is_null = true;
156
                            } else {
157
                                const auto& default_value = default_value_col->get_data_at(0);
158
159
                                state->default_value = default_value;
160
                                state->has_const_default_value = true;
161
                            }
162
                        }
163
                    } else if (context->get_num_args() == 1) {
164
                        RETURN_IF_ERROR(
165
                                state->default_value_parser.from_json_string(std::string("{}")));
166
                        state->default_value = StringRef(state->default_value_parser.value(),
167
                                                         state->default_value_parser.size());
168
                        state->has_const_default_value = true;
169
                    }
170
                }
171
            }
172
173
            if (context->get_num_args() != 1 && context->get_num_args() != 2) {
174
                return Status::InvalidArgument(
175
                        "{} function should have 1 or 2 arguments, "
176
                        "but got {}",
177
                        get_name(), context->get_num_args());
178
            }
179
        }
180
69
        return Status::OK();
181
69
    }
_ZN5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE4openEPNS_15FunctionContextENS4_18FunctionStateScopeE
Line
Count
Source
140
110
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
141
110
        if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) {
142
30
            std::shared_ptr<FunctionJsonbParseState> state =
143
30
                    std::make_shared<FunctionJsonbParseState>();
144
30
            context->set_function_state(FunctionContext::FRAGMENT_LOCAL, state);
145
30
        }
146
        if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) {
147
            if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) {
148
                auto* state = reinterpret_cast<FunctionJsonbParseState*>(
149
                        context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
150
                if (state) {
151
                    if (context->get_num_args() == 2) {
152
                        if (context->is_col_constant(1)) {
153
                            const auto default_value_col = context->get_constant_col(1)->column_ptr;
154
                            if (default_value_col->is_null_at(0)) {
155
                                state->default_is_null = true;
156
                            } else {
157
                                const auto& default_value = default_value_col->get_data_at(0);
158
159
                                state->default_value = default_value;
160
                                state->has_const_default_value = true;
161
                            }
162
                        }
163
                    } else if (context->get_num_args() == 1) {
164
                        RETURN_IF_ERROR(
165
                                state->default_value_parser.from_json_string(std::string("{}")));
166
                        state->default_value = StringRef(state->default_value_parser.value(),
167
                                                         state->default_value_parser.size());
168
                        state->has_const_default_value = true;
169
                    }
170
                }
171
            }
172
173
            if (context->get_num_args() != 1 && context->get_num_args() != 2) {
174
                return Status::InvalidArgument(
175
                        "{} function should have 1 or 2 arguments, "
176
                        "but got {}",
177
                        get_name(), context->get_num_args());
178
            }
179
        }
180
110
        return Status::OK();
181
110
    }
_ZN5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE4openEPNS_15FunctionContextENS4_18FunctionStateScopeE
Line
Count
Source
140
122
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
141
122
        if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) {
142
11
            std::shared_ptr<FunctionJsonbParseState> state =
143
11
                    std::make_shared<FunctionJsonbParseState>();
144
11
            context->set_function_state(FunctionContext::FRAGMENT_LOCAL, state);
145
11
        }
146
122
        if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) {
147
122
            if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) {
148
11
                auto* state = reinterpret_cast<FunctionJsonbParseState*>(
149
11
                        context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
150
11
                if (state) {
151
11
                    if (context->get_num_args() == 2) {
152
8
                        if (context->is_col_constant(1)) {
153
2
                            const auto default_value_col = context->get_constant_col(1)->column_ptr;
154
2
                            if (default_value_col->is_null_at(0)) {
155
1
                                state->default_is_null = true;
156
1
                            } else {
157
1
                                const auto& default_value = default_value_col->get_data_at(0);
158
159
1
                                state->default_value = default_value;
160
1
                                state->has_const_default_value = true;
161
1
                            }
162
2
                        }
163
8
                    } else if (context->get_num_args() == 1) {
164
2
                        RETURN_IF_ERROR(
165
2
                                state->default_value_parser.from_json_string(std::string("{}")));
166
2
                        state->default_value = StringRef(state->default_value_parser.value(),
167
2
                                                         state->default_value_parser.size());
168
2
                        state->has_const_default_value = true;
169
2
                    }
170
11
                }
171
11
            }
172
173
122
            if (context->get_num_args() != 1 && context->get_num_args() != 2) {
174
1
                return Status::InvalidArgument(
175
1
                        "{} function should have 1 or 2 arguments, "
176
1
                        "but got {}",
177
1
                        get_name(), context->get_num_args());
178
1
            }
179
122
        }
180
121
        return Status::OK();
181
122
    }
182
183
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
184
75
                        uint32_t result, size_t input_rows_count) const override {
185
75
        auto&& [col_from, col_from_is_const] =
186
75
                unpack_if_const(block.get_by_position(arguments[0]).column);
187
188
75
        if (col_from_is_const && col_from->is_null_at(0)) {
189
1
            auto col_str = ColumnString::create();
190
1
            col_str->insert_default();
191
1
            auto null_map = ColumnUInt8::create(1, 1);
192
1
            auto nullable_col = ColumnNullable::create(std::move(col_str), std::move(null_map));
193
1
            block.get_by_position(result).column =
194
1
                    ColumnConst::create(std::move(nullable_col), input_rows_count);
195
1
            return Status::OK();
196
1
        }
197
198
74
        auto null_map = ColumnUInt8::create(0, 0);
199
74
        bool is_nullable = false;
200
201
74
        switch (nullable_mode) {
202
34
        case NullalbeMode::NULLABLE: {
203
34
            is_nullable = true;
204
34
            break;
205
0
        }
206
40
        case NullalbeMode::FOLLOW_INPUT: {
207
52
            for (auto arg : arguments) {
208
52
                is_nullable |= block.get_by_position(arg).type->is_nullable();
209
52
            }
210
40
            break;
211
0
        }
212
74
        }
213
214
74
        if (is_nullable) {
215
64
            null_map = ColumnUInt8::create(input_rows_count, 0);
216
64
        }
217
218
60
        const ColumnString* col_from_string = nullptr;
219
74
        if (col_from->is_nullable()) {
220
39
            const auto& nullable_col = assert_cast<const ColumnNullable&>(*col_from);
221
222
39
            VectorizedUtils::update_null_map(null_map->get_data(),
223
39
                                             nullable_col.get_null_map_data());
224
39
            col_from_string =
225
39
                    assert_cast<const ColumnString*>(nullable_col.get_nested_column_ptr().get());
226
39
        } else {
227
35
            col_from_string = assert_cast<const ColumnString*>(col_from.get());
228
35
        }
229
230
60
        StringRef constant_default_value;
231
60
        bool default_value_const = false;
232
60
        bool default_value_null_const = false;
233
60
        ColumnPtr default_value_col;
234
60
        JsonBinaryValue default_jsonb_value_parser;
235
60
        const ColumnString* default_value_str_col = nullptr;
236
60
        const NullMap* default_value_nullmap = nullptr;
237
60
        if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) {
238
14
            auto* state = reinterpret_cast<FunctionJsonbParseState*>(
239
14
                    context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
240
14
            if (state && state->has_const_default_value) {
241
7
                constant_default_value = state->default_value;
242
7
                default_value_null_const = state->default_is_null;
243
7
                default_value_const = true;
244
7
            } else if (arguments.size() > 1) {
245
7
                if (block.get_by_position(arguments[1]).type->get_primitive_type() !=
246
7
                    PrimitiveType::TYPE_JSONB) {
247
1
                    return Status::InvalidArgument(
248
1
                            "{} second argument should be jsonb type, but got {}", get_name(),
249
1
                            block.get_by_position(arguments[1]).type->get_name());
250
1
                }
251
6
                std::tie(default_value_col, default_value_const) =
252
6
                        unpack_if_const(block.get_by_position(arguments[1]).column);
253
6
                if (default_value_const) {
254
1
                    const JsonbDocument* default_value_doc = nullptr;
255
1
                    if (default_value_col->is_null_at(0)) {
256
1
                        default_value_null_const = true;
257
1
                    } else {
258
0
                        auto data = default_value_col->get_data_at(0);
259
0
                        RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(data.data, data.size,
260
0
                                                                              &default_value_doc));
261
0
                        constant_default_value = data;
262
0
                    }
263
5
                } else {
264
5
                    if (default_value_col->is_nullable()) {
265
4
                        const auto& nullable_col =
266
4
                                assert_cast<const ColumnNullable&>(*default_value_col);
267
4
                        default_value_str_col = assert_cast<const ColumnString*>(
268
4
                                nullable_col.get_nested_column_ptr().get());
269
4
                        default_value_nullmap = &(nullable_col.get_null_map_data());
270
4
                    } else {
271
1
                        default_value_str_col =
272
1
                                assert_cast<const ColumnString*>(default_value_col.get());
273
1
                    }
274
5
                }
275
6
            } else if (arguments.size() == 1) {
276
                // parse default value '{}' should always success.
277
0
                RETURN_IF_ERROR(default_jsonb_value_parser.from_json_string(std::string("{}")));
278
0
                default_value_const = true;
279
0
                constant_default_value.data = default_jsonb_value_parser.value();
280
0
                constant_default_value.size = default_jsonb_value_parser.size();
281
0
            }
282
14
        }
283
284
13
        auto col_to = ColumnString::create();
285
286
60
        col_to->reserve(input_rows_count);
287
288
60
        auto& null_map_data = null_map->get_data();
289
290
        // parser can be reused for performance
291
60
        JsonBinaryValue jsonb_value;
292
293
282
        for (size_t i = 0; i < input_rows_count; ++i) {
294
205
            if (is_nullable && null_map_data[i]) {
295
13
                col_to->insert_default();
296
13
                continue;
297
13
            }
298
299
192
            auto index = index_check_const(i, col_from_is_const);
300
192
            const auto& val = col_from_string->get_data_at(index);
301
192
            auto st = jsonb_value.from_json_string(val.data, val.size);
302
192
            if (st.ok()) {
303
                // insert jsonb format data
304
138
                col_to->insert_data(jsonb_value.value(), jsonb_value.size());
305
138
            } else {
306
54
                if constexpr (parse_error_handle_mode == JsonbParseErrorMode::FAIL) {
307
6
                    return Status::InvalidArgument(
308
6
                            "Parse json document failed at row {}, error: {}", i, st.to_string());
309
17
                } else if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_NULL) {
310
17
                    null_map_data[i] = 1;
311
17
                    col_to->insert_default();
312
31
                } else {
313
31
                    if (default_value_const) {
314
9
                        if (default_value_null_const) {
315
3
                            null_map_data[i] = 1;
316
3
                            col_to->insert_default();
317
6
                        } else {
318
6
                            col_to->insert_data(constant_default_value.data,
319
6
                                                constant_default_value.size);
320
6
                        }
321
22
                    } else {
322
22
                        if (default_value_nullmap && (*default_value_nullmap)[i]) {
323
3
                            null_map_data[i] = 1;
324
3
                            col_to->insert_default();
325
3
                            continue;
326
3
                        }
327
19
                        auto value = default_value_str_col->get_data_at(i);
328
19
                        col_to->insert_data(value.data, value.size);
329
19
                    }
330
31
                }
331
54
            }
332
192
        }
333
334
77
        if (is_nullable) {
335
58
            block.replace_by_position(
336
58
                    result, ColumnNullable::create(std::move(col_to), std::move(null_map)));
337
58
        } else {
338
19
            block.replace_by_position(result, std::move(col_to));
339
19
        }
340
341
17
        return Status::OK();
342
14
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
184
26
                        uint32_t result, size_t input_rows_count) const override {
185
26
        auto&& [col_from, col_from_is_const] =
186
26
                unpack_if_const(block.get_by_position(arguments[0]).column);
187
188
26
        if (col_from_is_const && col_from->is_null_at(0)) {
189
0
            auto col_str = ColumnString::create();
190
0
            col_str->insert_default();
191
0
            auto null_map = ColumnUInt8::create(1, 1);
192
0
            auto nullable_col = ColumnNullable::create(std::move(col_str), std::move(null_map));
193
0
            block.get_by_position(result).column =
194
0
                    ColumnConst::create(std::move(nullable_col), input_rows_count);
195
0
            return Status::OK();
196
0
        }
197
198
26
        auto null_map = ColumnUInt8::create(0, 0);
199
26
        bool is_nullable = false;
200
201
26
        switch (nullable_mode) {
202
0
        case NullalbeMode::NULLABLE: {
203
0
            is_nullable = true;
204
0
            break;
205
0
        }
206
26
        case NullalbeMode::FOLLOW_INPUT: {
207
26
            for (auto arg : arguments) {
208
26
                is_nullable |= block.get_by_position(arg).type->is_nullable();
209
26
            }
210
26
            break;
211
0
        }
212
26
        }
213
214
26
        if (is_nullable) {
215
17
            null_map = ColumnUInt8::create(input_rows_count, 0);
216
17
        }
217
218
26
        const ColumnString* col_from_string = nullptr;
219
26
        if (col_from->is_nullable()) {
220
17
            const auto& nullable_col = assert_cast<const ColumnNullable&>(*col_from);
221
222
17
            VectorizedUtils::update_null_map(null_map->get_data(),
223
17
                                             nullable_col.get_null_map_data());
224
17
            col_from_string =
225
17
                    assert_cast<const ColumnString*>(nullable_col.get_nested_column_ptr().get());
226
17
        } else {
227
9
            col_from_string = assert_cast<const ColumnString*>(col_from.get());
228
9
        }
229
230
26
        StringRef constant_default_value;
231
26
        bool default_value_const = false;
232
26
        bool default_value_null_const = false;
233
26
        ColumnPtr default_value_col;
234
26
        JsonBinaryValue default_jsonb_value_parser;
235
26
        const ColumnString* default_value_str_col = nullptr;
236
26
        const NullMap* default_value_nullmap = nullptr;
237
        if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) {
238
            auto* state = reinterpret_cast<FunctionJsonbParseState*>(
239
                    context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
240
            if (state && state->has_const_default_value) {
241
                constant_default_value = state->default_value;
242
                default_value_null_const = state->default_is_null;
243
                default_value_const = true;
244
            } else if (arguments.size() > 1) {
245
                if (block.get_by_position(arguments[1]).type->get_primitive_type() !=
246
                    PrimitiveType::TYPE_JSONB) {
247
                    return Status::InvalidArgument(
248
                            "{} second argument should be jsonb type, but got {}", get_name(),
249
                            block.get_by_position(arguments[1]).type->get_name());
250
                }
251
                std::tie(default_value_col, default_value_const) =
252
                        unpack_if_const(block.get_by_position(arguments[1]).column);
253
                if (default_value_const) {
254
                    const JsonbDocument* default_value_doc = nullptr;
255
                    if (default_value_col->is_null_at(0)) {
256
                        default_value_null_const = true;
257
                    } else {
258
                        auto data = default_value_col->get_data_at(0);
259
                        RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(data.data, data.size,
260
                                                                              &default_value_doc));
261
                        constant_default_value = data;
262
                    }
263
                } else {
264
                    if (default_value_col->is_nullable()) {
265
                        const auto& nullable_col =
266
                                assert_cast<const ColumnNullable&>(*default_value_col);
267
                        default_value_str_col = assert_cast<const ColumnString*>(
268
                                nullable_col.get_nested_column_ptr().get());
269
                        default_value_nullmap = &(nullable_col.get_null_map_data());
270
                    } else {
271
                        default_value_str_col =
272
                                assert_cast<const ColumnString*>(default_value_col.get());
273
                    }
274
                }
275
            } else if (arguments.size() == 1) {
276
                // parse default value '{}' should always success.
277
                RETURN_IF_ERROR(default_jsonb_value_parser.from_json_string(std::string("{}")));
278
                default_value_const = true;
279
                constant_default_value.data = default_jsonb_value_parser.value();
280
                constant_default_value.size = default_jsonb_value_parser.size();
281
            }
282
        }
283
284
26
        auto col_to = ColumnString::create();
285
286
26
        col_to->reserve(input_rows_count);
287
288
26
        auto& null_map_data = null_map->get_data();
289
290
        // parser can be reused for performance
291
26
        JsonBinaryValue jsonb_value;
292
293
68
        for (size_t i = 0; i < input_rows_count; ++i) {
294
42
            if (is_nullable && null_map_data[i]) {
295
1
                col_to->insert_default();
296
1
                continue;
297
1
            }
298
299
41
            auto index = index_check_const(i, col_from_is_const);
300
41
            const auto& val = col_from_string->get_data_at(index);
301
41
            auto st = jsonb_value.from_json_string(val.data, val.size);
302
41
            if (st.ok()) {
303
                // insert jsonb format data
304
35
                col_to->insert_data(jsonb_value.value(), jsonb_value.size());
305
35
            } else {
306
6
                if constexpr (parse_error_handle_mode == JsonbParseErrorMode::FAIL) {
307
6
                    return Status::InvalidArgument(
308
6
                            "Parse json document failed at row {}, error: {}", i, st.to_string());
309
                } else if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_NULL) {
310
                    null_map_data[i] = 1;
311
                    col_to->insert_default();
312
                } else {
313
                    if (default_value_const) {
314
                        if (default_value_null_const) {
315
                            null_map_data[i] = 1;
316
                            col_to->insert_default();
317
                        } else {
318
                            col_to->insert_data(constant_default_value.data,
319
                                                constant_default_value.size);
320
                        }
321
                    } else {
322
                        if (default_value_nullmap && (*default_value_nullmap)[i]) {
323
                            null_map_data[i] = 1;
324
                            col_to->insert_default();
325
                            continue;
326
                        }
327
                        auto value = default_value_str_col->get_data_at(i);
328
                        col_to->insert_data(value.data, value.size);
329
                    }
330
                }
331
6
            }
332
41
        }
333
334
26
        if (is_nullable) {
335
11
            block.replace_by_position(
336
11
                    result, ColumnNullable::create(std::move(col_to), std::move(null_map)));
337
15
        } else {
338
15
            block.replace_by_position(result, std::move(col_to));
339
15
        }
340
341
26
        return Status::OK();
342
26
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
184
34
                        uint32_t result, size_t input_rows_count) const override {
185
34
        auto&& [col_from, col_from_is_const] =
186
34
                unpack_if_const(block.get_by_position(arguments[0]).column);
187
188
34
        if (col_from_is_const && col_from->is_null_at(0)) {
189
0
            auto col_str = ColumnString::create();
190
0
            col_str->insert_default();
191
0
            auto null_map = ColumnUInt8::create(1, 1);
192
0
            auto nullable_col = ColumnNullable::create(std::move(col_str), std::move(null_map));
193
0
            block.get_by_position(result).column =
194
0
                    ColumnConst::create(std::move(nullable_col), input_rows_count);
195
0
            return Status::OK();
196
0
        }
197
198
34
        auto null_map = ColumnUInt8::create(0, 0);
199
34
        bool is_nullable = false;
200
201
34
        switch (nullable_mode) {
202
34
        case NullalbeMode::NULLABLE: {
203
34
            is_nullable = true;
204
34
            break;
205
0
        }
206
0
        case NullalbeMode::FOLLOW_INPUT: {
207
0
            for (auto arg : arguments) {
208
0
                is_nullable |= block.get_by_position(arg).type->is_nullable();
209
0
            }
210
0
            break;
211
0
        }
212
34
        }
213
214
34
        if (is_nullable) {
215
34
            null_map = ColumnUInt8::create(input_rows_count, 0);
216
34
        }
217
218
34
        const ColumnString* col_from_string = nullptr;
219
34
        if (col_from->is_nullable()) {
220
11
            const auto& nullable_col = assert_cast<const ColumnNullable&>(*col_from);
221
222
11
            VectorizedUtils::update_null_map(null_map->get_data(),
223
11
                                             nullable_col.get_null_map_data());
224
11
            col_from_string =
225
11
                    assert_cast<const ColumnString*>(nullable_col.get_nested_column_ptr().get());
226
23
        } else {
227
23
            col_from_string = assert_cast<const ColumnString*>(col_from.get());
228
23
        }
229
230
34
        StringRef constant_default_value;
231
34
        bool default_value_const = false;
232
34
        bool default_value_null_const = false;
233
34
        ColumnPtr default_value_col;
234
34
        JsonBinaryValue default_jsonb_value_parser;
235
34
        const ColumnString* default_value_str_col = nullptr;
236
34
        const NullMap* default_value_nullmap = nullptr;
237
        if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) {
238
            auto* state = reinterpret_cast<FunctionJsonbParseState*>(
239
                    context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
240
            if (state && state->has_const_default_value) {
241
                constant_default_value = state->default_value;
242
                default_value_null_const = state->default_is_null;
243
                default_value_const = true;
244
            } else if (arguments.size() > 1) {
245
                if (block.get_by_position(arguments[1]).type->get_primitive_type() !=
246
                    PrimitiveType::TYPE_JSONB) {
247
                    return Status::InvalidArgument(
248
                            "{} second argument should be jsonb type, but got {}", get_name(),
249
                            block.get_by_position(arguments[1]).type->get_name());
250
                }
251
                std::tie(default_value_col, default_value_const) =
252
                        unpack_if_const(block.get_by_position(arguments[1]).column);
253
                if (default_value_const) {
254
                    const JsonbDocument* default_value_doc = nullptr;
255
                    if (default_value_col->is_null_at(0)) {
256
                        default_value_null_const = true;
257
                    } else {
258
                        auto data = default_value_col->get_data_at(0);
259
                        RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(data.data, data.size,
260
                                                                              &default_value_doc));
261
                        constant_default_value = data;
262
                    }
263
                } else {
264
                    if (default_value_col->is_nullable()) {
265
                        const auto& nullable_col =
266
                                assert_cast<const ColumnNullable&>(*default_value_col);
267
                        default_value_str_col = assert_cast<const ColumnString*>(
268
                                nullable_col.get_nested_column_ptr().get());
269
                        default_value_nullmap = &(nullable_col.get_null_map_data());
270
                    } else {
271
                        default_value_str_col =
272
                                assert_cast<const ColumnString*>(default_value_col.get());
273
                    }
274
                }
275
            } else if (arguments.size() == 1) {
276
                // parse default value '{}' should always success.
277
                RETURN_IF_ERROR(default_jsonb_value_parser.from_json_string(std::string("{}")));
278
                default_value_const = true;
279
                constant_default_value.data = default_jsonb_value_parser.value();
280
                constant_default_value.size = default_jsonb_value_parser.size();
281
            }
282
        }
283
284
34
        auto col_to = ColumnString::create();
285
286
34
        col_to->reserve(input_rows_count);
287
288
34
        auto& null_map_data = null_map->get_data();
289
290
        // parser can be reused for performance
291
34
        JsonBinaryValue jsonb_value;
292
293
99
        for (size_t i = 0; i < input_rows_count; ++i) {
294
65
            if (is_nullable && null_map_data[i]) {
295
6
                col_to->insert_default();
296
6
                continue;
297
6
            }
298
299
59
            auto index = index_check_const(i, col_from_is_const);
300
59
            const auto& val = col_from_string->get_data_at(index);
301
59
            auto st = jsonb_value.from_json_string(val.data, val.size);
302
59
            if (st.ok()) {
303
                // insert jsonb format data
304
42
                col_to->insert_data(jsonb_value.value(), jsonb_value.size());
305
42
            } else {
306
                if constexpr (parse_error_handle_mode == JsonbParseErrorMode::FAIL) {
307
                    return Status::InvalidArgument(
308
                            "Parse json document failed at row {}, error: {}", i, st.to_string());
309
17
                } else if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_NULL) {
310
17
                    null_map_data[i] = 1;
311
17
                    col_to->insert_default();
312
                } else {
313
                    if (default_value_const) {
314
                        if (default_value_null_const) {
315
                            null_map_data[i] = 1;
316
                            col_to->insert_default();
317
                        } else {
318
                            col_to->insert_data(constant_default_value.data,
319
                                                constant_default_value.size);
320
                        }
321
                    } else {
322
                        if (default_value_nullmap && (*default_value_nullmap)[i]) {
323
                            null_map_data[i] = 1;
324
                            col_to->insert_default();
325
                            continue;
326
                        }
327
                        auto value = default_value_str_col->get_data_at(i);
328
                        col_to->insert_data(value.data, value.size);
329
                    }
330
                }
331
17
            }
332
59
        }
333
334
34
        if (is_nullable) {
335
34
            block.replace_by_position(
336
34
                    result, ColumnNullable::create(std::move(col_to), std::move(null_map)));
337
34
        } else {
338
0
            block.replace_by_position(result, std::move(col_to));
339
0
        }
340
341
34
        return Status::OK();
342
34
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
184
15
                        uint32_t result, size_t input_rows_count) const override {
185
15
        auto&& [col_from, col_from_is_const] =
186
15
                unpack_if_const(block.get_by_position(arguments[0]).column);
187
188
15
        if (col_from_is_const && col_from->is_null_at(0)) {
189
1
            auto col_str = ColumnString::create();
190
1
            col_str->insert_default();
191
1
            auto null_map = ColumnUInt8::create(1, 1);
192
1
            auto nullable_col = ColumnNullable::create(std::move(col_str), std::move(null_map));
193
1
            block.get_by_position(result).column =
194
1
                    ColumnConst::create(std::move(nullable_col), input_rows_count);
195
1
            return Status::OK();
196
1
        }
197
198
14
        auto null_map = ColumnUInt8::create(0, 0);
199
14
        bool is_nullable = false;
200
201
14
        switch (nullable_mode) {
202
0
        case NullalbeMode::NULLABLE: {
203
0
            is_nullable = true;
204
0
            break;
205
0
        }
206
14
        case NullalbeMode::FOLLOW_INPUT: {
207
26
            for (auto arg : arguments) {
208
26
                is_nullable |= block.get_by_position(arg).type->is_nullable();
209
26
            }
210
14
            break;
211
0
        }
212
14
        }
213
214
14
        if (is_nullable) {
215
13
            null_map = ColumnUInt8::create(input_rows_count, 0);
216
13
        }
217
218
14
        const ColumnString* col_from_string = nullptr;
219
14
        if (col_from->is_nullable()) {
220
11
            const auto& nullable_col = assert_cast<const ColumnNullable&>(*col_from);
221
222
11
            VectorizedUtils::update_null_map(null_map->get_data(),
223
11
                                             nullable_col.get_null_map_data());
224
11
            col_from_string =
225
11
                    assert_cast<const ColumnString*>(nullable_col.get_nested_column_ptr().get());
226
11
        } else {
227
3
            col_from_string = assert_cast<const ColumnString*>(col_from.get());
228
3
        }
229
230
14
        StringRef constant_default_value;
231
14
        bool default_value_const = false;
232
14
        bool default_value_null_const = false;
233
14
        ColumnPtr default_value_col;
234
14
        JsonBinaryValue default_jsonb_value_parser;
235
14
        const ColumnString* default_value_str_col = nullptr;
236
14
        const NullMap* default_value_nullmap = nullptr;
237
14
        if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) {
238
14
            auto* state = reinterpret_cast<FunctionJsonbParseState*>(
239
14
                    context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
240
14
            if (state && state->has_const_default_value) {
241
7
                constant_default_value = state->default_value;
242
7
                default_value_null_const = state->default_is_null;
243
7
                default_value_const = true;
244
7
            } else if (arguments.size() > 1) {
245
7
                if (block.get_by_position(arguments[1]).type->get_primitive_type() !=
246
7
                    PrimitiveType::TYPE_JSONB) {
247
1
                    return Status::InvalidArgument(
248
1
                            "{} second argument should be jsonb type, but got {}", get_name(),
249
1
                            block.get_by_position(arguments[1]).type->get_name());
250
1
                }
251
6
                std::tie(default_value_col, default_value_const) =
252
6
                        unpack_if_const(block.get_by_position(arguments[1]).column);
253
6
                if (default_value_const) {
254
1
                    const JsonbDocument* default_value_doc = nullptr;
255
1
                    if (default_value_col->is_null_at(0)) {
256
1
                        default_value_null_const = true;
257
1
                    } else {
258
0
                        auto data = default_value_col->get_data_at(0);
259
0
                        RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(data.data, data.size,
260
0
                                                                              &default_value_doc));
261
0
                        constant_default_value = data;
262
0
                    }
263
5
                } else {
264
5
                    if (default_value_col->is_nullable()) {
265
4
                        const auto& nullable_col =
266
4
                                assert_cast<const ColumnNullable&>(*default_value_col);
267
4
                        default_value_str_col = assert_cast<const ColumnString*>(
268
4
                                nullable_col.get_nested_column_ptr().get());
269
4
                        default_value_nullmap = &(nullable_col.get_null_map_data());
270
4
                    } else {
271
1
                        default_value_str_col =
272
1
                                assert_cast<const ColumnString*>(default_value_col.get());
273
1
                    }
274
5
                }
275
6
            } else if (arguments.size() == 1) {
276
                // parse default value '{}' should always success.
277
0
                RETURN_IF_ERROR(default_jsonb_value_parser.from_json_string(std::string("{}")));
278
0
                default_value_const = true;
279
0
                constant_default_value.data = default_jsonb_value_parser.value();
280
0
                constant_default_value.size = default_jsonb_value_parser.size();
281
0
            }
282
14
        }
283
284
13
        auto col_to = ColumnString::create();
285
286
14
        col_to->reserve(input_rows_count);
287
288
14
        auto& null_map_data = null_map->get_data();
289
290
        // parser can be reused for performance
291
14
        JsonBinaryValue jsonb_value;
292
293
115
        for (size_t i = 0; i < input_rows_count; ++i) {
294
98
            if (is_nullable && null_map_data[i]) {
295
6
                col_to->insert_default();
296
6
                continue;
297
6
            }
298
299
92
            auto index = index_check_const(i, col_from_is_const);
300
92
            const auto& val = col_from_string->get_data_at(index);
301
92
            auto st = jsonb_value.from_json_string(val.data, val.size);
302
92
            if (st.ok()) {
303
                // insert jsonb format data
304
61
                col_to->insert_data(jsonb_value.value(), jsonb_value.size());
305
61
            } else {
306
                if constexpr (parse_error_handle_mode == JsonbParseErrorMode::FAIL) {
307
                    return Status::InvalidArgument(
308
                            "Parse json document failed at row {}, error: {}", i, st.to_string());
309
                } else if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_NULL) {
310
                    null_map_data[i] = 1;
311
                    col_to->insert_default();
312
31
                } else {
313
31
                    if (default_value_const) {
314
9
                        if (default_value_null_const) {
315
3
                            null_map_data[i] = 1;
316
3
                            col_to->insert_default();
317
6
                        } else {
318
6
                            col_to->insert_data(constant_default_value.data,
319
6
                                                constant_default_value.size);
320
6
                        }
321
22
                    } else {
322
22
                        if (default_value_nullmap && (*default_value_nullmap)[i]) {
323
3
                            null_map_data[i] = 1;
324
3
                            col_to->insert_default();
325
3
                            continue;
326
3
                        }
327
19
                        auto value = default_value_str_col->get_data_at(i);
328
19
                        col_to->insert_data(value.data, value.size);
329
19
                    }
330
31
                }
331
31
            }
332
92
        }
333
334
17
        if (is_nullable) {
335
13
            block.replace_by_position(
336
13
                    result, ColumnNullable::create(std::move(col_to), std::move(null_map)));
337
13
        } else {
338
4
            block.replace_by_position(result, std::move(col_to));
339
4
        }
340
341
17
        return Status::OK();
342
14
    }
343
};
344
345
// jsonb_parse return type nullable as input
346
using FunctionJsonbParse =
347
        FunctionJsonbParseBase<NullalbeMode::FOLLOW_INPUT, JsonbParseErrorMode::FAIL>;
348
using FunctionJsonbParseErrorNull =
349
        FunctionJsonbParseBase<NullalbeMode::NULLABLE, JsonbParseErrorMode::RETURN_NULL>;
350
using FunctionJsonbParseErrorValue =
351
        FunctionJsonbParseBase<NullalbeMode::FOLLOW_INPUT, JsonbParseErrorMode::RETURN_VALUE>;
352
353
// func(jsonb, [varchar, varchar, ...]) -> nullable(type)
354
template <typename Impl>
355
class FunctionJsonbExtract : public IFunction {
356
public:
357
    // Function name, taken verbatim from the Impl policy class; returned by get_name().
    static constexpr auto name = Impl::name;
358
    // Alias taken verbatim from the Impl policy class.
    // NOTE(review): presumably used when registering the function -- the use site is not in view.
    static constexpr auto alias = Impl::alias;
359
1.64k
    /// Factory: builds a new FunctionJsonbExtract<Impl> and hands it back as a FunctionPtr.
    static FunctionPtr create() {
        return std::make_shared<FunctionJsonbExtract>();
    }
_ZN5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE6createEv
Line
Count
Source
359
149
    static FunctionPtr create() { return std::make_shared<FunctionJsonbExtract>(); }
_ZN5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE6createEv
Line
Count
Source
359
145
    static FunctionPtr create() { return std::make_shared<FunctionJsonbExtract>(); }
_ZN5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE6createEv
Line
Count
Source
359
1.33k
    static FunctionPtr create() { return std::make_shared<FunctionJsonbExtract>(); }
_ZN5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE6createEv
Line
Count
Source
359
15
    static FunctionPtr create() { return std::make_shared<FunctionJsonbExtract>(); }
360
0
    /// Returns the function name, i.e. the `name` constant forwarded from Impl::name.
    String get_name() const override {
        return name;
    }
Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE8get_nameB5cxx11Ev
361
1.61k
    /// Always true: the function takes the JSONB value followed by one or more path arguments.
    bool is_variadic() const override {
        return true;
    }
_ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE11is_variadicEv
Line
Count
Source
361
141
    bool is_variadic() const override { return true; }
_ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE11is_variadicEv
Line
Count
Source
361
137
    bool is_variadic() const override { return true; }
_ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE11is_variadicEv
Line
Count
Source
361
1.32k
    bool is_variadic() const override { return true; }
_ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE11is_variadicEv
Line
Count
Source
361
7
    bool is_variadic() const override { return true; }
362
1
    /// Reports 0 as the fixed argument count; the function declares itself variadic instead.
    /// NOTE(review): presumably the framework ignores this value for variadic functions -- confirm.
    size_t get_number_of_arguments() const override {
        return 0;
    }
Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE23get_number_of_argumentsEv
_ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE23get_number_of_argumentsEv
Line
Count
Source
362
1
    size_t get_number_of_arguments() const override { return 0; }
Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE23get_number_of_argumentsEv
363
14.2k
    /// Always false: execute_impl unwraps Nullable arguments and fills the result null map
    /// itself rather than relying on a default NULL-handling wrapper.
    bool use_default_implementation_for_nulls() const override {
        return false;
    }
_ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE36use_default_implementation_for_nullsEv
Line
Count
Source
363
1.46k
    bool use_default_implementation_for_nulls() const override { return false; }
_ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE36use_default_implementation_for_nullsEv
Line
Count
Source
363
1.45k
    bool use_default_implementation_for_nulls() const override { return false; }
_ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE36use_default_implementation_for_nullsEv
Line
Count
Source
363
11.3k
    bool use_default_implementation_for_nulls() const override { return false; }
_ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE36use_default_implementation_for_nullsEv
Line
Count
Source
363
12
    bool use_default_implementation_for_nulls() const override { return false; }
364
1.60k
    /// The result type is always Nullable(Impl::ReturnType); the argument types are not consulted.
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
        auto nested_type = std::make_shared<typename Impl::ReturnType>();
        return make_nullable(nested_type);
    }
_ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
364
140
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
365
140
        return make_nullable(std::make_shared<typename Impl::ReturnType>());
366
140
    }
_ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
364
136
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
365
136
        return make_nullable(std::make_shared<typename Impl::ReturnType>());
366
136
    }
_ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
364
1.32k
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
365
1.32k
        return make_nullable(std::make_shared<typename Impl::ReturnType>());
366
1.32k
    }
_ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
364
6
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
365
6
        return make_nullable(std::make_shared<typename Impl::ReturnType>());
366
6
    }
367
32
    /// Forwards to Impl::get_variadic_argument_types_impl() when Impl provides one;
    /// otherwise reports an empty type list.
    DataTypes get_variadic_argument_types_impl() const override {
        if constexpr (!HasGetVariadicArgumentTypesImpl<Impl>) {
            return {};
        } else {
            return Impl::get_variadic_argument_types_impl();
        }
    }
_ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE32get_variadic_argument_types_implEv
Line
Count
Source
367
8
    DataTypes get_variadic_argument_types_impl() const override {
368
        if constexpr (HasGetVariadicArgumentTypesImpl<Impl>) {
369
            return Impl::get_variadic_argument_types_impl();
370
8
        } else {
371
8
            return {};
372
8
        }
373
8
    }
_ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE32get_variadic_argument_types_implEv
Line
Count
Source
367
8
    DataTypes get_variadic_argument_types_impl() const override {
368
        if constexpr (HasGetVariadicArgumentTypesImpl<Impl>) {
369
            return Impl::get_variadic_argument_types_impl();
370
8
        } else {
371
8
            return {};
372
8
        }
373
8
    }
_ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE32get_variadic_argument_types_implEv
Line
Count
Source
367
8
    DataTypes get_variadic_argument_types_impl() const override {
368
        if constexpr (HasGetVariadicArgumentTypesImpl<Impl>) {
369
            return Impl::get_variadic_argument_types_impl();
370
8
        } else {
371
8
            return {};
372
8
        }
373
8
    }
_ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE32get_variadic_argument_types_implEv
Line
Count
Source
367
8
    DataTypes get_variadic_argument_types_impl() const override {
368
        if constexpr (HasGetVariadicArgumentTypesImpl<Impl>) {
369
            return Impl::get_variadic_argument_types_impl();
370
8
        } else {
371
8
            return {};
372
8
        }
373
8
    }
374
375
    /// Evaluates the extract function over one block.
    ///
    /// @param context           function context, passed through to Impl unchanged
    /// @param block             block holding the argument columns and the result slot
    /// @param arguments         positions of the arguments: [0] is the JSONB value, [1..] are
    ///                          the path columns (string columns, each may be Nullable and/or
    ///                          Const)
    /// @param result            position of the result column inside `block`
    /// @param input_rows_count  number of rows to produce
    /// @return InvalidArgument when fewer than two arguments are supplied or when the first
    ///         argument is not of JSONB type; any error returned by the Impl call; otherwise OK.
    ///
    /// Impls whose ReturnType is DataTypeString or DataTypeJsonb receive every path column
    /// through Impl::vector_vector_v2. Every other Impl uses only the first path column and is
    /// dispatched on constness to Impl::scalar_vector / vector_scalar / vector_vector; in that
    /// branch a constant NULL JSONB value or a constant NULL path short-circuits to a constant
    /// all-NULL result column.
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                        uint32_t result, size_t input_rows_count) const override {
        DCHECK_GE(arguments.size(), 2);
        // glog's DCHECK_* are inactive when NDEBUG is defined, so validate at run time as well:
        // otherwise `arguments.size() - 1` below could wrap around (size 0) or
        // `jsonb_path_columns[0]` could be read out of bounds (size 1).
        if (arguments.size() < 2) {
            return Status::InvalidArgument(
                    "{} requires a json argument and at least one path argument, but got {} "
                    "argument(s)",
                    get_name(), arguments.size());
        }

        const auto& json_argument = block.get_by_position(arguments[0]);
        if (json_argument.type->get_primitive_type() != PrimitiveType::TYPE_JSONB) {
            return Status::InvalidArgument(
                    "jsonb_extract first argument should be json type, but got {}",
                    json_argument.type->get_name());
        }

        // Prepare the JSONB data column: strip the Const wrapper first, then the Nullable one.
        ColumnPtr jsonb_data_column;
        bool jsonb_data_const = false;
        const NullMap* data_null_map = nullptr;
        std::tie(jsonb_data_column, jsonb_data_const) = unpack_if_const(json_argument.column);
        if (jsonb_data_column->is_nullable()) {
            const auto& nullable_column = assert_cast<const ColumnNullable&>(*jsonb_data_column);
            data_null_map = &nullable_column.get_null_map_data();
            jsonb_data_column = nullable_column.get_nested_column_ptr();
        }
        const auto* jsonb_strings = assert_cast<const ColumnString*>(jsonb_data_column.get());
        const auto& ldata = jsonb_strings->get_chars();
        const auto& loffsets = jsonb_strings->get_offsets();

        // Prepare the path columns: one entry per path argument, unwrapped the same way.
        // NOTE(review): the raw pointers collected here outlive the loop-local ColumnPtr; this
        // relies on `block` keeping the underlying columns alive for the whole call -- confirm.
        const size_t path_count = arguments.size() - 1;
        std::vector<const ColumnString*> jsonb_path_columns;
        jsonb_path_columns.reserve(path_count);
        std::vector<bool> path_const(path_count);
        std::vector<const NullMap*> path_null_maps(path_count, nullptr);
        for (size_t i = 0; i < path_count; ++i) {
            ColumnPtr path_column;
            bool is_const = false;
            std::tie(path_column, is_const) =
                    unpack_if_const(block.get_by_position(arguments[i + 1]).column);
            path_const[i] = is_const;
            if (path_column->is_nullable()) {
                const auto& nullable_column = assert_cast<const ColumnNullable&>(*path_column);
                path_null_maps[i] = &nullable_column.get_null_map_data();
                path_column = nullable_column.get_nested_column_ptr();
            }
            jsonb_path_columns.push_back(assert_cast<const ColumnString*>(path_column.get()));
        }

        auto null_map = ColumnUInt8::create(input_rows_count, 0);
        auto res = Impl::ColumnType::create();

        // Execute Impl.
        if constexpr (std::is_same_v<typename Impl::ReturnType, DataTypeString> ||
                      std::is_same_v<typename Impl::ReturnType, DataTypeJsonb>) {
            // String/JSONB results: the Impl handles constness, NULLs and multiple paths itself.
            auto& res_data = res->get_chars();
            auto& res_offsets = res->get_offsets();
            RETURN_IF_ERROR(Impl::vector_vector_v2(
                    context, ldata, loffsets, data_null_map, jsonb_data_const, jsonb_path_columns,
                    path_null_maps, path_const, res_data, res_offsets, null_map->get_data()));
        } else {
            // Other result types (e.g. int, double, ...) support exactly one path for now.
            DCHECK_EQ(jsonb_path_columns.size(), 1);
            const auto& rdata = jsonb_path_columns[0]->get_chars();
            const auto& roffsets = jsonb_path_columns[0]->get_offsets();

            // Stores Const(Nullable(single default value, null flag = 1)) as the result,
            // i.e. every one of the input_rows_count rows is NULL.
            auto create_all_null_result = [&]() {
                res = Impl::ColumnType::create();
                res->insert_default();
                auto nullable_column =
                        ColumnNullable::create(std::move(res), ColumnUInt8::create(1, 1));
                auto const_column =
                        ColumnConst::create(std::move(nullable_column), input_rows_count);
                block.get_by_position(result).column = std::move(const_column);
                return Status::OK();
            };

            if (jsonb_data_const) {
                // Constant JSONB value: a NULL constant makes every row NULL.
                if (data_null_map && (*data_null_map)[0]) {
                    return create_all_null_result();
                }

                RETURN_IF_ERROR(Impl::scalar_vector(context, jsonb_data_column->get_data_at(0),
                                                    rdata, roffsets, path_null_maps[0],
                                                    res->get_data(), null_map->get_data()));
            } else if (path_const[0]) {
                // Constant path: a NULL constant makes every row NULL.
                if (path_null_maps[0] && (*path_null_maps[0])[0]) {
                    return create_all_null_result();
                }
                RETURN_IF_ERROR(Impl::vector_scalar(context, ldata, loffsets, data_null_map,
                                                    jsonb_path_columns[0]->get_data_at(0),
                                                    res->get_data(), null_map->get_data()));
            } else {
                RETURN_IF_ERROR(Impl::vector_vector(context, ldata, loffsets, data_null_map, rdata,
                                                    roffsets, path_null_maps[0], res->get_data(),
                                                    null_map->get_data()));
            }
        }

        block.get_by_position(result).column =
                ColumnNullable::create(std::move(res), std::move(null_map));
        return Status::OK();
    }
_ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
376
1.32k
                        uint32_t result, size_t input_rows_count) const override {
377
1.32k
        DCHECK_GE(arguments.size(), 2);
378
379
1.32k
        ColumnPtr jsonb_data_column;
380
1.32k
        bool jsonb_data_const = false;
381
1.32k
        const NullMap* data_null_map = nullptr;
382
383
1.32k
        if (block.get_by_position(arguments[0]).type->get_primitive_type() !=
384
1.32k
            PrimitiveType::TYPE_JSONB) {
385
0
            return Status::InvalidArgument(
386
0
                    "jsonb_extract first argument should be json type, but got {}",
387
0
                    block.get_by_position(arguments[0]).type->get_name());
388
0
        }
389
390
        // prepare jsonb data column
391
1.32k
        std::tie(jsonb_data_column, jsonb_data_const) =
392
1.32k
                unpack_if_const(block.get_by_position(arguments[0]).column);
393
1.32k
        if (jsonb_data_column->is_nullable()) {
394
1.14k
            const auto& nullable_column = assert_cast<const ColumnNullable&>(*jsonb_data_column);
395
1.14k
            jsonb_data_column = nullable_column.get_nested_column_ptr();
396
1.14k
            data_null_map = &nullable_column.get_null_map_data();
397
1.14k
        }
398
1.32k
        const auto& ldata = assert_cast<const ColumnString*>(jsonb_data_column.get())->get_chars();
399
1.32k
        const auto& loffsets =
400
1.32k
                assert_cast<const ColumnString*>(jsonb_data_column.get())->get_offsets();
401
402
        // prepare parse path column prepare
403
1.32k
        std::vector<const ColumnString*> jsonb_path_columns;
404
1.32k
        std::vector<bool> path_const(arguments.size() - 1);
405
1.32k
        std::vector<const NullMap*> path_null_maps(arguments.size() - 1, nullptr);
406
2.65k
        for (int i = 0; i < arguments.size() - 1; ++i) {
407
1.32k
            ColumnPtr path_column;
408
1.32k
            bool is_const = false;
409
1.32k
            std::tie(path_column, is_const) =
410
1.32k
                    unpack_if_const(block.get_by_position(arguments[i + 1]).column);
411
1.32k
            path_const[i] = is_const;
412
1.32k
            if (path_column->is_nullable()) {
413
5
                const auto& nullable_column = assert_cast<const ColumnNullable&>(*path_column);
414
5
                path_column = nullable_column.get_nested_column_ptr();
415
5
                path_null_maps[i] = &nullable_column.get_null_map_data();
416
5
            }
417
1.32k
            jsonb_path_columns.push_back(assert_cast<const ColumnString*>(path_column.get()));
418
1.32k
        }
419
420
1.32k
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
421
1.32k
        auto res = Impl::ColumnType::create();
422
423
        // execute Impl
424
        if constexpr (std::is_same_v<typename Impl::ReturnType, DataTypeString> ||
425
1.32k
                      std::is_same_v<typename Impl::ReturnType, DataTypeJsonb>) {
426
1.32k
            auto& res_data = res->get_chars();
427
1.32k
            auto& res_offsets = res->get_offsets();
428
1.32k
            RETURN_IF_ERROR(Impl::vector_vector_v2(
429
1.32k
                    context, ldata, loffsets, data_null_map, jsonb_data_const, jsonb_path_columns,
430
1.32k
                    path_null_maps, path_const, res_data, res_offsets, null_map->get_data()));
431
        } else {
432
            // not support other extract type for now (e.g. int, double, ...)
433
            DCHECK_EQ(jsonb_path_columns.size(), 1);
434
            const auto& rdata = jsonb_path_columns[0]->get_chars();
435
            const auto& roffsets = jsonb_path_columns[0]->get_offsets();
436
437
            auto create_all_null_result = [&]() {
438
                res = Impl::ColumnType::create();
439
                res->insert_default();
440
                auto nullable_column =
441
                        ColumnNullable::create(std::move(res), ColumnUInt8::create(1, 1));
442
                auto const_column =
443
                        ColumnConst::create(std::move(nullable_column), input_rows_count);
444
                block.get_by_position(result).column = std::move(const_column);
445
                return Status::OK();
446
            };
447
448
            if (jsonb_data_const) {
449
                if (data_null_map && (*data_null_map)[0]) {
450
                    return create_all_null_result();
451
                }
452
453
                RETURN_IF_ERROR(Impl::scalar_vector(context, jsonb_data_column->get_data_at(0),
454
                                                    rdata, roffsets, path_null_maps[0],
455
                                                    res->get_data(), null_map->get_data()));
456
            } else if (path_const[0]) {
457
                if (path_null_maps[0] && (*path_null_maps[0])[0]) {
458
                    return create_all_null_result();
459
                }
460
                RETURN_IF_ERROR(Impl::vector_scalar(context, ldata, loffsets, data_null_map,
461
                                                    jsonb_path_columns[0]->get_data_at(0),
462
                                                    res->get_data(), null_map->get_data()));
463
            } else {
464
                RETURN_IF_ERROR(Impl::vector_vector(context, ldata, loffsets, data_null_map, rdata,
465
                                                    roffsets, path_null_maps[0], res->get_data(),
466
                                                    null_map->get_data()));
467
            }
468
        }
469
470
1.32k
        block.get_by_position(result).column =
471
1.32k
                ColumnNullable::create(std::move(res), std::move(null_map));
472
1.32k
        return Status::OK();
473
1.32k
    }
_ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
376
1.31k
                        uint32_t result, size_t input_rows_count) const override {
377
1.31k
        DCHECK_GE(arguments.size(), 2);
378
379
1.31k
        ColumnPtr jsonb_data_column;
380
1.31k
        bool jsonb_data_const = false;
381
1.31k
        const NullMap* data_null_map = nullptr;
382
383
1.31k
        if (block.get_by_position(arguments[0]).type->get_primitive_type() !=
384
1.31k
            PrimitiveType::TYPE_JSONB) {
385
0
            return Status::InvalidArgument(
386
0
                    "jsonb_extract first argument should be json type, but got {}",
387
0
                    block.get_by_position(arguments[0]).type->get_name());
388
0
        }
389
390
        // prepare jsonb data column
391
1.31k
        std::tie(jsonb_data_column, jsonb_data_const) =
392
1.31k
                unpack_if_const(block.get_by_position(arguments[0]).column);
393
1.31k
        if (jsonb_data_column->is_nullable()) {
394
1.14k
            const auto& nullable_column = assert_cast<const ColumnNullable&>(*jsonb_data_column);
395
1.14k
            jsonb_data_column = nullable_column.get_nested_column_ptr();
396
1.14k
            data_null_map = &nullable_column.get_null_map_data();
397
1.14k
        }
398
1.31k
        const auto& ldata = assert_cast<const ColumnString*>(jsonb_data_column.get())->get_chars();
399
1.31k
        const auto& loffsets =
400
1.31k
                assert_cast<const ColumnString*>(jsonb_data_column.get())->get_offsets();
401
402
        // prepare parse path column prepare
403
1.31k
        std::vector<const ColumnString*> jsonb_path_columns;
404
1.31k
        std::vector<bool> path_const(arguments.size() - 1);
405
1.31k
        std::vector<const NullMap*> path_null_maps(arguments.size() - 1, nullptr);
406
2.64k
        for (int i = 0; i < arguments.size() - 1; ++i) {
407
1.32k
            ColumnPtr path_column;
408
1.32k
            bool is_const = false;
409
1.32k
            std::tie(path_column, is_const) =
410
1.32k
                    unpack_if_const(block.get_by_position(arguments[i + 1]).column);
411
1.32k
            path_const[i] = is_const;
412
1.32k
            if (path_column->is_nullable()) {
413
4
                const auto& nullable_column = assert_cast<const ColumnNullable&>(*path_column);
414
4
                path_column = nullable_column.get_nested_column_ptr();
415
4
                path_null_maps[i] = &nullable_column.get_null_map_data();
416
4
            }
417
1.32k
            jsonb_path_columns.push_back(assert_cast<const ColumnString*>(path_column.get()));
418
1.32k
        }
419
420
1.31k
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
421
1.31k
        auto res = Impl::ColumnType::create();
422
423
        // execute Impl
424
        if constexpr (std::is_same_v<typename Impl::ReturnType, DataTypeString> ||
425
                      std::is_same_v<typename Impl::ReturnType, DataTypeJsonb>) {
426
            auto& res_data = res->get_chars();
427
            auto& res_offsets = res->get_offsets();
428
            RETURN_IF_ERROR(Impl::vector_vector_v2(
429
                    context, ldata, loffsets, data_null_map, jsonb_data_const, jsonb_path_columns,
430
                    path_null_maps, path_const, res_data, res_offsets, null_map->get_data()));
431
1.31k
        } else {
432
            // not support other extract type for now (e.g. int, double, ...)
433
1.31k
            DCHECK_EQ(jsonb_path_columns.size(), 1);
434
1.31k
            const auto& rdata = jsonb_path_columns[0]->get_chars();
435
1.31k
            const auto& roffsets = jsonb_path_columns[0]->get_offsets();
436
437
1.31k
            auto create_all_null_result = [&]() {
438
1.31k
                res = Impl::ColumnType::create();
439
1.31k
                res->insert_default();
440
1.31k
                auto nullable_column =
441
1.31k
                        ColumnNullable::create(std::move(res), ColumnUInt8::create(1, 1));
442
1.31k
                auto const_column =
443
1.31k
                        ColumnConst::create(std::move(nullable_column), input_rows_count);
444
1.31k
                block.get_by_position(result).column = std::move(const_column);
445
1.31k
                return Status::OK();
446
1.31k
            };
447
448
1.31k
            if (jsonb_data_const) {
449
2
                if (data_null_map && (*data_null_map)[0]) {
450
1
                    return create_all_null_result();
451
1
                }
452
453
1
                RETURN_IF_ERROR(Impl::scalar_vector(context, jsonb_data_column->get_data_at(0),
454
1
                                                    rdata, roffsets, path_null_maps[0],
455
1
                                                    res->get_data(), null_map->get_data()));
456
1.32k
            } else if (path_const[0]) {
457
1.32k
                if (path_null_maps[0] && (*path_null_maps[0])[0]) {
458
1
                    return create_all_null_result();
459
1
                }
460
1.32k
                RETURN_IF_ERROR(Impl::vector_scalar(context, ldata, loffsets, data_null_map,
461
1.32k
                                                    jsonb_path_columns[0]->get_data_at(0),
462
1.32k
                                                    res->get_data(), null_map->get_data()));
463
18.4E
            } else {
464
18.4E
                RETURN_IF_ERROR(Impl::vector_vector(context, ldata, loffsets, data_null_map, rdata,
465
18.4E
                                                    roffsets, path_null_maps[0], res->get_data(),
466
18.4E
                                                    null_map->get_data()));
467
18.4E
            }
468
1.31k
        }
469
470
1.31k
        block.get_by_position(result).column =
471
1.31k
                ColumnNullable::create(std::move(res), std::move(null_map));
472
1.31k
        return Status::OK();
473
1.31k
    }
_ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
376
9.96k
                        uint32_t result, size_t input_rows_count) const override {
377
9.96k
        DCHECK_GE(arguments.size(), 2);
378
379
9.96k
        ColumnPtr jsonb_data_column;
380
9.96k
        bool jsonb_data_const = false;
381
9.96k
        const NullMap* data_null_map = nullptr;
382
383
9.96k
        if (block.get_by_position(arguments[0]).type->get_primitive_type() !=
384
9.96k
            PrimitiveType::TYPE_JSONB) {
385
1
            return Status::InvalidArgument(
386
1
                    "jsonb_extract first argument should be json type, but got {}",
387
1
                    block.get_by_position(arguments[0]).type->get_name());
388
1
        }
389
390
        // prepare jsonb data column
391
9.96k
        std::tie(jsonb_data_column, jsonb_data_const) =
392
9.96k
                unpack_if_const(block.get_by_position(arguments[0]).column);
393
9.96k
        if (jsonb_data_column->is_nullable()) {
394
8.52k
            const auto& nullable_column = assert_cast<const ColumnNullable&>(*jsonb_data_column);
395
8.52k
            jsonb_data_column = nullable_column.get_nested_column_ptr();
396
8.52k
            data_null_map = &nullable_column.get_null_map_data();
397
8.52k
        }
398
9.96k
        const auto& ldata = assert_cast<const ColumnString*>(jsonb_data_column.get())->get_chars();
399
9.96k
        const auto& loffsets =
400
9.96k
                assert_cast<const ColumnString*>(jsonb_data_column.get())->get_offsets();
401
402
        // prepare parse path column prepare
403
9.96k
        std::vector<const ColumnString*> jsonb_path_columns;
404
9.96k
        std::vector<bool> path_const(arguments.size() - 1);
405
9.96k
        std::vector<const NullMap*> path_null_maps(arguments.size() - 1, nullptr);
406
20.2k
        for (int i = 0; i < arguments.size() - 1; ++i) {
407
10.2k
            ColumnPtr path_column;
408
10.2k
            bool is_const = false;
409
10.2k
            std::tie(path_column, is_const) =
410
10.2k
                    unpack_if_const(block.get_by_position(arguments[i + 1]).column);
411
10.2k
            path_const[i] = is_const;
412
10.2k
            if (path_column->is_nullable()) {
413
56
                const auto& nullable_column = assert_cast<const ColumnNullable&>(*path_column);
414
56
                path_column = nullable_column.get_nested_column_ptr();
415
56
                path_null_maps[i] = &nullable_column.get_null_map_data();
416
56
            }
417
10.2k
            jsonb_path_columns.push_back(assert_cast<const ColumnString*>(path_column.get()));
418
10.2k
        }
419
420
9.96k
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
421
9.96k
        auto res = Impl::ColumnType::create();
422
423
        // execute Impl
424
        if constexpr (std::is_same_v<typename Impl::ReturnType, DataTypeString> ||
425
9.96k
                      std::is_same_v<typename Impl::ReturnType, DataTypeJsonb>) {
426
9.96k
            auto& res_data = res->get_chars();
427
9.96k
            auto& res_offsets = res->get_offsets();
428
9.96k
            RETURN_IF_ERROR(Impl::vector_vector_v2(
429
9.96k
                    context, ldata, loffsets, data_null_map, jsonb_data_const, jsonb_path_columns,
430
9.96k
                    path_null_maps, path_const, res_data, res_offsets, null_map->get_data()));
431
        } else {
432
            // not support other extract type for now (e.g. int, double, ...)
433
            DCHECK_EQ(jsonb_path_columns.size(), 1);
434
            const auto& rdata = jsonb_path_columns[0]->get_chars();
435
            const auto& roffsets = jsonb_path_columns[0]->get_offsets();
436
437
            auto create_all_null_result = [&]() {
438
                res = Impl::ColumnType::create();
439
                res->insert_default();
440
                auto nullable_column =
441
                        ColumnNullable::create(std::move(res), ColumnUInt8::create(1, 1));
442
                auto const_column =
443
                        ColumnConst::create(std::move(nullable_column), input_rows_count);
444
                block.get_by_position(result).column = std::move(const_column);
445
                return Status::OK();
446
            };
447
448
            if (jsonb_data_const) {
449
                if (data_null_map && (*data_null_map)[0]) {
450
                    return create_all_null_result();
451
                }
452
453
                RETURN_IF_ERROR(Impl::scalar_vector(context, jsonb_data_column->get_data_at(0),
454
                                                    rdata, roffsets, path_null_maps[0],
455
                                                    res->get_data(), null_map->get_data()));
456
            } else if (path_const[0]) {
457
                if (path_null_maps[0] && (*path_null_maps[0])[0]) {
458
                    return create_all_null_result();
459
                }
460
                RETURN_IF_ERROR(Impl::vector_scalar(context, ldata, loffsets, data_null_map,
461
                                                    jsonb_path_columns[0]->get_data_at(0),
462
                                                    res->get_data(), null_map->get_data()));
463
            } else {
464
                RETURN_IF_ERROR(Impl::vector_vector(context, ldata, loffsets, data_null_map, rdata,
465
                                                    roffsets, path_null_maps[0], res->get_data(),
466
                                                    null_map->get_data()));
467
            }
468
        }
469
470
9.95k
        block.get_by_position(result).column =
471
9.96k
                ColumnNullable::create(std::move(res), std::move(null_map));
472
9.96k
        return Status::OK();
473
9.96k
    }
_ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
376
6
                        uint32_t result, size_t input_rows_count) const override {
377
6
        DCHECK_GE(arguments.size(), 2);
378
379
6
        ColumnPtr jsonb_data_column;
380
6
        bool jsonb_data_const = false;
381
6
        const NullMap* data_null_map = nullptr;
382
383
6
        if (block.get_by_position(arguments[0]).type->get_primitive_type() !=
384
6
            PrimitiveType::TYPE_JSONB) {
385
0
            return Status::InvalidArgument(
386
0
                    "jsonb_extract first argument should be json type, but got {}",
387
0
                    block.get_by_position(arguments[0]).type->get_name());
388
0
        }
389
390
        // prepare jsonb data column
391
6
        std::tie(jsonb_data_column, jsonb_data_const) =
392
6
                unpack_if_const(block.get_by_position(arguments[0]).column);
393
6
        if (jsonb_data_column->is_nullable()) {
394
6
            const auto& nullable_column = assert_cast<const ColumnNullable&>(*jsonb_data_column);
395
6
            jsonb_data_column = nullable_column.get_nested_column_ptr();
396
6
            data_null_map = &nullable_column.get_null_map_data();
397
6
        }
398
6
        const auto& ldata = assert_cast<const ColumnString*>(jsonb_data_column.get())->get_chars();
399
6
        const auto& loffsets =
400
6
                assert_cast<const ColumnString*>(jsonb_data_column.get())->get_offsets();
401
402
        // prepare parse path column prepare
403
6
        std::vector<const ColumnString*> jsonb_path_columns;
404
6
        std::vector<bool> path_const(arguments.size() - 1);
405
6
        std::vector<const NullMap*> path_null_maps(arguments.size() - 1, nullptr);
406
16
        for (int i = 0; i < arguments.size() - 1; ++i) {
407
10
            ColumnPtr path_column;
408
10
            bool is_const = false;
409
10
            std::tie(path_column, is_const) =
410
10
                    unpack_if_const(block.get_by_position(arguments[i + 1]).column);
411
10
            path_const[i] = is_const;
412
10
            if (path_column->is_nullable()) {
413
1
                const auto& nullable_column = assert_cast<const ColumnNullable&>(*path_column);
414
1
                path_column = nullable_column.get_nested_column_ptr();
415
1
                path_null_maps[i] = &nullable_column.get_null_map_data();
416
1
            }
417
10
            jsonb_path_columns.push_back(assert_cast<const ColumnString*>(path_column.get()));
418
10
        }
419
420
6
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
421
6
        auto res = Impl::ColumnType::create();
422
423
        // execute Impl
424
        if constexpr (std::is_same_v<typename Impl::ReturnType, DataTypeString> ||
425
6
                      std::is_same_v<typename Impl::ReturnType, DataTypeJsonb>) {
426
6
            auto& res_data = res->get_chars();
427
6
            auto& res_offsets = res->get_offsets();
428
6
            RETURN_IF_ERROR(Impl::vector_vector_v2(
429
6
                    context, ldata, loffsets, data_null_map, jsonb_data_const, jsonb_path_columns,
430
6
                    path_null_maps, path_const, res_data, res_offsets, null_map->get_data()));
431
        } else {
432
            // not support other extract type for now (e.g. int, double, ...)
433
            DCHECK_EQ(jsonb_path_columns.size(), 1);
434
            const auto& rdata = jsonb_path_columns[0]->get_chars();
435
            const auto& roffsets = jsonb_path_columns[0]->get_offsets();
436
437
            auto create_all_null_result = [&]() {
438
                res = Impl::ColumnType::create();
439
                res->insert_default();
440
                auto nullable_column =
441
                        ColumnNullable::create(std::move(res), ColumnUInt8::create(1, 1));
442
                auto const_column =
443
                        ColumnConst::create(std::move(nullable_column), input_rows_count);
444
                block.get_by_position(result).column = std::move(const_column);
445
                return Status::OK();
446
            };
447
448
            if (jsonb_data_const) {
449
                if (data_null_map && (*data_null_map)[0]) {
450
                    return create_all_null_result();
451
                }
452
453
                RETURN_IF_ERROR(Impl::scalar_vector(context, jsonb_data_column->get_data_at(0),
454
                                                    rdata, roffsets, path_null_maps[0],
455
                                                    res->get_data(), null_map->get_data()));
456
            } else if (path_const[0]) {
457
                if (path_null_maps[0] && (*path_null_maps[0])[0]) {
458
                    return create_all_null_result();
459
                }
460
                RETURN_IF_ERROR(Impl::vector_scalar(context, ldata, loffsets, data_null_map,
461
                                                    jsonb_path_columns[0]->get_data_at(0),
462
                                                    res->get_data(), null_map->get_data()));
463
            } else {
464
                RETURN_IF_ERROR(Impl::vector_vector(context, ldata, loffsets, data_null_map, rdata,
465
                                                    roffsets, path_null_maps[0], res->get_data(),
466
                                                    null_map->get_data()));
467
            }
468
        }
469
470
6
        block.get_by_position(result).column =
471
6
                ColumnNullable::create(std::move(res), std::move(null_map));
472
6
        return Status::OK();
473
6
    }
474
};
475
476
class FunctionJsonbKeys : public IFunction {
477
public:
478
    static constexpr auto name = "json_keys";
479
    static constexpr auto alias = "jsonb_keys";
480
48
    static FunctionPtr create() { return std::make_shared<FunctionJsonbKeys>(); }
481
0
    String get_name() const override { return name; }
482
40
    bool is_variadic() const override { return true; }
483
0
    size_t get_number_of_arguments() const override { return 0; }
484
485
141
    bool use_default_implementation_for_nulls() const override { return false; }
486
487
39
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
488
39
        return make_nullable(
489
39
                std::make_shared<DataTypeArray>(make_nullable(std::make_shared<DataTypeString>())));
490
39
    }
491
492
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
493
102
                        uint32_t result, size_t input_rows_count) const override {
494
102
        DCHECK_GE(arguments.size(), 1);
495
102
        DCHECK(arguments.size() == 1 || arguments.size() == 2)
496
0
                << "json_keys should have 1 or 2 arguments, but got " << arguments.size();
497
498
102
        const NullMap* data_null_map = nullptr;
499
102
        const ColumnString* col_from_string = nullptr;
500
        // prepare jsonb data column
501
102
        auto&& [jsonb_data_column, json_data_const] =
502
102
                unpack_if_const(block.get_by_position(arguments[0]).column);
503
102
        if (jsonb_data_column->is_nullable()) {
504
98
            const auto* nullable = assert_cast<const ColumnNullable*>(jsonb_data_column.get());
505
98
            col_from_string =
506
98
                    assert_cast<const ColumnString*>(nullable->get_nested_column_ptr().get());
507
98
            data_null_map = &nullable->get_null_map_data();
508
98
        } else {
509
4
            col_from_string = assert_cast<const ColumnString*>(jsonb_data_column.get());
510
4
        }
511
512
        // prepare parse path column prepare, maybe we do not have path column
513
102
        ColumnPtr jsonb_path_column = nullptr;
514
102
        const ColumnString* jsonb_path_col = nullptr;
515
102
        bool path_const = false;
516
102
        const NullMap* path_null_map = nullptr;
517
102
        if (arguments.size() == 2) {
518
            // we have should have a ColumnString for path
519
75
            std::tie(jsonb_path_column, path_const) =
520
75
                    unpack_if_const(block.get_by_position(arguments[1]).column);
521
75
            if (jsonb_path_column->is_nullable()) {
522
10
                const auto* nullable = assert_cast<const ColumnNullable*>(jsonb_path_column.get());
523
10
                jsonb_path_column = nullable->get_nested_column_ptr();
524
10
                path_null_map = &nullable->get_null_map_data();
525
10
            }
526
75
            jsonb_path_col = check_and_get_column<ColumnString>(jsonb_path_column.get());
527
75
        }
528
529
102
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
530
102
        NullMap& res_null_map = null_map->get_data();
531
532
102
        auto dst_arr = ColumnArray::create(
533
102
                ColumnNullable::create(ColumnString::create(), ColumnUInt8::create()),
534
102
                ColumnArray::ColumnOffsets::create());
535
102
        auto& dst_nested_column = assert_cast<ColumnNullable&>(dst_arr->get_data());
536
537
102
        Status st = std::visit(
538
102
                [&](auto data_const, auto has_path, auto path_const) {
539
102
                    return inner_loop_impl<data_const, has_path, path_const>(
540
102
                            input_rows_count, *dst_arr, dst_nested_column, res_null_map,
541
102
                            *col_from_string, data_null_map, jsonb_path_col, path_null_map);
542
102
                },
_ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESG_SG_EEDaSA_SB_SC_
Line
Count
Source
538
27
                [&](auto data_const, auto has_path, auto path_const) {
539
27
                    return inner_loop_impl<data_const, has_path, path_const>(
540
27
                            input_rows_count, *dst_arr, dst_nested_column, res_null_map,
541
27
                            *col_from_string, data_null_map, jsonb_path_col, path_null_map);
542
27
                },
Unexecuted instantiation: _ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESG_SF_IbLb1EEEEDaSA_SB_SC_
_ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESF_IbLb1EESG_EEDaSA_SB_SC_
Line
Count
Source
538
25
                [&](auto data_const, auto has_path, auto path_const) {
539
25
                    return inner_loop_impl<data_const, has_path, path_const>(
540
25
                            input_rows_count, *dst_arr, dst_nested_column, res_null_map,
541
25
                            *col_from_string, data_null_map, jsonb_path_col, path_null_map);
542
25
                },
_ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESF_IbLb1EESH_EEDaSA_SB_SC_
Line
Count
Source
538
48
                [&](auto data_const, auto has_path, auto path_const) {
539
48
                    return inner_loop_impl<data_const, has_path, path_const>(
540
48
                            input_rows_count, *dst_arr, dst_nested_column, res_null_map,
541
48
                            *col_from_string, data_null_map, jsonb_path_col, path_null_map);
542
48
                },
Unexecuted instantiation: _ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESF_IbLb0EESH_EEDaSA_SB_SC_
Unexecuted instantiation: _ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESF_IbLb0EESG_EEDaSA_SB_SC_
_ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESG_SF_IbLb0EEEEDaSA_SB_SC_
Line
Count
Source
538
2
                [&](auto data_const, auto has_path, auto path_const) {
539
2
                    return inner_loop_impl<data_const, has_path, path_const>(
540
2
                            input_rows_count, *dst_arr, dst_nested_column, res_null_map,
541
2
                            *col_from_string, data_null_map, jsonb_path_col, path_null_map);
542
2
                },
Unexecuted instantiation: _ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESG_SG_EEDaSA_SB_SC_
543
102
                make_bool_variant(json_data_const), make_bool_variant(jsonb_path_column),
544
102
                make_bool_variant(path_const));
545
102
        if (!st.ok()) {
546
9
            return st;
547
9
        }
548
93
        block.get_by_position(result).column =
549
93
                ColumnNullable::create(std::move(dst_arr), std::move(null_map));
550
93
        return st;
551
102
    }
552
553
private:
554
    template <bool JSONB_DATA_CONST, bool JSONB_PATH_PARAM, bool JSON_PATH_CONST>
555
    static ALWAYS_INLINE Status inner_loop_impl(size_t input_rows_count, ColumnArray& dst_arr,
556
                                                ColumnNullable& dst_nested_column,
557
                                                NullMap& res_null_map,
558
                                                const ColumnString& col_from_string,
559
                                                const NullMap* jsonb_data_nullmap,
560
                                                const ColumnString* jsonb_path_column,
561
102
                                                const NullMap* path_null_map) {
562
        // if path is const, we just need to parse it once
563
102
        JsonbPath const_path;
564
102
        if constexpr (JSONB_PATH_PARAM && JSON_PATH_CONST) {
565
48
            StringRef r_raw_ref = jsonb_path_column->get_data_at(0);
566
48
            if (!const_path.seek(r_raw_ref.data, r_raw_ref.size)) {
567
1
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
568
1
                                               r_raw_ref.to_string());
569
1
            }
570
571
47
            if (const_path.is_wildcard()) {
572
2
                return Status::InvalidJsonPath(
573
2
                        "In this situation, path expressions may not contain the * and ** tokens "
574
2
                        "or an array range.");
575
2
            }
576
47
        }
577
578
378
        for (size_t i = 0; i < input_rows_count; ++i) {
579
268
            auto index = index_check_const(i, JSONB_DATA_CONST);
580
            // if jsonb data is null or path column is null , we should return null
581
268
            if (jsonb_data_nullmap && (*jsonb_data_nullmap)[index]) {
582
23
                res_null_map[i] = 1;
583
23
                dst_arr.insert_default();
584
23
                continue;
585
23
            }
586
245
            if constexpr (JSONB_PATH_PARAM && !JSON_PATH_CONST) {
587
69
                if (path_null_map && (*path_null_map)[i]) {
588
8
                    res_null_map[i] = 1;
589
8
                    dst_arr.insert_default();
590
8
                    continue;
591
8
                }
592
69
            }
593
594
61
            auto json_data = col_from_string.get_data_at(index);
595
245
            const JsonbDocument* doc = nullptr;
596
245
            auto st = JsonbDocument::checkAndCreateDocument(json_data.data, json_data.size, &doc);
597
245
            if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
598
0
                dst_arr.clear();
599
0
                return Status::InvalidArgument("jsonb data is invalid");
600
0
            }
601
245
            const JsonbValue* obj_val;
602
245
            JsonbFindResult find_result;
603
245
            if constexpr (JSONB_PATH_PARAM) {
604
191
                if constexpr (!JSON_PATH_CONST) {
605
69
                    auto data = jsonb_path_column->get_data_at(i);
606
69
                    JsonbPath path;
607
69
                    if (!path.seek(data.data, data.size)) {
608
5
                        return Status::InvalidArgument(
609
5
                                "Json path error: Invalid Json Path for value: {} at row: {}",
610
5
                                std::string_view(data.data, data.size), i);
611
5
                    }
612
613
64
                    if (path.is_wildcard()) {
614
1
                        return Status::InvalidJsonPath(
615
1
                                "In this situation, path expressions may not contain the * and ** "
616
1
                                "tokens "
617
1
                                "or an array range. at row: {}",
618
1
                                i);
619
1
                    }
620
63
                    find_result = doc->getValue()->findValue(path);
621
122
                } else {
622
122
                    find_result = doc->getValue()->findValue(const_path);
623
122
                }
624
0
                obj_val = find_result.value;
625
191
            } else {
626
54
                obj_val = doc->getValue();
627
54
            }
628
629
245
            if (!obj_val || !obj_val->isObject()) {
630
                // if jsonb data is not object we should return null
631
182
                res_null_map[i] = 1;
632
182
                dst_arr.insert_default();
633
182
                continue;
634
182
            }
635
63
            const auto* obj = obj_val->unpack<ObjectVal>();
636
75
            for (const auto& it : *obj) {
637
75
                dst_nested_column.insert_data(it.getKeyStr(), it.klen());
638
75
            }
639
63
            dst_arr.get_offsets().push_back(dst_nested_column.size());
640
63
        } //for
641
110
        return Status::OK();
642
102
    }
_ZN5doris17FunctionJsonbKeys15inner_loop_implILb0ELb0ELb0EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_
Line
Count
Source
561
27
                                                const NullMap* path_null_map) {
562
        // if path is const, we just need to parse it once
563
27
        JsonbPath const_path;
564
        if constexpr (JSONB_PATH_PARAM && JSON_PATH_CONST) {
565
            StringRef r_raw_ref = jsonb_path_column->get_data_at(0);
566
            if (!const_path.seek(r_raw_ref.data, r_raw_ref.size)) {
567
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
568
                                               r_raw_ref.to_string());
569
            }
570
571
            if (const_path.is_wildcard()) {
572
                return Status::InvalidJsonPath(
573
                        "In this situation, path expressions may not contain the * and ** tokens "
574
                        "or an array range.");
575
            }
576
        }
577
578
85
        for (size_t i = 0; i < input_rows_count; ++i) {
579
58
            auto index = index_check_const(i, JSONB_DATA_CONST);
580
            // if jsonb data is null or path column is null , we should return null
581
58
            if (jsonb_data_nullmap && (*jsonb_data_nullmap)[index]) {
582
4
                res_null_map[i] = 1;
583
4
                dst_arr.insert_default();
584
4
                continue;
585
4
            }
586
            if constexpr (JSONB_PATH_PARAM && !JSON_PATH_CONST) {
587
                if (path_null_map && (*path_null_map)[i]) {
588
                    res_null_map[i] = 1;
589
                    dst_arr.insert_default();
590
                    continue;
591
                }
592
            }
593
594
54
            auto json_data = col_from_string.get_data_at(index);
595
54
            const JsonbDocument* doc = nullptr;
596
54
            auto st = JsonbDocument::checkAndCreateDocument(json_data.data, json_data.size, &doc);
597
54
            if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
598
0
                dst_arr.clear();
599
0
                return Status::InvalidArgument("jsonb data is invalid");
600
0
            }
601
54
            const JsonbValue* obj_val;
602
54
            JsonbFindResult find_result;
603
            if constexpr (JSONB_PATH_PARAM) {
604
                if constexpr (!JSON_PATH_CONST) {
605
                    auto data = jsonb_path_column->get_data_at(i);
606
                    JsonbPath path;
607
                    if (!path.seek(data.data, data.size)) {
608
                        return Status::InvalidArgument(
609
                                "Json path error: Invalid Json Path for value: {} at row: {}",
610
                                std::string_view(data.data, data.size), i);
611
                    }
612
613
                    if (path.is_wildcard()) {
614
                        return Status::InvalidJsonPath(
615
                                "In this situation, path expressions may not contain the * and ** "
616
                                "tokens "
617
                                "or an array range. at row: {}",
618
                                i);
619
                    }
620
                    find_result = doc->getValue()->findValue(path);
621
                } else {
622
                    find_result = doc->getValue()->findValue(const_path);
623
                }
624
                obj_val = find_result.value;
625
54
            } else {
626
54
                obj_val = doc->getValue();
627
54
            }
628
629
54
            if (!obj_val || !obj_val->isObject()) {
630
                // if jsonb data is not object we should return null
631
36
                res_null_map[i] = 1;
632
36
                dst_arr.insert_default();
633
36
                continue;
634
36
            }
635
18
            const auto* obj = obj_val->unpack<ObjectVal>();
636
36
            for (const auto& it : *obj) {
637
36
                dst_nested_column.insert_data(it.getKeyStr(), it.klen());
638
36
            }
639
18
            dst_arr.get_offsets().push_back(dst_nested_column.size());
640
18
        } //for
641
27
        return Status::OK();
642
27
    }
Unexecuted instantiation: _ZN5doris17FunctionJsonbKeys15inner_loop_implILb0ELb0ELb1EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_
_ZN5doris17FunctionJsonbKeys15inner_loop_implILb0ELb1ELb0EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_
Line
Count
Source
561
25
                                                const NullMap* path_null_map) {
562
        // if path is const, we just need to parse it once
563
25
        JsonbPath const_path;
564
        if constexpr (JSONB_PATH_PARAM && JSON_PATH_CONST) {
565
            StringRef r_raw_ref = jsonb_path_column->get_data_at(0);
566
            if (!const_path.seek(r_raw_ref.data, r_raw_ref.size)) {
567
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
568
                                               r_raw_ref.to_string());
569
            }
570
571
            if (const_path.is_wildcard()) {
572
                return Status::InvalidJsonPath(
573
                        "In this situation, path expressions may not contain the * and ** tokens "
574
                        "or an array range.");
575
            }
576
        }
577
578
80
        for (size_t i = 0; i < input_rows_count; ++i) {
579
51
            auto index = index_check_const(i, JSONB_DATA_CONST);
580
            // if jsonb data is null or path column is null , we should return null
581
51
            if (jsonb_data_nullmap && (*jsonb_data_nullmap)[index]) {
582
6
                res_null_map[i] = 1;
583
6
                dst_arr.insert_default();
584
6
                continue;
585
6
            }
586
45
            if constexpr (JSONB_PATH_PARAM && !JSON_PATH_CONST) {
587
45
                if (path_null_map && (*path_null_map)[i]) {
588
4
                    res_null_map[i] = 1;
589
4
                    dst_arr.insert_default();
590
4
                    continue;
591
4
                }
592
45
            }
593
594
41
            auto json_data = col_from_string.get_data_at(index);
595
45
            const JsonbDocument* doc = nullptr;
596
45
            auto st = JsonbDocument::checkAndCreateDocument(json_data.data, json_data.size, &doc);
597
45
            if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
598
0
                dst_arr.clear();
599
0
                return Status::InvalidArgument("jsonb data is invalid");
600
0
            }
601
45
            const JsonbValue* obj_val;
602
45
            JsonbFindResult find_result;
603
45
            if constexpr (JSONB_PATH_PARAM) {
604
45
                if constexpr (!JSON_PATH_CONST) {
605
45
                    auto data = jsonb_path_column->get_data_at(i);
606
45
                    JsonbPath path;
607
45
                    if (!path.seek(data.data, data.size)) {
608
5
                        return Status::InvalidArgument(
609
5
                                "Json path error: Invalid Json Path for value: {} at row: {}",
610
5
                                std::string_view(data.data, data.size), i);
611
5
                    }
612
613
40
                    if (path.is_wildcard()) {
614
1
                        return Status::InvalidJsonPath(
615
1
                                "In this situation, path expressions may not contain the * and ** "
616
1
                                "tokens "
617
1
                                "or an array range. at row: {}",
618
1
                                i);
619
1
                    }
620
39
                    find_result = doc->getValue()->findValue(path);
621
                } else {
622
                    find_result = doc->getValue()->findValue(const_path);
623
                }
624
0
                obj_val = find_result.value;
625
            } else {
626
                obj_val = doc->getValue();
627
            }
628
629
45
            if (!obj_val || !obj_val->isObject()) {
630
                // if jsonb data is not object we should return null
631
25
                res_null_map[i] = 1;
632
25
                dst_arr.insert_default();
633
25
                continue;
634
25
            }
635
20
            const auto* obj = obj_val->unpack<ObjectVal>();
636
20
            for (const auto& it : *obj) {
637
14
                dst_nested_column.insert_data(it.getKeyStr(), it.klen());
638
14
            }
639
20
            dst_arr.get_offsets().push_back(dst_nested_column.size());
640
20
        } //for
641
29
        return Status::OK();
642
25
    }
_ZN5doris17FunctionJsonbKeys15inner_loop_implILb0ELb1ELb1EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_
Line
Count
Source
561
48
                                                const NullMap* path_null_map) {
562
        // if path is const, we just need to parse it once
563
48
        JsonbPath const_path;
564
48
        if constexpr (JSONB_PATH_PARAM && JSON_PATH_CONST) {
565
48
            StringRef r_raw_ref = jsonb_path_column->get_data_at(0);
566
48
            if (!const_path.seek(r_raw_ref.data, r_raw_ref.size)) {
567
1
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
568
1
                                               r_raw_ref.to_string());
569
1
            }
570
571
47
            if (const_path.is_wildcard()) {
572
2
                return Status::InvalidJsonPath(
573
2
                        "In this situation, path expressions may not contain the * and ** tokens "
574
2
                        "or an array range.");
575
2
            }
576
47
        }
577
578
183
        for (size_t i = 0; i < input_rows_count; ++i) {
579
135
            auto index = index_check_const(i, JSONB_DATA_CONST);
580
            // if jsonb data is null or path column is null , we should return null
581
135
            if (jsonb_data_nullmap && (*jsonb_data_nullmap)[index]) {
582
13
                res_null_map[i] = 1;
583
13
                dst_arr.insert_default();
584
13
                continue;
585
13
            }
586
            if constexpr (JSONB_PATH_PARAM && !JSON_PATH_CONST) {
587
                if (path_null_map && (*path_null_map)[i]) {
588
                    res_null_map[i] = 1;
589
                    dst_arr.insert_default();
590
                    continue;
591
                }
592
            }
593
594
122
            auto json_data = col_from_string.get_data_at(index);
595
122
            const JsonbDocument* doc = nullptr;
596
122
            auto st = JsonbDocument::checkAndCreateDocument(json_data.data, json_data.size, &doc);
597
122
            if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
598
0
                dst_arr.clear();
599
0
                return Status::InvalidArgument("jsonb data is invalid");
600
0
            }
601
122
            const JsonbValue* obj_val;
602
122
            JsonbFindResult find_result;
603
122
            if constexpr (JSONB_PATH_PARAM) {
604
                if constexpr (!JSON_PATH_CONST) {
605
                    auto data = jsonb_path_column->get_data_at(i);
606
                    JsonbPath path;
607
                    if (!path.seek(data.data, data.size)) {
608
                        return Status::InvalidArgument(
609
                                "Json path error: Invalid Json Path for value: {} at row: {}",
610
                                std::string_view(data.data, data.size), i);
611
                    }
612
613
                    if (path.is_wildcard()) {
614
                        return Status::InvalidJsonPath(
615
                                "In this situation, path expressions may not contain the * and ** "
616
                                "tokens "
617
                                "or an array range. at row: {}",
618
                                i);
619
                    }
620
                    find_result = doc->getValue()->findValue(path);
621
122
                } else {
622
122
                    find_result = doc->getValue()->findValue(const_path);
623
122
                }
624
122
                obj_val = find_result.value;
625
            } else {
626
                obj_val = doc->getValue();
627
            }
628
629
122
            if (!obj_val || !obj_val->isObject()) {
630
                // if jsonb data is not object we should return null
631
113
                res_null_map[i] = 1;
632
113
                dst_arr.insert_default();
633
113
                continue;
634
113
            }
635
9
            const auto* obj = obj_val->unpack<ObjectVal>();
636
9
            for (const auto& it : *obj) {
637
9
                dst_nested_column.insert_data(it.getKeyStr(), it.klen());
638
9
            }
639
9
            dst_arr.get_offsets().push_back(dst_nested_column.size());
640
9
        } //for
641
48
        return Status::OK();
642
48
    }
Unexecuted instantiation: _ZN5doris17FunctionJsonbKeys15inner_loop_implILb1ELb0ELb0EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_
Unexecuted instantiation: _ZN5doris17FunctionJsonbKeys15inner_loop_implILb1ELb0ELb1EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_
_ZN5doris17FunctionJsonbKeys15inner_loop_implILb1ELb1ELb0EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_
Line
Count
Source
561
2
                                                const NullMap* path_null_map) {
562
        // if path is const, we just need to parse it once
563
2
        JsonbPath const_path;
564
        if constexpr (JSONB_PATH_PARAM && JSON_PATH_CONST) {
565
            StringRef r_raw_ref = jsonb_path_column->get_data_at(0);
566
            if (!const_path.seek(r_raw_ref.data, r_raw_ref.size)) {
567
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
568
                                               r_raw_ref.to_string());
569
            }
570
571
            if (const_path.is_wildcard()) {
572
                return Status::InvalidJsonPath(
573
                        "In this situation, path expressions may not contain the * and ** tokens "
574
                        "or an array range.");
575
            }
576
        }
577
578
30
        for (size_t i = 0; i < input_rows_count; ++i) {
579
24
            auto index = index_check_const(i, JSONB_DATA_CONST);
580
            // if jsonb data is null or path column is null , we should return null
581
24
            if (jsonb_data_nullmap && (*jsonb_data_nullmap)[index]) {
582
0
                res_null_map[i] = 1;
583
0
                dst_arr.insert_default();
584
0
                continue;
585
0
            }
586
24
            if constexpr (JSONB_PATH_PARAM && !JSON_PATH_CONST) {
587
24
                if (path_null_map && (*path_null_map)[i]) {
588
4
                    res_null_map[i] = 1;
589
4
                    dst_arr.insert_default();
590
4
                    continue;
591
4
                }
592
24
            }
593
594
20
            auto json_data = col_from_string.get_data_at(index);
595
24
            const JsonbDocument* doc = nullptr;
596
24
            auto st = JsonbDocument::checkAndCreateDocument(json_data.data, json_data.size, &doc);
597
24
            if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
598
0
                dst_arr.clear();
599
0
                return Status::InvalidArgument("jsonb data is invalid");
600
0
            }
601
24
            const JsonbValue* obj_val;
602
24
            JsonbFindResult find_result;
603
24
            if constexpr (JSONB_PATH_PARAM) {
604
24
                if constexpr (!JSON_PATH_CONST) {
605
24
                    auto data = jsonb_path_column->get_data_at(i);
606
24
                    JsonbPath path;
607
24
                    if (!path.seek(data.data, data.size)) {
608
0
                        return Status::InvalidArgument(
609
0
                                "Json path error: Invalid Json Path for value: {} at row: {}",
610
0
                                std::string_view(data.data, data.size), i);
611
0
                    }
612
613
24
                    if (path.is_wildcard()) {
614
0
                        return Status::InvalidJsonPath(
615
0
                                "In this situation, path expressions may not contain the * and ** "
616
0
                                "tokens "
617
0
                                "or an array range. at row: {}",
618
0
                                i);
619
0
                    }
620
24
                    find_result = doc->getValue()->findValue(path);
621
                } else {
622
                    find_result = doc->getValue()->findValue(const_path);
623
                }
624
0
                obj_val = find_result.value;
625
            } else {
626
                obj_val = doc->getValue();
627
            }
628
629
24
            if (!obj_val || !obj_val->isObject()) {
630
                // if jsonb data is not object we should return null
631
8
                res_null_map[i] = 1;
632
8
                dst_arr.insert_default();
633
8
                continue;
634
8
            }
635
16
            const auto* obj = obj_val->unpack<ObjectVal>();
636
16
            for (const auto& it : *obj) {
637
16
                dst_nested_column.insert_data(it.getKeyStr(), it.klen());
638
16
            }
639
16
            dst_arr.get_offsets().push_back(dst_nested_column.size());
640
16
        } //for
641
6
        return Status::OK();
642
2
    }
Unexecuted instantiation: _ZN5doris17FunctionJsonbKeys15inner_loop_implILb1ELb1ELb1EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_
643
};
644
645
class FunctionJsonbExtractPath : public IFunction {
646
public:
647
    static constexpr auto name = "json_exists_path";
648
    static constexpr auto alias = "jsonb_exists_path";
649
    using ColumnType = ColumnUInt8;
650
    using Container = typename ColumnType::Container;
651
183
    static FunctionPtr create() { return std::make_shared<FunctionJsonbExtractPath>(); }
652
1
    String get_name() const override { return name; }
653
174
    size_t get_number_of_arguments() const override { return 2; }
654
174
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
655
        // it only needs to indicate existence and does not need to return nullable values.
656
174
        const auto nullable = std::ranges::any_of(
657
196
                arguments, [](const DataTypePtr& type) { return type->is_nullable(); });
658
174
        if (nullable) {
659
153
            return make_nullable(std::make_shared<DataTypeUInt8>());
660
153
        } else {
661
21
            return std::make_shared<DataTypeUInt8>();
662
21
        }
663
174
    }
664
665
1.53k
    bool use_default_implementation_for_nulls() const override { return false; }
666
667
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
668
1.35k
                        uint32_t result, size_t input_rows_count) const override {
669
        // prepare jsonb data column
670
1.35k
        auto&& [jsonb_data_column, jsonb_data_const] =
671
1.35k
                unpack_if_const(block.get_by_position(arguments[0]).column);
672
673
1.35k
        const NullMap* data_null_map = nullptr;
674
1.35k
        const ColumnString* data_col = nullptr;
675
1.35k
        if (jsonb_data_column->is_nullable()) {
676
1.17k
            const auto* nullable = assert_cast<const ColumnNullable*>(jsonb_data_column.get());
677
1.17k
            data_col = assert_cast<const ColumnString*>(nullable->get_nested_column_ptr().get());
678
1.17k
            data_null_map = &nullable->get_null_map_data();
679
1.17k
        } else {
680
182
            data_col = assert_cast<const ColumnString*>(jsonb_data_column.get());
681
182
        }
682
683
1.35k
        const auto& ldata = data_col->get_chars();
684
1.35k
        const auto& loffsets = data_col->get_offsets();
685
686
        // prepare parse path column prepare
687
1.35k
        auto&& [path_column, path_const] =
688
1.35k
                unpack_if_const(block.get_by_position(arguments[1]).column);
689
1.35k
        const ColumnString* path_col = nullptr;
690
1.35k
        const NullMap* path_null_map = nullptr;
691
1.35k
        if (path_column->is_nullable()) {
692
7
            const auto* nullable = assert_cast<const ColumnNullable*>(path_column.get());
693
7
            path_col = assert_cast<const ColumnString*>(nullable->get_nested_column_ptr().get());
694
7
            path_null_map = &nullable->get_null_map_data();
695
1.34k
        } else {
696
1.34k
            path_col = assert_cast<const ColumnString*>(path_column.get());
697
1.34k
        }
698
699
1.35k
        DCHECK(!(jsonb_data_const && path_const))
700
0
                << "jsonb_data_const and path_const should not be both const";
701
702
1.35k
        auto create_all_null_result = [&]() {
703
3
            auto res = ColumnType::create();
704
3
            res->insert_default();
705
3
            auto nullable_column =
706
3
                    ColumnNullable::create(std::move(res), ColumnUInt8::create(1, 1));
707
3
            auto const_column = ColumnConst::create(std::move(nullable_column), input_rows_count);
708
3
            block.get_by_position(result).column = std::move(const_column);
709
3
            return Status::OK();
710
3
        };
711
712
1.35k
        MutableColumnPtr result_null_map_column;
713
1.35k
        NullMap* result_null_map = nullptr;
714
1.35k
        if (data_null_map || path_null_map) {
715
1.17k
            result_null_map_column = ColumnUInt8::create(input_rows_count, 0);
716
1.17k
            result_null_map = &static_cast<ColumnUInt8&>(*result_null_map_column).get_data();
717
718
1.17k
            if (data_null_map) {
719
1.17k
                VectorizedUtils::update_null_map(*result_null_map, *data_null_map,
720
1.17k
                                                 jsonb_data_const);
721
1.17k
            }
722
723
1.17k
            if (path_null_map) {
724
7
                VectorizedUtils::update_null_map(*result_null_map, *path_null_map, path_const);
725
7
            }
726
727
1.17k
            if (!simd::contain_zero(result_null_map->data(), input_rows_count)) {
728
3
                return create_all_null_result();
729
3
            }
730
1.17k
        }
731
732
1.35k
        auto res = ColumnType::create();
733
734
1.35k
        bool is_invalid_json_path = false;
735
736
1.35k
        const auto& rdata = path_col->get_chars();
737
1.35k
        const auto& roffsets = path_col->get_offsets();
738
1.35k
        if (jsonb_data_const) {
739
2
            if (data_null_map && (*data_null_map)[0]) {
740
0
                return create_all_null_result();
741
0
            }
742
2
            scalar_vector(context, data_col->get_data_at(0), rdata, roffsets, res->get_data(),
743
2
                          result_null_map, is_invalid_json_path);
744
1.35k
        } else if (path_const) {
745
1.32k
            if (path_null_map && (*path_null_map)[0]) {
746
0
                return create_all_null_result();
747
0
            }
748
1.32k
            vector_scalar(context, ldata, loffsets, path_col->get_data_at(0), res->get_data(),
749
1.32k
                          result_null_map, is_invalid_json_path);
750
1.32k
        } else {
751
30
            vector_vector(context, ldata, loffsets, rdata, roffsets, res->get_data(),
752
30
                          result_null_map, is_invalid_json_path);
753
30
        }
754
1.35k
        if (is_invalid_json_path) {
755
7
            return Status::InvalidArgument(
756
7
                    "Json path error: Invalid Json Path for value: {}",
757
7
                    std::string_view(reinterpret_cast<const char*>(rdata.data()), rdata.size()));
758
7
        }
759
760
1.34k
        if (result_null_map) {
761
1.17k
            auto nullabel_col =
762
1.17k
                    ColumnNullable::create(std::move(res), std::move(result_null_map_column));
763
1.17k
            block.get_by_position(result).column = std::move(nullabel_col);
764
1.17k
        } else {
765
175
            block.get_by_position(result).column = std::move(res);
766
175
        }
767
1.34k
        return Status::OK();
768
1.35k
    }
769
770
private:
771
    static ALWAYS_INLINE void inner_loop_impl(size_t i, Container& res, const char* l_raw_str,
772
3.01k
                                              size_t l_str_size, JsonbPath& path) {
773
        // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
774
3.01k
        const JsonbDocument* doc = nullptr;
775
3.01k
        auto st = JsonbDocument::checkAndCreateDocument(l_raw_str, l_str_size, &doc);
776
3.01k
        if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
777
0
            return;
778
0
        }
779
780
        // value is NOT necessary to be deleted since JsonbValue will not allocate memory
781
3.01k
        auto result = doc->getValue()->findValue(path);
782
783
3.01k
        if (result.value) {
784
445
            res[i] = 1;
785
445
        }
786
3.01k
    }
787
    static void vector_vector(FunctionContext* context, const ColumnString::Chars& ldata,
788
                              const ColumnString::Offsets& loffsets,
789
                              const ColumnString::Chars& rdata,
790
                              const ColumnString::Offsets& roffsets, Container& res,
791
35
                              const NullMap* result_null_map, bool& is_invalid_json_path) {
792
35
        const size_t size = loffsets.size();
793
35
        res.resize_fill(size, 0);
794
795
80
        for (size_t i = 0; i < size; i++) {
796
50
            if (result_null_map && (*result_null_map)[i]) {
797
8
                continue;
798
8
            }
799
800
42
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
801
42
            int l_str_size = loffsets[i] - loffsets[i - 1];
802
803
42
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
804
42
            int r_str_size = roffsets[i] - roffsets[i - 1];
805
806
42
            JsonbPath path;
807
42
            if (!path.seek(r_raw_str, r_str_size)) {
808
5
                is_invalid_json_path = true;
809
5
                return;
810
5
            }
811
812
37
            inner_loop_impl(i, res, l_raw_str, l_str_size, path);
813
37
        }
814
35
    }
815
    static void scalar_vector(FunctionContext* context, const StringRef& ldata,
816
                              const ColumnString::Chars& rdata,
817
                              const ColumnString::Offsets& roffsets, Container& res,
818
2
                              const NullMap* result_null_map, bool& is_invalid_json_path) {
819
2
        const size_t size = roffsets.size();
820
2
        res.resize_fill(size, 0);
821
822
14
        for (size_t i = 0; i < size; i++) {
823
13
            if (result_null_map && (*result_null_map)[i]) {
824
4
                continue;
825
4
            }
826
9
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
827
9
            int r_str_size = roffsets[i] - roffsets[i - 1];
828
829
9
            JsonbPath path;
830
9
            if (!path.seek(r_raw_str, r_str_size)) {
831
1
                is_invalid_json_path = true;
832
1
                return;
833
1
            }
834
835
8
            inner_loop_impl(i, res, ldata.data, ldata.size, path);
836
8
        }
837
2
    }
838
    static void vector_scalar(FunctionContext* context, const ColumnString::Chars& ldata,
839
                              const ColumnString::Offsets& loffsets, const StringRef& rdata,
840
                              Container& res, const NullMap* result_null_map,
841
1.32k
                              bool& is_invalid_json_path) {
842
1.32k
        const size_t size = loffsets.size();
843
1.32k
        res.resize_fill(size, 0);
844
845
1.32k
        JsonbPath path;
846
1.32k
        if (!path.seek(rdata.data, rdata.size)) {
847
1
            is_invalid_json_path = true;
848
1
            return;
849
1
        }
850
851
4.51k
        for (size_t i = 0; i < size; i++) {
852
3.19k
            if (result_null_map && (*result_null_map)[i]) {
853
232
                continue;
854
232
            }
855
2.96k
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
856
2.96k
            int l_str_size = loffsets[i] - loffsets[i - 1];
857
858
2.96k
            inner_loop_impl(i, res, l_raw_str, l_str_size, path);
859
2.96k
        }
860
1.32k
    }
861
};
862
863
template <typename ValueType>
864
struct JsonbExtractStringImpl {
865
    using ReturnType = typename ValueType::ReturnType;
866
    using ColumnType = typename ValueType::ColumnType;
867
868
private:
869
    static ALWAYS_INLINE void inner_loop_impl(JsonbWriter* writer, size_t i,
870
                                              ColumnString::Chars& res_data,
871
                                              ColumnString::Offsets& res_offsets, NullMap& null_map,
872
                                              std::unique_ptr<JsonbToJson>& formater,
873
141k
                                              const char* l_raw, size_t l_size, JsonbPath& path) {
874
        // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
875
141k
        const JsonbDocument* doc = nullptr;
876
141k
        auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc);
877
141k
        if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
878
0
            StringOP::push_null_string(i, res_data, res_offsets, null_map);
879
0
            return;
880
0
        }
881
882
        // value is NOT necessary to be deleted since JsonbValue will not allocate memory
883
141k
        auto find_result = doc->getValue()->findValue(path);
884
885
141k
        if (UNLIKELY(!find_result.value)) {
886
20.9k
            StringOP::push_null_string(i, res_data, res_offsets, null_map);
887
20.9k
            return;
888
20.9k
        }
889
890
120k
        if constexpr (ValueType::only_get_type) {
891
429
            StringOP::push_value_string(std::string_view(find_result.value->typeName()), i,
892
429
                                        res_data, res_offsets);
893
429
            return;
894
119k
        } else {
895
119k
            static_assert(std::is_same_v<DataTypeJsonb, ReturnType>);
896
119k
            if constexpr (ValueType::no_quotes) {
897
2
                if (find_result.value->isString()) {
898
1
                    const auto* str_value = find_result.value->unpack<JsonbStringVal>();
899
1
                    const auto* blob = str_value->getBlob();
900
1
                    if (str_value->length() > 1 && blob[0] == '"' &&
901
1
                        blob[str_value->length() - 1] == '"') {
902
0
                        writer->writeStartString();
903
0
                        writer->writeString(blob + 1, str_value->length() - 2);
904
0
                        writer->writeEndString();
905
0
                        StringOP::push_value_string(
906
0
                                std::string_view(writer->getOutput()->getBuffer(),
907
0
                                                 writer->getOutput()->getSize()),
908
0
                                i, res_data, res_offsets);
909
0
                        return;
910
0
                    }
911
1
                }
912
2
            }
913
2
            writer->writeValueSimple(find_result.value);
914
119k
            StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(),
915
119k
                                                         writer->getOutput()->getSize()),
916
119k
                                        i, res_data, res_offsets);
917
119k
        }
918
120k
    }
_ZN5doris22JsonbExtractStringImplINS_13JsonbTypeTypeEE15inner_loop_implEPNS_12JsonbWriterTINS_14JsonbOutStreamEEEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEESC_RSt10unique_ptrINS_11JsonbToJsonESt14default_deleteISG_EEPKcmRNS_9JsonbPathE
Line
Count
Source
873
2.98k
                                              const char* l_raw, size_t l_size, JsonbPath& path) {
874
        // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
875
2.98k
        const JsonbDocument* doc = nullptr;
876
2.98k
        auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc);
877
2.98k
        if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
878
0
            StringOP::push_null_string(i, res_data, res_offsets, null_map);
879
0
            return;
880
0
        }
881
882
        // value is NOT necessary to be deleted since JsonbValue will not allocate memory
883
2.98k
        auto find_result = doc->getValue()->findValue(path);
884
885
2.98k
        if (UNLIKELY(!find_result.value)) {
886
2.55k
            StringOP::push_null_string(i, res_data, res_offsets, null_map);
887
2.55k
            return;
888
2.55k
        }
889
890
429
        if constexpr (ValueType::only_get_type) {
891
429
            StringOP::push_value_string(std::string_view(find_result.value->typeName()), i,
892
429
                                        res_data, res_offsets);
893
429
            return;
894
        } else {
895
            static_assert(std::is_same_v<DataTypeJsonb, ReturnType>);
896
            if constexpr (ValueType::no_quotes) {
897
                if (find_result.value->isString()) {
898
                    const auto* str_value = find_result.value->unpack<JsonbStringVal>();
899
                    const auto* blob = str_value->getBlob();
900
                    if (str_value->length() > 1 && blob[0] == '"' &&
901
                        blob[str_value->length() - 1] == '"') {
902
                        writer->writeStartString();
903
                        writer->writeString(blob + 1, str_value->length() - 2);
904
                        writer->writeEndString();
905
                        StringOP::push_value_string(
906
                                std::string_view(writer->getOutput()->getBuffer(),
907
                                                 writer->getOutput()->getSize()),
908
                                i, res_data, res_offsets);
909
                        return;
910
                    }
911
                }
912
            }
913
            writer->writeValueSimple(find_result.value);
914
            StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(),
915
                                                         writer->getOutput()->getSize()),
916
                                        i, res_data, res_offsets);
917
        }
918
429
    }
_ZN5doris22JsonbExtractStringImplINS_13JsonbTypeJsonEE15inner_loop_implEPNS_12JsonbWriterTINS_14JsonbOutStreamEEEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEESC_RSt10unique_ptrINS_11JsonbToJsonESt14default_deleteISG_EEPKcmRNS_9JsonbPathE
Line
Count
Source
873
138k
                                              const char* l_raw, size_t l_size, JsonbPath& path) {
874
        // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
875
138k
        const JsonbDocument* doc = nullptr;
876
138k
        auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc);
877
138k
        if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
878
0
            StringOP::push_null_string(i, res_data, res_offsets, null_map);
879
0
            return;
880
0
        }
881
882
        // value is NOT necessary to be deleted since JsonbValue will not allocate memory
883
138k
        auto find_result = doc->getValue()->findValue(path);
884
885
138k
        if (UNLIKELY(!find_result.value)) {
886
18.3k
            StringOP::push_null_string(i, res_data, res_offsets, null_map);
887
18.3k
            return;
888
18.3k
        }
889
890
        if constexpr (ValueType::only_get_type) {
891
            StringOP::push_value_string(std::string_view(find_result.value->typeName()), i,
892
                                        res_data, res_offsets);
893
            return;
894
119k
        } else {
895
119k
            static_assert(std::is_same_v<DataTypeJsonb, ReturnType>);
896
            if constexpr (ValueType::no_quotes) {
897
                if (find_result.value->isString()) {
898
                    const auto* str_value = find_result.value->unpack<JsonbStringVal>();
899
                    const auto* blob = str_value->getBlob();
900
                    if (str_value->length() > 1 && blob[0] == '"' &&
901
                        blob[str_value->length() - 1] == '"') {
902
                        writer->writeStartString();
903
                        writer->writeString(blob + 1, str_value->length() - 2);
904
                        writer->writeEndString();
905
                        StringOP::push_value_string(
906
                                std::string_view(writer->getOutput()->getBuffer(),
907
                                                 writer->getOutput()->getSize()),
908
                                i, res_data, res_offsets);
909
                        return;
910
                    }
911
                }
912
            }
913
119k
            writer->writeValueSimple(find_result.value);
914
119k
            StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(),
915
119k
                                                         writer->getOutput()->getSize()),
916
119k
                                        i, res_data, res_offsets);
917
119k
        }
918
119k
    }
_ZN5doris22JsonbExtractStringImplINS_21JsonbTypeJsonNoQuotesEE15inner_loop_implEPNS_12JsonbWriterTINS_14JsonbOutStreamEEEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEESC_RSt10unique_ptrINS_11JsonbToJsonESt14default_deleteISG_EEPKcmRNS_9JsonbPathE
Line
Count
Source
873
2
                                              const char* l_raw, size_t l_size, JsonbPath& path) {
874
        // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
875
2
        const JsonbDocument* doc = nullptr;
876
2
        auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc);
877
2
        if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
878
0
            StringOP::push_null_string(i, res_data, res_offsets, null_map);
879
0
            return;
880
0
        }
881
882
        // value is NOT necessary to be deleted since JsonbValue will not allocate memory
883
2
        auto find_result = doc->getValue()->findValue(path);
884
885
2
        if (UNLIKELY(!find_result.value)) {
886
0
            StringOP::push_null_string(i, res_data, res_offsets, null_map);
887
0
            return;
888
0
        }
889
890
        if constexpr (ValueType::only_get_type) {
891
            StringOP::push_value_string(std::string_view(find_result.value->typeName()), i,
892
                                        res_data, res_offsets);
893
            return;
894
2
        } else {
895
2
            static_assert(std::is_same_v<DataTypeJsonb, ReturnType>);
896
2
            if constexpr (ValueType::no_quotes) {
897
2
                if (find_result.value->isString()) {
898
1
                    const auto* str_value = find_result.value->unpack<JsonbStringVal>();
899
1
                    const auto* blob = str_value->getBlob();
900
1
                    if (str_value->length() > 1 && blob[0] == '"' &&
901
1
                        blob[str_value->length() - 1] == '"') {
902
0
                        writer->writeStartString();
903
0
                        writer->writeString(blob + 1, str_value->length() - 2);
904
0
                        writer->writeEndString();
905
0
                        StringOP::push_value_string(
906
0
                                std::string_view(writer->getOutput()->getBuffer(),
907
0
                                                 writer->getOutput()->getSize()),
908
0
                                i, res_data, res_offsets);
909
0
                        return;
910
0
                    }
911
1
                }
912
2
            }
913
2
            writer->writeValueSimple(find_result.value);
914
2
            StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(),
915
2
                                                         writer->getOutput()->getSize()),
916
2
                                        i, res_data, res_offsets);
917
2
        }
918
2
    }
919
920
public:
921
    // for jsonb_extract_string
922
    static Status vector_vector_v2(
923
            FunctionContext* context, const ColumnString::Chars& ldata,
924
            const ColumnString::Offsets& loffsets, const NullMap* l_null_map,
925
            const bool& json_data_const,
926
            const std::vector<const ColumnString*>& rdata_columns, // here we can support more paths
927
            const std::vector<const NullMap*>& r_null_maps, const std::vector<bool>& path_const,
928
11.3k
            ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets, NullMap& null_map) {
929
11.3k
        const size_t input_rows_count = null_map.size();
930
11.3k
        res_offsets.resize(input_rows_count);
931
932
11.3k
        auto writer = std::make_unique<JsonbWriter>();
933
11.3k
        std::unique_ptr<JsonbToJson> formater;
934
935
        // reuseable json path list, espacially for const path
936
11.3k
        std::vector<JsonbPath> json_path_list;
937
11.3k
        json_path_list.resize(rdata_columns.size());
938
939
        // lambda function to parse json path for row i and path pi
940
11.7k
        auto parse_json_path = [&](size_t i, size_t pi) -> Status {
941
11.7k
            const auto index = index_check_const(i, path_const[pi]);
942
943
11.7k
            const ColumnString* path_col = rdata_columns[pi];
944
11.7k
            const ColumnString::Chars& rdata = path_col->get_chars();
945
11.7k
            const ColumnString::Offsets& roffsets = path_col->get_offsets();
946
11.7k
            size_t r_off = roffsets[index - 1];
947
11.7k
            size_t r_size = roffsets[index] - r_off;
948
11.7k
            const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]);
949
950
11.7k
            JsonbPath path;
951
11.7k
            if (!path.seek(r_raw, r_size)) {
952
7
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
953
7
                                               std::string_view(r_raw, r_size));
954
7
            }
955
956
11.7k
            json_path_list[pi] = std::move(path);
957
958
11.7k
            return Status::OK();
959
11.7k
        };
_ZZN5doris22JsonbExtractStringImplINS_13JsonbTypeTypeEE16vector_vector_v2EPNS_15FunctionContextERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS5_IjLm4096ES8_Lm16ELm15EEEPSA_RKbRKSt6vectorIPKNS_9ColumnStrIjEESaISM_EERKSI_ISF_SaISF_EERKSI_IbSaIbEERS9_RSC_SZ_ENKUlmmE_clEmm
Line
Count
Source
940
1.34k
        auto parse_json_path = [&](size_t i, size_t pi) -> Status {
941
1.34k
            const auto index = index_check_const(i, path_const[pi]);
942
943
1.34k
            const ColumnString* path_col = rdata_columns[pi];
944
1.34k
            const ColumnString::Chars& rdata = path_col->get_chars();
945
1.34k
            const ColumnString::Offsets& roffsets = path_col->get_offsets();
946
1.34k
            size_t r_off = roffsets[index - 1];
947
1.34k
            size_t r_size = roffsets[index] - r_off;
948
1.34k
            const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]);
949
950
1.34k
            JsonbPath path;
951
1.34k
            if (!path.seek(r_raw, r_size)) {
952
1
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
953
1
                                               std::string_view(r_raw, r_size));
954
1
            }
955
956
1.33k
            json_path_list[pi] = std::move(path);
957
958
1.33k
            return Status::OK();
959
1.34k
        };
_ZZN5doris22JsonbExtractStringImplINS_13JsonbTypeJsonEE16vector_vector_v2EPNS_15FunctionContextERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS5_IjLm4096ES8_Lm16ELm15EEEPSA_RKbRKSt6vectorIPKNS_9ColumnStrIjEESaISM_EERKSI_ISF_SaISF_EERKSI_IbSaIbEERS9_RSC_SZ_ENKUlmmE_clEmm
Line
Count
Source
940
10.3k
        auto parse_json_path = [&](size_t i, size_t pi) -> Status {
941
10.3k
            const auto index = index_check_const(i, path_const[pi]);
942
943
10.3k
            const ColumnString* path_col = rdata_columns[pi];
944
10.3k
            const ColumnString::Chars& rdata = path_col->get_chars();
945
10.3k
            const ColumnString::Offsets& roffsets = path_col->get_offsets();
946
10.3k
            size_t r_off = roffsets[index - 1];
947
10.3k
            size_t r_size = roffsets[index] - r_off;
948
10.3k
            const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]);
949
950
10.3k
            JsonbPath path;
951
10.3k
            if (!path.seek(r_raw, r_size)) {
952
6
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
953
6
                                               std::string_view(r_raw, r_size));
954
6
            }
955
956
10.3k
            json_path_list[pi] = std::move(path);
957
958
10.3k
            return Status::OK();
959
10.3k
        };
_ZZN5doris22JsonbExtractStringImplINS_21JsonbTypeJsonNoQuotesEE16vector_vector_v2EPNS_15FunctionContextERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS5_IjLm4096ES8_Lm16ELm15EEEPSA_RKbRKSt6vectorIPKNS_9ColumnStrIjEESaISM_EERKSI_ISF_SaISF_EERKSI_IbSaIbEERS9_RSC_SZ_ENKUlmmE_clEmm
Line
Count
Source
940
8
        auto parse_json_path = [&](size_t i, size_t pi) -> Status {
941
8
            const auto index = index_check_const(i, path_const[pi]);
942
943
8
            const ColumnString* path_col = rdata_columns[pi];
944
8
            const ColumnString::Chars& rdata = path_col->get_chars();
945
8
            const ColumnString::Offsets& roffsets = path_col->get_offsets();
946
8
            size_t r_off = roffsets[index - 1];
947
8
            size_t r_size = roffsets[index] - r_off;
948
8
            const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]);
949
950
8
            JsonbPath path;
951
8
            if (!path.seek(r_raw, r_size)) {
952
0
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
953
0
                                               std::string_view(r_raw, r_size));
954
0
            }
955
956
8
            json_path_list[pi] = std::move(path);
957
958
8
            return Status::OK();
959
8
        };
960
961
22.9k
        for (size_t pi = 0; pi < rdata_columns.size(); pi++) {
962
11.6k
            if (path_const[pi]) {
963
11.4k
                if (r_null_maps[pi] && (*r_null_maps[pi])[0]) {
964
41
                    continue;
965
41
                }
966
11.3k
                RETURN_IF_ERROR(parse_json_path(0, pi));
967
11.3k
            }
968
11.6k
        }
969
970
11.3k
        res_data.reserve(ldata.size());
971
154k
        for (size_t i = 0; i < input_rows_count; ++i) {
972
143k
            if (null_map[i]) {
973
0
                continue;
974
0
            }
975
976
143k
            const auto data_index = index_check_const(i, json_data_const);
977
143k
            if (l_null_map && (*l_null_map)[data_index]) {
978
1.90k
                StringOP::push_null_string(i, res_data, res_offsets, null_map);
979
1.90k
                continue;
980
1.90k
            }
981
982
141k
            size_t l_off = loffsets[data_index - 1];
983
141k
            size_t l_size = loffsets[data_index] - l_off;
984
141k
            const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]);
985
141k
            if (rdata_columns.size() == 1) { // just return origin value
986
141k
                const auto path_index = index_check_const(i, path_const[0]);
987
141k
                if (r_null_maps[0] && (*r_null_maps[0])[path_index]) {
988
30
                    StringOP::push_null_string(i, res_data, res_offsets, null_map);
989
30
                    continue;
990
30
                }
991
992
141k
                if (!path_const[0]) {
993
288
                    RETURN_IF_ERROR(parse_json_path(i, 0));
994
288
                }
995
996
141k
                writer->reset();
997
141k
                inner_loop_impl(writer.get(), i, res_data, res_offsets, null_map, formater, l_raw,
998
141k
                                l_size, json_path_list[0]);
999
141k
            } else { // will make array string to user
1000
613
                writer->reset();
1001
613
                bool has_value = false;
1002
1003
                // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
1004
613
                const JsonbDocument* doc = nullptr;
1005
613
                auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc);
1006
1007
1.70k
                for (size_t pi = 0; pi < rdata_columns.size(); ++pi) {
1008
1.19k
                    if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
1009
0
                        continue;
1010
0
                    }
1011
1012
1.19k
                    const auto path_index = index_check_const(i, path_const[pi]);
1013
1.19k
                    if (r_null_maps[pi] && (*r_null_maps[pi])[path_index]) {
1014
99
                        StringOP::push_null_string(i, res_data, res_offsets, null_map);
1015
99
                        break;
1016
99
                    }
1017
1018
1.09k
                    if (!path_const[pi]) {
1019
28
                        RETURN_IF_ERROR(parse_json_path(i, pi));
1020
28
                    }
1021
1022
1.09k
                    auto find_result = doc->getValue()->findValue(json_path_list[pi]);
1023
1024
1.09k
                    if (find_result.value) {
1025
255
                        if (!has_value) {
1026
141
                            has_value = true;
1027
141
                            writer->writeStartArray();
1028
141
                        }
1029
255
                        if (find_result.value->isArray() && find_result.is_wildcard) {
1030
                            // To avoid getting results of nested array like [[1, 2, 3], [4, 5, 6]],
1031
                            // if value is array, we should write all items in array, instead of write the array itself.
1032
                            // finaly we will get results like [1, 2, 3, 4, 5, 6]
1033
45
                            for (const auto& item : *find_result.value->unpack<ArrayVal>()) {
1034
45
                                writer->writeValue(&item);
1035
45
                            }
1036
235
                        } else {
1037
235
                            writer->writeValue(find_result.value);
1038
235
                        }
1039
255
                    }
1040
1.09k
                }
1041
613
                if (has_value) {
1042
141
                    writer->writeEndArray();
1043
141
                    StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(),
1044
141
                                                                 writer->getOutput()->getSize()),
1045
141
                                                i, res_data, res_offsets);
1046
472
                } else {
1047
472
                    StringOP::push_null_string(i, res_data, res_offsets, null_map);
1048
472
                }
1049
613
            }
1050
141k
        } //for
1051
11.3k
        return Status::OK();
1052
11.3k
    }
_ZN5doris22JsonbExtractStringImplINS_13JsonbTypeTypeEE16vector_vector_v2EPNS_15FunctionContextERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS5_IjLm4096ES8_Lm16ELm15EEEPSA_RKbRKSt6vectorIPKNS_9ColumnStrIjEESaISM_EERKSI_ISF_SaISF_EERKSI_IbSaIbEERS9_RSC_SZ_
Line
Count
Source
928
1.32k
            ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets, NullMap& null_map) {
929
1.32k
        const size_t input_rows_count = null_map.size();
930
1.32k
        res_offsets.resize(input_rows_count);
931
932
1.32k
        auto writer = std::make_unique<JsonbWriter>();
933
1.32k
        std::unique_ptr<JsonbToJson> formater;
934
935
        // reuseable json path list, espacially for const path
936
1.32k
        std::vector<JsonbPath> json_path_list;
937
1.32k
        json_path_list.resize(rdata_columns.size());
938
939
        // lambda function to parse json path for row i and path pi
940
1.32k
        auto parse_json_path = [&](size_t i, size_t pi) -> Status {
941
1.32k
            const auto index = index_check_const(i, path_const[pi]);
942
943
1.32k
            const ColumnString* path_col = rdata_columns[pi];
944
1.32k
            const ColumnString::Chars& rdata = path_col->get_chars();
945
1.32k
            const ColumnString::Offsets& roffsets = path_col->get_offsets();
946
1.32k
            size_t r_off = roffsets[index - 1];
947
1.32k
            size_t r_size = roffsets[index] - r_off;
948
1.32k
            const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]);
949
950
1.32k
            JsonbPath path;
951
1.32k
            if (!path.seek(r_raw, r_size)) {
952
1.32k
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
953
1.32k
                                               std::string_view(r_raw, r_size));
954
1.32k
            }
955
956
1.32k
            json_path_list[pi] = std::move(path);
957
958
1.32k
            return Status::OK();
959
1.32k
        };
960
961
2.65k
        for (size_t pi = 0; pi < rdata_columns.size(); pi++) {
962
1.32k
            if (path_const[pi]) {
963
1.32k
                if (r_null_maps[pi] && (*r_null_maps[pi])[0]) {
964
1
                    continue;
965
1
                }
966
1.32k
                RETURN_IF_ERROR(parse_json_path(0, pi));
967
1.32k
            }
968
1.32k
        }
969
970
1.32k
        res_data.reserve(ldata.size());
971
4.57k
        for (size_t i = 0; i < input_rows_count; ++i) {
972
3.24k
            if (null_map[i]) {
973
0
                continue;
974
0
            }
975
976
3.24k
            const auto data_index = index_check_const(i, json_data_const);
977
3.24k
            if (l_null_map && (*l_null_map)[data_index]) {
978
248
                StringOP::push_null_string(i, res_data, res_offsets, null_map);
979
248
                continue;
980
248
            }
981
982
3.00k
            size_t l_off = loffsets[data_index - 1];
983
3.00k
            size_t l_size = loffsets[data_index] - l_off;
984
3.00k
            const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]);
985
3.00k
            if (rdata_columns.size() == 1) { // just return origin value
986
3.00k
                const auto path_index = index_check_const(i, path_const[0]);
987
3.00k
                if (r_null_maps[0] && (*r_null_maps[0])[path_index]) {
988
16
                    StringOP::push_null_string(i, res_data, res_offsets, null_map);
989
16
                    continue;
990
16
                }
991
992
2.98k
                if (!path_const[0]) {
993
18
                    RETURN_IF_ERROR(parse_json_path(i, 0));
994
18
                }
995
996
2.98k
                writer->reset();
997
2.98k
                inner_loop_impl(writer.get(), i, res_data, res_offsets, null_map, formater, l_raw,
998
2.98k
                                l_size, json_path_list[0]);
999
2.98k
            } else { // will make array string to user
1000
0
                writer->reset();
1001
0
                bool has_value = false;
1002
1003
                // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
1004
0
                const JsonbDocument* doc = nullptr;
1005
0
                auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc);
1006
1007
0
                for (size_t pi = 0; pi < rdata_columns.size(); ++pi) {
1008
0
                    if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
1009
0
                        continue;
1010
0
                    }
1011
1012
0
                    const auto path_index = index_check_const(i, path_const[pi]);
1013
0
                    if (r_null_maps[pi] && (*r_null_maps[pi])[path_index]) {
1014
0
                        StringOP::push_null_string(i, res_data, res_offsets, null_map);
1015
0
                        break;
1016
0
                    }
1017
1018
0
                    if (!path_const[pi]) {
1019
0
                        RETURN_IF_ERROR(parse_json_path(i, pi));
1020
0
                    }
1021
1022
0
                    auto find_result = doc->getValue()->findValue(json_path_list[pi]);
1023
1024
0
                    if (find_result.value) {
1025
0
                        if (!has_value) {
1026
0
                            has_value = true;
1027
0
                            writer->writeStartArray();
1028
0
                        }
1029
0
                        if (find_result.value->isArray() && find_result.is_wildcard) {
1030
                            // To avoid getting results of nested array like [[1, 2, 3], [4, 5, 6]],
1031
                            // if value is array, we should write all items in array, instead of write the array itself.
1032
                            // finaly we will get results like [1, 2, 3, 4, 5, 6]
1033
0
                            for (const auto& item : *find_result.value->unpack<ArrayVal>()) {
1034
0
                                writer->writeValue(&item);
1035
0
                            }
1036
0
                        } else {
1037
0
                            writer->writeValue(find_result.value);
1038
0
                        }
1039
0
                    }
1040
0
                }
1041
0
                if (has_value) {
1042
0
                    writer->writeEndArray();
1043
0
                    StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(),
1044
0
                                                                 writer->getOutput()->getSize()),
1045
0
                                                i, res_data, res_offsets);
1046
0
                } else {
1047
0
                    StringOP::push_null_string(i, res_data, res_offsets, null_map);
1048
0
                }
1049
0
            }
1050
3.00k
        } //for
1051
1.32k
        return Status::OK();
1052
1.32k
    }
_ZN5doris22JsonbExtractStringImplINS_13JsonbTypeJsonEE16vector_vector_v2EPNS_15FunctionContextERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS5_IjLm4096ES8_Lm16ELm15EEEPSA_RKbRKSt6vectorIPKNS_9ColumnStrIjEESaISM_EERKSI_ISF_SaISF_EERKSI_IbSaIbEERS9_RSC_SZ_
Line
Count
Source
928
10.0k
            ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets, NullMap& null_map) {
929
10.0k
        const size_t input_rows_count = null_map.size();
930
10.0k
        res_offsets.resize(input_rows_count);
931
932
10.0k
        auto writer = std::make_unique<JsonbWriter>();
933
10.0k
        std::unique_ptr<JsonbToJson> formater;
934
935
        // reuseable json path list, espacially for const path
936
10.0k
        std::vector<JsonbPath> json_path_list;
937
10.0k
        json_path_list.resize(rdata_columns.size());
938
939
        // lambda function to parse json path for row i and path pi
940
10.0k
        auto parse_json_path = [&](size_t i, size_t pi) -> Status {
941
10.0k
            const auto index = index_check_const(i, path_const[pi]);
942
943
10.0k
            const ColumnString* path_col = rdata_columns[pi];
944
10.0k
            const ColumnString::Chars& rdata = path_col->get_chars();
945
10.0k
            const ColumnString::Offsets& roffsets = path_col->get_offsets();
946
10.0k
            size_t r_off = roffsets[index - 1];
947
10.0k
            size_t r_size = roffsets[index] - r_off;
948
10.0k
            const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]);
949
950
10.0k
            JsonbPath path;
951
10.0k
            if (!path.seek(r_raw, r_size)) {
952
10.0k
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
953
10.0k
                                               std::string_view(r_raw, r_size));
954
10.0k
            }
955
956
10.0k
            json_path_list[pi] = std::move(path);
957
958
10.0k
            return Status::OK();
959
10.0k
        };
960
961
20.3k
        for (size_t pi = 0; pi < rdata_columns.size(); pi++) {
962
10.3k
            if (path_const[pi]) {
963
10.1k
                if (r_null_maps[pi] && (*r_null_maps[pi])[0]) {
964
40
                    continue;
965
40
                }
966
10.0k
                RETURN_IF_ERROR(parse_json_path(0, pi));
967
10.0k
            }
968
10.3k
        }
969
970
9.99k
        res_data.reserve(ldata.size());
971
150k
        for (size_t i = 0; i < input_rows_count; ++i) {
972
140k
            if (null_map[i]) {
973
0
                continue;
974
0
            }
975
976
140k
            const auto data_index = index_check_const(i, json_data_const);
977
140k
            if (l_null_map && (*l_null_map)[data_index]) {
978
1.65k
                StringOP::push_null_string(i, res_data, res_offsets, null_map);
979
1.65k
                continue;
980
1.65k
            }
981
982
138k
            size_t l_off = loffsets[data_index - 1];
983
138k
            size_t l_size = loffsets[data_index] - l_off;
984
138k
            const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]);
985
138k
            if (rdata_columns.size() == 1) { // just return origin value
986
138k
                const auto path_index = index_check_const(i, path_const[0]);
987
138k
                if (r_null_maps[0] && (*r_null_maps[0])[path_index]) {
988
14
                    StringOP::push_null_string(i, res_data, res_offsets, null_map);
989
14
                    continue;
990
14
                }
991
992
138k
                if (!path_const[0]) {
993
268
                    RETURN_IF_ERROR(parse_json_path(i, 0));
994
268
                }
995
996
138k
                writer->reset();
997
138k
                inner_loop_impl(writer.get(), i, res_data, res_offsets, null_map, formater, l_raw,
998
138k
                                l_size, json_path_list[0]);
999
138k
            } else { // will make array string to user
1000
610
                writer->reset();
1001
610
                bool has_value = false;
1002
1003
                // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
1004
610
                const JsonbDocument* doc = nullptr;
1005
610
                auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc);
1006
1007
1.69k
                for (size_t pi = 0; pi < rdata_columns.size(); ++pi) {
1008
1.18k
                    if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
1009
0
                        continue;
1010
0
                    }
1011
1012
1.18k
                    const auto path_index = index_check_const(i, path_const[pi]);
1013
1.18k
                    if (r_null_maps[pi] && (*r_null_maps[pi])[path_index]) {
1014
98
                        StringOP::push_null_string(i, res_data, res_offsets, null_map);
1015
98
                        break;
1016
98
                    }
1017
1018
1.08k
                    if (!path_const[pi]) {
1019
22
                        RETURN_IF_ERROR(parse_json_path(i, pi));
1020
22
                    }
1021
1022
1.08k
                    auto find_result = doc->getValue()->findValue(json_path_list[pi]);
1023
1024
1.08k
                    if (find_result.value) {
1025
249
                        if (!has_value) {
1026
138
                            has_value = true;
1027
138
                            writer->writeStartArray();
1028
138
                        }
1029
249
                        if (find_result.value->isArray() && find_result.is_wildcard) {
1030
                            // To avoid getting results of nested array like [[1, 2, 3], [4, 5, 6]],
1031
                            // if value is array, we should write all items in array, instead of write the array itself.
1032
                            // finaly we will get results like [1, 2, 3, 4, 5, 6]
1033
45
                            for (const auto& item : *find_result.value->unpack<ArrayVal>()) {
1034
45
                                writer->writeValue(&item);
1035
45
                            }
1036
229
                        } else {
1037
229
                            writer->writeValue(find_result.value);
1038
229
                        }
1039
249
                    }
1040
1.08k
                }
1041
610
                if (has_value) {
1042
138
                    writer->writeEndArray();
1043
138
                    StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(),
1044
138
                                                                 writer->getOutput()->getSize()),
1045
138
                                                i, res_data, res_offsets);
1046
472
                } else {
1047
472
                    StringOP::push_null_string(i, res_data, res_offsets, null_map);
1048
472
                }
1049
610
            }
1050
138k
        } //for
1051
9.99k
        return Status::OK();
1052
9.99k
    }
_ZN5doris22JsonbExtractStringImplINS_21JsonbTypeJsonNoQuotesEE16vector_vector_v2EPNS_15FunctionContextERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS5_IjLm4096ES8_Lm16ELm15EEEPSA_RKbRKSt6vectorIPKNS_9ColumnStrIjEESaISM_EERKSI_ISF_SaISF_EERKSI_IbSaIbEERS9_RSC_SZ_
Line
Count
Source
928
6
            ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets, NullMap& null_map) {
929
6
        const size_t input_rows_count = null_map.size();
930
6
        res_offsets.resize(input_rows_count);
931
932
6
        auto writer = std::make_unique<JsonbWriter>();
933
6
        std::unique_ptr<JsonbToJson> formater;
934
935
        // reuseable json path list, espacially for const path
936
6
        std::vector<JsonbPath> json_path_list;
937
6
        json_path_list.resize(rdata_columns.size());
938
939
        // lambda function to parse json path for row i and path pi
940
6
        auto parse_json_path = [&](size_t i, size_t pi) -> Status {
941
6
            const auto index = index_check_const(i, path_const[pi]);
942
943
6
            const ColumnString* path_col = rdata_columns[pi];
944
6
            const ColumnString::Chars& rdata = path_col->get_chars();
945
6
            const ColumnString::Offsets& roffsets = path_col->get_offsets();
946
6
            size_t r_off = roffsets[index - 1];
947
6
            size_t r_size = roffsets[index] - r_off;
948
6
            const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]);
949
950
6
            JsonbPath path;
951
6
            if (!path.seek(r_raw, r_size)) {
952
6
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
953
6
                                               std::string_view(r_raw, r_size));
954
6
            }
955
956
6
            json_path_list[pi] = std::move(path);
957
958
6
            return Status::OK();
959
6
        };
960
961
16
        for (size_t pi = 0; pi < rdata_columns.size(); pi++) {
962
10
            if (path_const[pi]) {
963
0
                if (r_null_maps[pi] && (*r_null_maps[pi])[0]) {
964
0
                    continue;
965
0
                }
966
0
                RETURN_IF_ERROR(parse_json_path(0, pi));
967
0
            }
968
10
        }
969
970
6
        res_data.reserve(ldata.size());
971
12
        for (size_t i = 0; i < input_rows_count; ++i) {
972
6
            if (null_map[i]) {
973
0
                continue;
974
0
            }
975
976
6
            const auto data_index = index_check_const(i, json_data_const);
977
6
            if (l_null_map && (*l_null_map)[data_index]) {
978
1
                StringOP::push_null_string(i, res_data, res_offsets, null_map);
979
1
                continue;
980
1
            }
981
982
5
            size_t l_off = loffsets[data_index - 1];
983
5
            size_t l_size = loffsets[data_index] - l_off;
984
5
            const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]);
985
5
            if (rdata_columns.size() == 1) { // just return origin value
986
2
                const auto path_index = index_check_const(i, path_const[0]);
987
2
                if (r_null_maps[0] && (*r_null_maps[0])[path_index]) {
988
0
                    StringOP::push_null_string(i, res_data, res_offsets, null_map);
989
0
                    continue;
990
0
                }
991
992
2
                if (!path_const[0]) {
993
2
                    RETURN_IF_ERROR(parse_json_path(i, 0));
994
2
                }
995
996
2
                writer->reset();
997
2
                inner_loop_impl(writer.get(), i, res_data, res_offsets, null_map, formater, l_raw,
998
2
                                l_size, json_path_list[0]);
999
3
            } else { // will make array string to user
1000
3
                writer->reset();
1001
3
                bool has_value = false;
1002
1003
                // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
1004
3
                const JsonbDocument* doc = nullptr;
1005
3
                auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc);
1006
1007
9
                for (size_t pi = 0; pi < rdata_columns.size(); ++pi) {
1008
7
                    if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
1009
0
                        continue;
1010
0
                    }
1011
1012
7
                    const auto path_index = index_check_const(i, path_const[pi]);
1013
7
                    if (r_null_maps[pi] && (*r_null_maps[pi])[path_index]) {
1014
1
                        StringOP::push_null_string(i, res_data, res_offsets, null_map);
1015
1
                        break;
1016
1
                    }
1017
1018
6
                    if (!path_const[pi]) {
1019
6
                        RETURN_IF_ERROR(parse_json_path(i, pi));
1020
6
                    }
1021
1022
6
                    auto find_result = doc->getValue()->findValue(json_path_list[pi]);
1023
1024
6
                    if (find_result.value) {
1025
6
                        if (!has_value) {
1026
3
                            has_value = true;
1027
3
                            writer->writeStartArray();
1028
3
                        }
1029
6
                        if (find_result.value->isArray() && find_result.is_wildcard) {
1030
                            // To avoid getting results of nested array like [[1, 2, 3], [4, 5, 6]],
1031
                            // if value is array, we should write all items in array, instead of write the array itself.
1032
                            // finaly we will get results like [1, 2, 3, 4, 5, 6]
1033
0
                            for (const auto& item : *find_result.value->unpack<ArrayVal>()) {
1034
0
                                writer->writeValue(&item);
1035
0
                            }
1036
6
                        } else {
1037
6
                            writer->writeValue(find_result.value);
1038
6
                        }
1039
6
                    }
1040
6
                }
1041
3
                if (has_value) {
1042
3
                    writer->writeEndArray();
1043
3
                    StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(),
1044
3
                                                                 writer->getOutput()->getSize()),
1045
3
                                                i, res_data, res_offsets);
1046
3
                } else {
1047
0
                    StringOP::push_null_string(i, res_data, res_offsets, null_map);
1048
0
                }
1049
3
            }
1050
5
        } //for
1051
6
        return Status::OK();
1052
6
    }
1053
1054
    static Status vector_vector(FunctionContext* context, const ColumnString::Chars& ldata,
1055
                                const ColumnString::Offsets& loffsets, const NullMap* l_null_map,
1056
                                const ColumnString::Chars& rdata,
1057
                                const ColumnString::Offsets& roffsets, const NullMap* r_null_map,
1058
                                ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets,
1059
                                NullMap& null_map) {
1060
        size_t input_rows_count = loffsets.size();
1061
        res_offsets.resize(input_rows_count);
1062
1063
        std::unique_ptr<JsonbToJson> formater;
1064
1065
        JsonbWriter writer;
1066
        for (size_t i = 0; i < input_rows_count; ++i) {
1067
            if (l_null_map && (*l_null_map)[i]) {
1068
                StringOP::push_null_string(i, res_data, res_offsets, null_map);
1069
                continue;
1070
            }
1071
1072
            if (r_null_map && (*r_null_map)[i]) {
1073
                StringOP::push_null_string(i, res_data, res_offsets, null_map);
1074
                continue;
1075
            }
1076
1077
            int l_size = loffsets[i] - loffsets[i - 1];
1078
            const char* l_raw = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
1079
1080
            int r_size = roffsets[i] - roffsets[i - 1];
1081
            const char* r_raw = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
1082
1083
            JsonbPath path;
1084
            if (!path.seek(r_raw, r_size)) {
1085
                return Status::InvalidArgument(
1086
                        "Json path error: Invalid Json Path for value: {} at row: {}",
1087
                        std::string_view(r_raw, r_size), i);
1088
            }
1089
1090
            writer.reset();
1091
            inner_loop_impl(&writer, i, res_data, res_offsets, null_map, formater, l_raw, l_size,
1092
                            path);
1093
        } //for
1094
        return Status::OK();
1095
    } //function
1096
1097
    static Status vector_scalar(FunctionContext* context, const ColumnString::Chars& ldata,
1098
                                const ColumnString::Offsets& loffsets, const NullMap* l_null_map,
1099
                                const StringRef& rdata, ColumnString::Chars& res_data,
1100
                                ColumnString::Offsets& res_offsets, NullMap& null_map) {
1101
        size_t input_rows_count = loffsets.size();
1102
        res_offsets.resize(input_rows_count);
1103
1104
        std::unique_ptr<JsonbToJson> formater;
1105
1106
        JsonbPath path;
1107
        if (!path.seek(rdata.data, rdata.size)) {
1108
            return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
1109
                                           std::string_view(rdata.data, rdata.size));
1110
        }
1111
1112
        JsonbWriter writer;
1113
        for (size_t i = 0; i < input_rows_count; ++i) {
1114
            if (l_null_map && (*l_null_map)[i]) {
1115
                StringOP::push_null_string(i, res_data, res_offsets, null_map);
1116
                continue;
1117
            }
1118
1119
            int l_size = loffsets[i] - loffsets[i - 1];
1120
            const char* l_raw = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
1121
1122
            writer.reset();
1123
            inner_loop_impl(&writer, i, res_data, res_offsets, null_map, formater, l_raw, l_size,
1124
                            path);
1125
        } //for
1126
        return Status::OK();
1127
    } //function
1128
1129
    static Status scalar_vector(FunctionContext* context, const StringRef& ldata,
1130
                                const ColumnString::Chars& rdata,
1131
                                const ColumnString::Offsets& roffsets, const NullMap* r_null_map,
1132
                                ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets,
1133
                                NullMap& null_map) {
1134
        size_t input_rows_count = roffsets.size();
1135
        res_offsets.resize(input_rows_count);
1136
1137
        std::unique_ptr<JsonbToJson> formater;
1138
1139
        JsonbWriter writer;
1140
1141
        for (size_t i = 0; i < input_rows_count; ++i) {
1142
            if (r_null_map && (*r_null_map)[i]) {
1143
                StringOP::push_null_string(i, res_data, res_offsets, null_map);
1144
                continue;
1145
            }
1146
1147
            int r_size = roffsets[i] - roffsets[i - 1];
1148
            const char* r_raw = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
1149
1150
            JsonbPath path;
1151
            if (!path.seek(r_raw, r_size)) {
1152
                return Status::InvalidArgument(
1153
                        "Json path error: Invalid Json Path for value: {} at row: {}",
1154
                        std::string_view(r_raw, r_size), i);
1155
            }
1156
1157
            writer.reset();
1158
            inner_loop_impl(&writer, i, res_data, res_offsets, null_map, formater, ldata.data,
1159
                            ldata.size, path);
1160
        } //for
1161
        return Status::OK();
1162
    } //function
1163
};
1164
1165
struct JsonbExtractIsnull {
1166
    static constexpr auto name = "json_extract_isnull";
1167
    static constexpr auto alias = "jsonb_extract_isnull";
1168
1169
    using ReturnType = DataTypeUInt8;
1170
    using ColumnType = ColumnUInt8;
1171
    using Container = typename ColumnType::Container;
1172
1173
private:
1174
    static ALWAYS_INLINE void inner_loop_impl(size_t i, Container& res, NullMap& null_map,
1175
                                              const char* l_raw_str, size_t l_str_size,
1176
2.97k
                                              JsonbPath& path) {
1177
2.97k
        if (null_map[i]) {
1178
0
            res[i] = 0;
1179
0
            return;
1180
0
        }
1181
1182
        // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
1183
2.97k
        const JsonbDocument* doc = nullptr;
1184
2.97k
        auto st = JsonbDocument::checkAndCreateDocument(l_raw_str, l_str_size, &doc);
1185
2.97k
        if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
1186
0
            null_map[i] = 1;
1187
0
            res[i] = 0;
1188
0
            return;
1189
0
        }
1190
1191
        // value is NOT necessary to be deleted since JsonbValue will not allocate memory
1192
2.97k
        auto find_result = doc->getValue()->findValue(path);
1193
2.97k
        const auto* value = find_result.value;
1194
1195
2.97k
        if (UNLIKELY(!value)) {
1196
2.55k
            null_map[i] = 1;
1197
2.55k
            res[i] = 0;
1198
2.55k
            return;
1199
2.55k
        }
1200
1201
417
        res[i] = value->isNull();
1202
417
    }
1203
1204
public:
1205
    // for jsonb_extract_int/int64/double
1206
    static Status vector_vector(FunctionContext* context, const ColumnString::Chars& ldata,
1207
                                const ColumnString::Offsets& loffsets, const NullMap* l_null_map,
1208
                                const ColumnString::Chars& rdata,
1209
                                const ColumnString::Offsets& roffsets, const NullMap* r_null_map,
1210
1
                                Container& res, NullMap& null_map) {
1211
1
        size_t size = loffsets.size();
1212
1
        res.resize(size);
1213
1214
13
        for (size_t i = 0; i < loffsets.size(); i++) {
1215
12
            if ((l_null_map && (*l_null_map)[i]) || (r_null_map && (*r_null_map)[i])) {
1216
8
                res[i] = 0;
1217
8
                null_map[i] = 1;
1218
8
                continue;
1219
8
            }
1220
1221
4
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
1222
4
            int l_str_size = loffsets[i] - loffsets[i - 1];
1223
1224
4
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
1225
4
            int r_str_size = roffsets[i] - roffsets[i - 1];
1226
1227
4
            JsonbPath path;
1228
4
            if (!path.seek(r_raw_str, r_str_size)) {
1229
0
                return Status::InvalidArgument(
1230
0
                        "Json path error: Invalid Json Path for value: {} at row: {}",
1231
0
                        std::string_view(r_raw_str, r_str_size), i);
1232
0
            }
1233
1234
4
            inner_loop_impl(i, res, null_map, l_raw_str, l_str_size, path);
1235
4
        } //for
1236
1
        return Status::OK();
1237
1
    } //function
1238
1239
    static Status scalar_vector(FunctionContext* context, const StringRef& ldata,
1240
                                const ColumnString::Chars& rdata,
1241
                                const ColumnString::Offsets& roffsets, const NullMap* r_null_map,
1242
1
                                Container& res, NullMap& null_map) {
1243
1
        size_t size = roffsets.size();
1244
1
        res.resize(size);
1245
1246
13
        for (size_t i = 0; i < size; i++) {
1247
12
            if (r_null_map && (*r_null_map)[i]) {
1248
4
                res[i] = 0;
1249
4
                null_map[i] = 1;
1250
4
                continue;
1251
4
            }
1252
1253
8
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
1254
8
            int r_str_size = roffsets[i] - roffsets[i - 1];
1255
1256
8
            JsonbPath path;
1257
8
            if (!path.seek(r_raw_str, r_str_size)) {
1258
0
                return Status::InvalidArgument(
1259
0
                        "Json path error: Invalid Json Path for value: {} at row: {}",
1260
0
                        std::string_view(r_raw_str, r_str_size), i);
1261
0
            }
1262
1263
8
            inner_loop_impl(i, res, null_map, ldata.data, ldata.size, path);
1264
8
        } //for
1265
1
        return Status::OK();
1266
1
    } //function
1267
1268
    static Status vector_scalar(FunctionContext* context, const ColumnString::Chars& ldata,
1269
                                const ColumnString::Offsets& loffsets, const NullMap* l_null_map,
1270
1.32k
                                const StringRef& rdata, Container& res, NullMap& null_map) {
1271
1.32k
        size_t size = loffsets.size();
1272
1.32k
        res.resize(size);
1273
1274
1.32k
        JsonbPath path;
1275
1.32k
        if (!path.seek(rdata.data, rdata.size)) {
1276
0
            return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
1277
0
                                           std::string_view(rdata.data, rdata.size));
1278
0
        }
1279
1280
4.50k
        for (size_t i = 0; i < loffsets.size(); i++) {
1281
3.18k
            if (l_null_map && (*l_null_map)[i]) {
1282
228
                res[i] = 0;
1283
228
                null_map[i] = 1;
1284
228
                continue;
1285
228
            }
1286
1287
2.95k
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
1288
2.95k
            int l_str_size = loffsets[i] - loffsets[i - 1];
1289
1290
2.95k
            inner_loop_impl(i, res, null_map, l_raw_str, l_str_size, path);
1291
2.95k
        } //for
1292
1.32k
        return Status::OK();
1293
1.32k
    } //function
1294
};
1295
1296
struct JsonbTypeJson {
1297
    using T = std::string;
1298
    using ReturnType = DataTypeJsonb;
1299
    using ColumnType = ColumnString;
1300
    static const bool only_get_type = false;
1301
    static const bool no_quotes = false;
1302
};
1303
1304
struct JsonbTypeJsonNoQuotes {
1305
    using T = std::string;
1306
    using ReturnType = DataTypeJsonb;
1307
    using ColumnType = ColumnString;
1308
    static const bool only_get_type = false;
1309
    static const bool no_quotes = true;
1310
};
1311
1312
struct JsonbTypeType {
1313
    using T = std::string;
1314
    using ReturnType = DataTypeString;
1315
    using ColumnType = ColumnString;
1316
    static const bool only_get_type = true;
1317
    static const bool no_quotes = false;
1318
};
1319
1320
struct JsonbExtractJsonb : public JsonbExtractStringImpl<JsonbTypeJson> {
1321
    static constexpr auto name = "jsonb_extract";
1322
    static constexpr auto alias = "json_extract";
1323
};
1324
1325
struct JsonbExtractJsonbNoQuotes : public JsonbExtractStringImpl<JsonbTypeJsonNoQuotes> {
1326
    static constexpr auto name = "jsonb_extract_no_quotes";
1327
    static constexpr auto alias = "json_extract_no_quotes";
1328
};
1329
1330
struct JsonbTypeImpl : public JsonbExtractStringImpl<JsonbTypeType> {
1331
    static constexpr auto name = "json_type";
1332
    static constexpr auto alias = "jsonb_type";
1333
};
1334
1335
using FunctionJsonbExists = FunctionJsonbExtractPath;
1336
using FunctionJsonbType = FunctionJsonbExtract<JsonbTypeImpl>;
1337
1338
using FunctionJsonbExtractIsnull = FunctionJsonbExtract<JsonbExtractIsnull>;
1339
using FunctionJsonbExtractJsonb = FunctionJsonbExtract<JsonbExtractJsonb>;
1340
using FunctionJsonbExtractJsonbNoQuotes = FunctionJsonbExtract<JsonbExtractJsonbNoQuotes>;
1341
1342
template <typename Impl>
1343
class FunctionJsonbLength : public IFunction {
1344
public:
1345
    static constexpr auto name = "json_length";
1346
1
    String get_name() const override { return name; }
1347
44
    static FunctionPtr create() { return std::make_shared<FunctionJsonbLength<Impl>>(); }
1348
1349
35
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1350
35
        return make_nullable(std::make_shared<DataTypeInt32>());
1351
35
    }
1352
43
    DataTypes get_variadic_argument_types_impl() const override {
1353
43
        return Impl::get_variadic_argument_types();
1354
43
    }
1355
35
    size_t get_number_of_arguments() const override {
1356
35
        return get_variadic_argument_types_impl().size();
1357
35
    }
1358
1359
124
    bool use_default_implementation_for_nulls() const override { return false; }
1360
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
1361
89
                        uint32_t result, size_t input_rows_count) const override {
1362
89
        return Impl::execute_impl(context, block, arguments, result, input_rows_count);
1363
89
    }
1364
};
1365
1366
struct JsonbLengthUtil {
1367
    static Status jsonb_length_execute(FunctionContext* context, Block& block,
1368
                                       const ColumnNumbers& arguments, uint32_t result,
1369
89
                                       size_t input_rows_count) {
1370
89
        DCHECK_GE(arguments.size(), 2);
1371
89
        ColumnPtr jsonb_data_column;
1372
89
        bool jsonb_data_const = false;
1373
        // prepare jsonb data column
1374
89
        std::tie(jsonb_data_column, jsonb_data_const) =
1375
89
                unpack_if_const(block.get_by_position(arguments[0]).column);
1376
89
        ColumnPtr path_column;
1377
89
        bool is_const = false;
1378
89
        std::tie(path_column, is_const) =
1379
89
                unpack_if_const(block.get_by_position(arguments[1]).column);
1380
1381
89
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
1382
89
        auto return_type = block.get_data_type(result);
1383
89
        MutableColumnPtr res = return_type->create_column();
1384
1385
89
        JsonbPath path;
1386
89
        if (is_const) {
1387
61
            if (path_column->is_null_at(0)) {
1388
2
                for (size_t i = 0; i < input_rows_count; ++i) {
1389
1
                    null_map->get_data()[i] = 1;
1390
1
                    res->insert_data(nullptr, 0);
1391
1
                }
1392
1393
1
                block.replace_by_position(
1394
1
                        result, ColumnNullable::create(std::move(res), std::move(null_map)));
1395
1
                return Status::OK();
1396
1
            }
1397
1398
60
            auto path_value = path_column->get_data_at(0);
1399
60
            if (!path.seek(path_value.data, path_value.size)) {
1400
0
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
1401
0
                                               std::string_view(path_value.data, path_value.size));
1402
0
            }
1403
60
        }
1404
1405
263
        for (size_t i = 0; i < input_rows_count; ++i) {
1406
175
            if (jsonb_data_column->is_null_at(i) || path_column->is_null_at(i) ||
1407
175
                (jsonb_data_column->get_data_at(i).size == 0)) {
1408
18
                null_map->get_data()[i] = 1;
1409
18
                res->insert_data(nullptr, 0);
1410
18
                continue;
1411
18
            }
1412
157
            if (!is_const) {
1413
25
                auto path_value = path_column->get_data_at(i);
1414
25
                path.clean();
1415
25
                if (!path.seek(path_value.data, path_value.size)) {
1416
0
                    return Status::InvalidArgument(
1417
0
                            "Json path error: Invalid Json Path for value: {}",
1418
0
                            std::string_view(reinterpret_cast<const char*>(path_value.data),
1419
0
                                             path_value.size));
1420
0
                }
1421
25
            }
1422
157
            auto jsonb_value = jsonb_data_column->get_data_at(i);
1423
            // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
1424
157
            const JsonbDocument* doc = nullptr;
1425
157
            RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(jsonb_value.data,
1426
157
                                                                  jsonb_value.size, &doc));
1427
157
            auto find_result = doc->getValue()->findValue(path);
1428
157
            const auto* value = find_result.value;
1429
157
            if (UNLIKELY(!value)) {
1430
74
                null_map->get_data()[i] = 1;
1431
74
                res->insert_data(nullptr, 0);
1432
74
                continue;
1433
74
            }
1434
83
            auto length = value->numElements();
1435
83
            res->insert_data(const_cast<const char*>((char*)&length), 0);
1436
83
        }
1437
88
        block.replace_by_position(result,
1438
88
                                  ColumnNullable::create(std::move(res), std::move(null_map)));
1439
88
        return Status::OK();
1440
88
    }
1441
};
1442
1443
struct JsonbLengthAndPathImpl {
1444
43
    static DataTypes get_variadic_argument_types() {
1445
43
        return {std::make_shared<DataTypeJsonb>(), std::make_shared<DataTypeString>()};
1446
43
    }
1447
1448
    static Status execute_impl(FunctionContext* context, Block& block,
1449
                               const ColumnNumbers& arguments, uint32_t result,
1450
89
                               size_t input_rows_count) {
1451
89
        return JsonbLengthUtil::jsonb_length_execute(context, block, arguments, result,
1452
89
                                                     input_rows_count);
1453
89
    }
1454
};
1455
1456
template <typename Impl>
1457
class FunctionJsonbContains : public IFunction {
1458
public:
1459
    static constexpr auto name = "json_contains";
1460
1
    String get_name() const override { return name; }
1461
58
    static FunctionPtr create() { return std::make_shared<FunctionJsonbContains<Impl>>(); }
1462
1463
49
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1464
49
        return make_nullable(std::make_shared<DataTypeUInt8>());
1465
49
    }
1466
57
    DataTypes get_variadic_argument_types_impl() const override {
1467
57
        return Impl::get_variadic_argument_types();
1468
57
    }
1469
49
    size_t get_number_of_arguments() const override {
1470
49
        return get_variadic_argument_types_impl().size();
1471
49
    }
1472
1473
176
    bool use_default_implementation_for_nulls() const override { return false; }
1474
1475
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
1476
126
                        uint32_t result, size_t input_rows_count) const override {
1477
126
        return Impl::execute_impl(context, block, arguments, result, input_rows_count);
1478
126
    }
1479
};
1480
1481
struct JsonbContainsUtil {
1482
    static Status jsonb_contains_execute(FunctionContext* context, Block& block,
1483
                                         const ColumnNumbers& arguments, uint32_t result,
1484
126
                                         size_t input_rows_count) {
1485
126
        DCHECK_GE(arguments.size(), 3);
1486
1487
126
        auto jsonb_data1_column = block.get_by_position(arguments[0]).column;
1488
126
        auto jsonb_data2_column = block.get_by_position(arguments[1]).column;
1489
1490
126
        ColumnPtr path_column;
1491
126
        bool is_const = false;
1492
126
        std::tie(path_column, is_const) =
1493
126
                unpack_if_const(block.get_by_position(arguments[2]).column);
1494
1495
126
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
1496
126
        auto return_type = block.get_data_type(result);
1497
126
        MutableColumnPtr res = return_type->create_column();
1498
1499
126
        JsonbPath path;
1500
126
        if (is_const) {
1501
85
            if (path_column->is_null_at(0)) {
1502
2
                for (size_t i = 0; i < input_rows_count; ++i) {
1503
1
                    null_map->get_data()[i] = 1;
1504
1
                    res->insert_data(nullptr, 0);
1505
1
                }
1506
1507
1
                block.replace_by_position(
1508
1
                        result, ColumnNullable::create(std::move(res), std::move(null_map)));
1509
1
                return Status::OK();
1510
1
            }
1511
1512
84
            auto path_value = path_column->get_data_at(0);
1513
84
            if (!path.seek(path_value.data, path_value.size)) {
1514
2
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
1515
2
                                               std::string_view(path_value.data, path_value.size));
1516
2
            }
1517
84
        }
1518
1519
375
        for (size_t i = 0; i < input_rows_count; ++i) {
1520
253
            if (jsonb_data1_column->is_null_at(i) || jsonb_data2_column->is_null_at(i) ||
1521
253
                path_column->is_null_at(i)) {
1522
28
                null_map->get_data()[i] = 1;
1523
28
                res->insert_data(nullptr, 0);
1524
28
                continue;
1525
28
            }
1526
1527
225
            if (!is_const) {
1528
45
                auto path_value = path_column->get_data_at(i);
1529
45
                path.clean();
1530
45
                if (!path.seek(path_value.data, path_value.size)) {
1531
1
                    return Status::InvalidArgument(
1532
1
                            "Json path error: Invalid Json Path for value: {}",
1533
1
                            std::string_view(path_value.data, path_value.size));
1534
1
                }
1535
45
            }
1536
1537
224
            auto jsonb_value1 = jsonb_data1_column->get_data_at(i);
1538
224
            auto jsonb_value2 = jsonb_data2_column->get_data_at(i);
1539
1540
224
            if (jsonb_value1.size == 0 || jsonb_value2.size == 0) {
1541
1
                null_map->get_data()[i] = 1;
1542
1
                res->insert_data(nullptr, 0);
1543
1
                continue;
1544
1
            }
1545
            // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
1546
223
            const JsonbDocument* doc1 = nullptr;
1547
223
            RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(jsonb_value1.data,
1548
223
                                                                  jsonb_value1.size, &doc1));
1549
223
            const JsonbDocument* doc2 = nullptr;
1550
223
            RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(jsonb_value2.data,
1551
223
                                                                  jsonb_value2.size, &doc2));
1552
1553
223
            auto find_result = doc1->getValue()->findValue(path);
1554
223
            const auto* value1 = find_result.value;
1555
223
            const JsonbValue* value2 = doc2->getValue();
1556
223
            if (!value1 || !value2) {
1557
45
                null_map->get_data()[i] = 1;
1558
45
                res->insert_data(nullptr, 0);
1559
45
                continue;
1560
45
            }
1561
178
            auto contains_value = value1->contains(value2);
1562
178
            res->insert_data(const_cast<const char*>((char*)&contains_value), 0);
1563
178
        }
1564
1565
122
        block.replace_by_position(result,
1566
122
                                  ColumnNullable::create(std::move(res), std::move(null_map)));
1567
122
        return Status::OK();
1568
123
    }
1569
};
1570
1571
template <bool ignore_null>
1572
class FunctionJsonbArray : public IFunction {
1573
public:
1574
    static constexpr auto name = "json_array";
1575
    static constexpr auto alias = "jsonb_array";
1576
1577
48
    static FunctionPtr create() { return std::make_shared<FunctionJsonbArray>(); }
_ZN5doris18FunctionJsonbArrayILb0EE6createEv
Line
Count
Source
1577
37
    static FunctionPtr create() { return std::make_shared<FunctionJsonbArray>(); }
_ZN5doris18FunctionJsonbArrayILb1EE6createEv
Line
Count
Source
1577
11
    static FunctionPtr create() { return std::make_shared<FunctionJsonbArray>(); }
1578
1579
0
    String get_name() const override { return name; }
Unexecuted instantiation: _ZNK5doris18FunctionJsonbArrayILb0EE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris18FunctionJsonbArrayILb1EE8get_nameB5cxx11Ev
1580
1581
0
    size_t get_number_of_arguments() const override { return 0; }
Unexecuted instantiation: _ZNK5doris18FunctionJsonbArrayILb0EE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris18FunctionJsonbArrayILb1EE23get_number_of_argumentsEv
1582
32
    bool is_variadic() const override { return true; }
_ZNK5doris18FunctionJsonbArrayILb0EE11is_variadicEv
Line
Count
Source
1582
29
    bool is_variadic() const override { return true; }
_ZNK5doris18FunctionJsonbArrayILb1EE11is_variadicEv
Line
Count
Source
1582
3
    bool is_variadic() const override { return true; }
1583
1584
58
    bool use_default_implementation_for_nulls() const override { return false; }
_ZNK5doris18FunctionJsonbArrayILb0EE36use_default_implementation_for_nullsEv
Line
Count
Source
1584
54
    bool use_default_implementation_for_nulls() const override { return false; }
_ZNK5doris18FunctionJsonbArrayILb1EE36use_default_implementation_for_nullsEv
Line
Count
Source
1584
4
    bool use_default_implementation_for_nulls() const override { return false; }
1585
1586
30
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1587
30
        return std::make_shared<DataTypeJsonb>();
1588
30
    }
_ZNK5doris18FunctionJsonbArrayILb0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS6_EE
Line
Count
Source
1586
28
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1587
28
        return std::make_shared<DataTypeJsonb>();
1588
28
    }
_ZNK5doris18FunctionJsonbArrayILb1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS6_EE
Line
Count
Source
1586
2
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1587
2
        return std::make_shared<DataTypeJsonb>();
1588
2
    }
1589
1590
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
1591
30
                        uint32_t result, size_t input_rows_count) const override {
1592
30
        auto return_data_type = std::make_shared<DataTypeJsonb>();
1593
30
        auto column = return_data_type->create_column();
1594
30
        column->reserve(input_rows_count);
1595
1596
30
        JsonbWriter writer;
1597
94
        for (size_t i = 0; i < input_rows_count; ++i) {
1598
60
            writer.writeStartArray();
1599
173
            for (auto argument : arguments) {
1600
173
                auto&& [arg_column, is_const] =
1601
173
                        unpack_if_const(block.get_by_position(argument).column);
1602
173
                if (arg_column->is_nullable()) {
1603
83
                    const auto& nullable_column =
1604
83
                            assert_cast<const ColumnNullable&, TypeCheckOnRelease::DISABLE>(
1605
83
                                    *arg_column);
1606
83
                    const auto& null_map = nullable_column.get_null_map_data();
1607
83
                    const auto& nested_column = nullable_column.get_nested_column();
1608
83
                    const auto& jsonb_column =
1609
83
                            assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>(
1610
83
                                    nested_column);
1611
1612
83
                    auto index = index_check_const(i, is_const);
1613
83
                    if (null_map[index]) {
1614
30
                        if constexpr (ignore_null) {
1615
4
                            continue;
1616
26
                        } else {
1617
26
                            writer.writeNull();
1618
26
                        }
1619
53
                    } else {
1620
53
                        auto jsonb_binary = jsonb_column.get_data_at(index);
1621
53
                        const JsonbDocument* doc = nullptr;
1622
53
                        auto st = JsonbDocument::checkAndCreateDocument(jsonb_binary.data,
1623
53
                                                                        jsonb_binary.size, &doc);
1624
53
                        if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
1625
0
                            if constexpr (ignore_null) {
1626
0
                                continue;
1627
0
                            } else {
1628
0
                                writer.writeNull();
1629
0
                            }
1630
53
                        } else {
1631
53
                            writer.writeValue(doc->getValue());
1632
53
                        }
1633
53
                    }
1634
90
                } else {
1635
90
                    const auto& jsonb_column =
1636
90
                            assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>(
1637
90
                                    *arg_column);
1638
1639
90
                    auto index = index_check_const(i, is_const);
1640
90
                    auto jsonb_binary = jsonb_column.get_data_at(index);
1641
90
                    const JsonbDocument* doc = nullptr;
1642
90
                    auto st = JsonbDocument::checkAndCreateDocument(jsonb_binary.data,
1643
90
                                                                    jsonb_binary.size, &doc);
1644
90
                    if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
1645
0
                        if constexpr (ignore_null) {
1646
0
                            continue;
1647
0
                        } else {
1648
0
                            writer.writeNull();
1649
0
                        }
1650
90
                    } else {
1651
90
                        writer.writeValue(doc->getValue());
1652
90
                    }
1653
90
                }
1654
173
            }
1655
19
            writer.writeEndArray();
1656
19
            column->insert_data(writer.getOutput()->getBuffer(), writer.getOutput()->getSize());
1657
19
            writer.reset();
1658
19
        }
1659
1660
6
        block.get_by_position(result).column = std::move(column);
1661
6
        return Status::OK();
1662
30
    }
_ZNK5doris18FunctionJsonbArrayILb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
1591
28
                        uint32_t result, size_t input_rows_count) const override {
1592
28
        auto return_data_type = std::make_shared<DataTypeJsonb>();
1593
28
        auto column = return_data_type->create_column();
1594
28
        column->reserve(input_rows_count);
1595
1596
28
        JsonbWriter writer;
1597
73
        for (size_t i = 0; i < input_rows_count; ++i) {
1598
45
            writer.writeStartArray();
1599
143
            for (auto argument : arguments) {
1600
143
                auto&& [arg_column, is_const] =
1601
143
                        unpack_if_const(block.get_by_position(argument).column);
1602
143
                if (arg_column->is_nullable()) {
1603
58
                    const auto& nullable_column =
1604
58
                            assert_cast<const ColumnNullable&, TypeCheckOnRelease::DISABLE>(
1605
58
                                    *arg_column);
1606
58
                    const auto& null_map = nullable_column.get_null_map_data();
1607
58
                    const auto& nested_column = nullable_column.get_nested_column();
1608
58
                    const auto& jsonb_column =
1609
58
                            assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>(
1610
58
                                    nested_column);
1611
1612
58
                    auto index = index_check_const(i, is_const);
1613
58
                    if (null_map[index]) {
1614
                        if constexpr (ignore_null) {
1615
                            continue;
1616
26
                        } else {
1617
26
                            writer.writeNull();
1618
26
                        }
1619
32
                    } else {
1620
32
                        auto jsonb_binary = jsonb_column.get_data_at(index);
1621
32
                        const JsonbDocument* doc = nullptr;
1622
32
                        auto st = JsonbDocument::checkAndCreateDocument(jsonb_binary.data,
1623
32
                                                                        jsonb_binary.size, &doc);
1624
32
                        if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
1625
                            if constexpr (ignore_null) {
1626
                                continue;
1627
0
                            } else {
1628
0
                                writer.writeNull();
1629
0
                            }
1630
32
                        } else {
1631
32
                            writer.writeValue(doc->getValue());
1632
32
                        }
1633
32
                    }
1634
85
                } else {
1635
85
                    const auto& jsonb_column =
1636
85
                            assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>(
1637
85
                                    *arg_column);
1638
1639
85
                    auto index = index_check_const(i, is_const);
1640
85
                    auto jsonb_binary = jsonb_column.get_data_at(index);
1641
85
                    const JsonbDocument* doc = nullptr;
1642
85
                    auto st = JsonbDocument::checkAndCreateDocument(jsonb_binary.data,
1643
85
                                                                    jsonb_binary.size, &doc);
1644
85
                    if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
1645
                        if constexpr (ignore_null) {
1646
                            continue;
1647
0
                        } else {
1648
0
                            writer.writeNull();
1649
0
                        }
1650
85
                    } else {
1651
85
                        writer.writeValue(doc->getValue());
1652
85
                    }
1653
85
                }
1654
143
            }
1655
45
            writer.writeEndArray();
1656
45
            column->insert_data(writer.getOutput()->getBuffer(), writer.getOutput()->getSize());
1657
45
            writer.reset();
1658
45
        }
1659
1660
28
        block.get_by_position(result).column = std::move(column);
1661
28
        return Status::OK();
1662
28
    }
_ZNK5doris18FunctionJsonbArrayILb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
1591
2
                        uint32_t result, size_t input_rows_count) const override {
1592
2
        auto return_data_type = std::make_shared<DataTypeJsonb>();
1593
2
        auto column = return_data_type->create_column();
1594
2
        column->reserve(input_rows_count);
1595
1596
2
        JsonbWriter writer;
1597
21
        for (size_t i = 0; i < input_rows_count; ++i) {
1598
15
            writer.writeStartArray();
1599
30
            for (auto argument : arguments) {
1600
30
                auto&& [arg_column, is_const] =
1601
30
                        unpack_if_const(block.get_by_position(argument).column);
1602
30
                if (arg_column->is_nullable()) {
1603
25
                    const auto& nullable_column =
1604
25
                            assert_cast<const ColumnNullable&, TypeCheckOnRelease::DISABLE>(
1605
25
                                    *arg_column);
1606
25
                    const auto& null_map = nullable_column.get_null_map_data();
1607
25
                    const auto& nested_column = nullable_column.get_nested_column();
1608
25
                    const auto& jsonb_column =
1609
25
                            assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>(
1610
25
                                    nested_column);
1611
1612
25
                    auto index = index_check_const(i, is_const);
1613
25
                    if (null_map[index]) {
1614
4
                        if constexpr (ignore_null) {
1615
4
                            continue;
1616
                        } else {
1617
                            writer.writeNull();
1618
                        }
1619
21
                    } else {
1620
21
                        auto jsonb_binary = jsonb_column.get_data_at(index);
1621
21
                        const JsonbDocument* doc = nullptr;
1622
21
                        auto st = JsonbDocument::checkAndCreateDocument(jsonb_binary.data,
1623
21
                                                                        jsonb_binary.size, &doc);
1624
21
                        if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
1625
0
                            if constexpr (ignore_null) {
1626
0
                                continue;
1627
                            } else {
1628
                                writer.writeNull();
1629
                            }
1630
21
                        } else {
1631
21
                            writer.writeValue(doc->getValue());
1632
21
                        }
1633
21
                    }
1634
25
                } else {
1635
5
                    const auto& jsonb_column =
1636
5
                            assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>(
1637
5
                                    *arg_column);
1638
1639
5
                    auto index = index_check_const(i, is_const);
1640
5
                    auto jsonb_binary = jsonb_column.get_data_at(index);
1641
5
                    const JsonbDocument* doc = nullptr;
1642
5
                    auto st = JsonbDocument::checkAndCreateDocument(jsonb_binary.data,
1643
5
                                                                    jsonb_binary.size, &doc);
1644
5
                    if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
1645
0
                        if constexpr (ignore_null) {
1646
0
                            continue;
1647
                        } else {
1648
                            writer.writeNull();
1649
                        }
1650
5
                    } else {
1651
5
                        writer.writeValue(doc->getValue());
1652
5
                    }
1653
5
                }
1654
30
            }
1655
19
            writer.writeEndArray();
1656
19
            column->insert_data(writer.getOutput()->getBuffer(), writer.getOutput()->getSize());
1657
19
            writer.reset();
1658
19
        }
1659
1660
6
        block.get_by_position(result).column = std::move(column);
1661
6
        return Status::OK();
1662
2
    }
1663
};
1664
1665
class FunctionJsonbObject : public IFunction {
1666
public:
1667
    static constexpr auto name = "json_object";
1668
    static constexpr auto alias = "jsonb_object";
1669
1670
42
    static FunctionPtr create() { return std::make_shared<FunctionJsonbObject>(); }
1671
1672
0
    String get_name() const override { return name; }
1673
1674
0
    size_t get_number_of_arguments() const override { return 0; }
1675
34
    bool is_variadic() const override { return true; }
1676
1677
75
    bool use_default_implementation_for_nulls() const override { return false; }
1678
1679
33
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1680
33
        return std::make_shared<DataTypeJsonb>();
1681
33
    }
1682
1683
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
1684
46
                        uint32_t result, size_t input_rows_count) const override {
1685
46
        if (arguments.size() % 2 != 0) {
1686
0
            return Status::InvalidArgument(
1687
0
                    "JSON object must have an even number of arguments, but got: {}",
1688
0
                    arguments.size());
1689
0
        }
1690
1691
46
        auto return_data_type = std::make_shared<DataTypeJsonb>();
1692
1693
46
        auto write_key = [](JsonbWriter& writer, const ColumnString& key_col, const bool is_const,
1694
194
                            const NullMap* null_map, const size_t arg_index, const size_t row_idx) {
1695
194
            auto index = index_check_const(row_idx, is_const);
1696
194
            if (null_map && (*null_map)[index]) {
1697
1
                return Status::InvalidArgument(
1698
1
                        "JSON documents may not contain NULL member name(argument "
1699
1
                        "index:  "
1700
1
                        "{}, row index: {})",
1701
1
                        row_idx, arg_index);
1702
1
            }
1703
1704
193
            auto key_string = key_col.get_data_at(index);
1705
193
            if (key_string.size > 255) {
1706
0
                return Status::InvalidArgument(
1707
0
                        "JSON object keys(argument index: {}) must be less than 256 "
1708
0
                        "bytes, but got size: {}",
1709
0
                        arg_index, key_string.size);
1710
0
            }
1711
193
            writer.writeKey(key_string.data, static_cast<uint8_t>(key_string.size));
1712
193
            return Status::OK();
1713
193
        };
1714
1715
46
        auto write_value = [](JsonbWriter& writer, const ColumnString& value_col,
1716
46
                              const bool is_const, const NullMap* null_map, const size_t arg_index,
1717
193
                              const size_t row_idx) {
1718
193
            auto index = index_check_const(row_idx, is_const);
1719
193
            if (null_map && (*null_map)[index]) {
1720
46
                writer.writeNull();
1721
46
                return Status::OK();
1722
46
            }
1723
1724
147
            auto value_string = value_col.get_data_at(index);
1725
147
            const JsonbDocument* doc = nullptr;
1726
147
            RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(value_string.data,
1727
147
                                                                  value_string.size, &doc));
1728
147
            writer.writeValue(doc->getValue());
1729
147
            return Status::OK();
1730
147
        };
1731
1732
149
        for (size_t arg_idx = 0; arg_idx != arguments.size(); arg_idx += 2) {
1733
103
            auto key_argument = arguments[arg_idx];
1734
103
            auto value_argument = arguments[arg_idx + 1];
1735
1736
103
            auto& key_data_type = block.get_by_position(key_argument).type;
1737
103
            auto& value_data_type = block.get_by_position(value_argument).type;
1738
103
            if (!is_string_type(key_data_type->get_primitive_type())) {
1739
0
                return Status::InvalidArgument(
1740
0
                        "JSON object key(argument index: {}) must be String, but got type: "
1741
0
                        "{}(primitive type: {})",
1742
0
                        arg_idx, key_data_type->get_name(),
1743
0
                        static_cast<int>(key_data_type->get_primitive_type()));
1744
0
            }
1745
1746
103
            if (value_data_type->get_primitive_type() != PrimitiveType::TYPE_JSONB) {
1747
0
                return Status::InvalidArgument(
1748
0
                        "JSON object value(argument index: {}) must be JSON, but got type: {}",
1749
0
                        arg_idx, value_data_type->get_name());
1750
0
            }
1751
103
        }
1752
1753
46
        auto column = return_data_type->create_column();
1754
46
        column->reserve(input_rows_count);
1755
1756
46
        JsonbWriter writer;
1757
110
        for (size_t i = 0; i != input_rows_count; ++i) {
1758
65
            writer.writeStartObject();
1759
258
            for (size_t arg_idx = 0; arg_idx != arguments.size(); arg_idx += 2) {
1760
194
                auto key_argument = arguments[arg_idx];
1761
194
                auto value_argument = arguments[arg_idx + 1];
1762
194
                auto&& [key_column, key_const] =
1763
194
                        unpack_if_const(block.get_by_position(key_argument).column);
1764
194
                auto&& [value_column, value_const] =
1765
194
                        unpack_if_const(block.get_by_position(value_argument).column);
1766
1767
194
                if (key_column->is_nullable()) {
1768
3
                    const auto& nullable_column =
1769
3
                            assert_cast<const ColumnNullable&, TypeCheckOnRelease::DISABLE>(
1770
3
                                    *key_column);
1771
3
                    const auto& null_map = nullable_column.get_null_map_data();
1772
3
                    const auto& nested_column = nullable_column.get_nested_column();
1773
3
                    const auto& key_arg_column =
1774
3
                            assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>(
1775
3
                                    nested_column);
1776
1777
3
                    RETURN_IF_ERROR(
1778
3
                            write_key(writer, key_arg_column, key_const, &null_map, arg_idx, i));
1779
191
                } else {
1780
191
                    const auto& key_arg_column =
1781
191
                            assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>(
1782
191
                                    *key_column);
1783
191
                    RETURN_IF_ERROR(
1784
191
                            write_key(writer, key_arg_column, key_const, nullptr, arg_idx, i));
1785
191
                }
1786
1787
193
                if (value_column->is_nullable()) {
1788
93
                    const auto& nullable_column =
1789
93
                            assert_cast<const ColumnNullable&, TypeCheckOnRelease::DISABLE>(
1790
93
                                    *value_column);
1791
93
                    const auto& null_map = nullable_column.get_null_map_data();
1792
93
                    const auto& nested_column = nullable_column.get_nested_column();
1793
93
                    const auto& value_arg_column =
1794
93
                            assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>(
1795
93
                                    nested_column);
1796
1797
93
                    RETURN_IF_ERROR(write_value(writer, value_arg_column, value_const, &null_map,
1798
93
                                                arg_idx + 1, i));
1799
100
                } else {
1800
100
                    const auto& value_arg_column =
1801
100
                            assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>(
1802
100
                                    *value_column);
1803
100
                    RETURN_IF_ERROR(write_value(writer, value_arg_column, value_const, nullptr,
1804
100
                                                arg_idx + 1, i));
1805
100
                }
1806
193
            }
1807
1808
64
            writer.writeEndObject();
1809
64
            column->insert_data(writer.getOutput()->getBuffer(), writer.getOutput()->getSize());
1810
64
            writer.reset();
1811
64
        }
1812
1813
45
        block.get_by_position(result).column = std::move(column);
1814
45
        return Status::OK();
1815
46
    }
1816
};
1817
1818
// The three flavours of JSONB modification; used as the template argument of
// JsonbModifyName and FunctionJsonbModify.
enum class JsonbModifyType { Insert, Set, Replace };

// Compile-time lookup from a JsonbModifyType to the function name and its alias.
// The primary template supplies generic fallback names; each enumerator has its own
// specialization below.
template <JsonbModifyType modify_type>
struct JsonbModifyName {
    static constexpr const char* name = "jsonb_modify";
    static constexpr const char* alias = "json_modify";
};

// Insert -> jsonb_insert / json_insert
template <>
struct JsonbModifyName<JsonbModifyType::Insert> {
    static constexpr const char* name = "jsonb_insert";
    static constexpr const char* alias = "json_insert";
};

// Set -> jsonb_set / json_set
template <>
struct JsonbModifyName<JsonbModifyType::Set> {
    static constexpr const char* name = "jsonb_set";
    static constexpr const char* alias = "json_set";
};

// Replace -> jsonb_replace / json_replace
template <>
struct JsonbModifyName<JsonbModifyType::Replace> {
    static constexpr const char* name = "jsonb_replace";
    static constexpr const char* alias = "json_replace";
};
1841
1842
template <JsonbModifyType modify_type>
1843
class FunctionJsonbModify : public IFunction {
1844
public:
1845
    static constexpr auto name = JsonbModifyName<modify_type>::name;
1846
    static constexpr auto alias = JsonbModifyName<modify_type>::alias;
1847
1848
110
    // Factory: builds a new FunctionJsonbModify for this modify_type via std::make_shared.
    static FunctionPtr create() {
        using Self = FunctionJsonbModify<modify_type>;
        return std::make_shared<Self>();
    }
_ZN5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE6createEv
Line
Count
Source
1848
37
    static FunctionPtr create() { return std::make_shared<FunctionJsonbModify<modify_type>>(); }
_ZN5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE6createEv
Line
Count
Source
1848
36
    static FunctionPtr create() { return std::make_shared<FunctionJsonbModify<modify_type>>(); }
_ZN5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE6createEv
Line
Count
Source
1848
37
    static FunctionPtr create() { return std::make_shared<FunctionJsonbModify<modify_type>>(); }
1849
1850
0
    // Returns the class-level `name` constant (taken from JsonbModifyName<modify_type>).
    String get_name() const override {
        return name;
    }
Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE8get_nameB5cxx11Ev
1851
1852
0
    // Always reports 0; the function declares itself variadic (see is_variadic()) and
    // validates the real argument count inside execute_impl().
    size_t get_number_of_arguments() const override {
        constexpr size_t kFixedArgumentCount = 0;
        return kFixedArgumentCount;
    }
Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE23get_number_of_argumentsEv
1853
86
    // Always true: the function is declared as variadic.
    bool is_variadic() const override {
        constexpr bool kVariadic = true;
        return kVariadic;
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE11is_variadicEv
Line
Count
Source
1853
29
    bool is_variadic() const override { return true; }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE11is_variadicEv
Line
Count
Source
1853
28
    bool is_variadic() const override { return true; }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE11is_variadicEv
Line
Count
Source
1853
29
    bool is_variadic() const override { return true; }
1854
1855
166
    // Always false: execute_impl() reads the argument null maps and fills the result
    // null map on its own.
    bool use_default_implementation_for_nulls() const override {
        constexpr bool kUseDefaultNullHandling = false;
        return kUseDefaultNullHandling;
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE36use_default_implementation_for_nullsEv
Line
Count
Source
1855
56
    bool use_default_implementation_for_nulls() const override { return false; }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE36use_default_implementation_for_nullsEv
Line
Count
Source
1855
54
    bool use_default_implementation_for_nulls() const override { return false; }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE36use_default_implementation_for_nullsEv
Line
Count
Source
1855
56
    bool use_default_implementation_for_nulls() const override { return false; }
1856
1857
83
    // The result type is always Nullable(JSONB), independent of the argument types.
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
        auto jsonb_type = std::make_shared<DataTypeJsonb>();
        return make_nullable(jsonb_type);
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
1857
28
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1858
28
        return make_nullable(std::make_shared<DataTypeJsonb>());
1859
28
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
1857
27
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1858
27
        return make_nullable(std::make_shared<DataTypeJsonb>());
1859
27
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
1857
28
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1858
28
        return make_nullable(std::make_shared<DataTypeJsonb>());
1859
28
    }
1860
1861
    // Stores a constant column of `input_rows_count` rows at position `result` of `block`.
    // The constant wraps a one-row column created from `return_data_type` that holds a
    // single default value (presumably NULL for the nullable type the callers in this
    // class pass in - confirm against insert_default()). Always returns Status::OK().
    Status create_all_null_result(const DataTypePtr& return_data_type, Block& block,
                                  uint32_t result, size_t input_rows_count) const {
        auto single_row = return_data_type->create_column();
        single_row->insert_default();
        block.get_by_position(result).column =
                ColumnConst::create(std::move(single_row), input_rows_count);
        return Status::OK();
    }
Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE22create_all_null_resultERKSt10shared_ptrIKNS_9IDataTypeEERNS_5BlockEjm
Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE22create_all_null_resultERKSt10shared_ptrIKNS_9IDataTypeEERNS_5BlockEjm
Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE22create_all_null_resultERKSt10shared_ptrIKNS_9IDataTypeEERNS_5BlockEjm
1869
1870
    // Applies the (path, value) argument pairs, left to right, to the JSONB document
    // given as the first argument and writes the modified document per row.
    //
    // Argument layout: (json, path0, value0, path1, value1, ...), i.e. an odd count >= 3.
    //
    // Per-pair behaviour depends on modify_type:
    //   * Insert  - the pair is skipped when the path already exists.
    //   * Replace - the pair is skipped when the path does not exist.
    //   * Set     - the pair is applied in both cases.
    //
    // A row result is NULL when the input document is NULL / has no root value, or when a
    // path for that row is NULL. A constant NULL document or constant NULL path makes the
    // whole result a constant NULL column.
    //
    // Returns InvalidArgument for a wrong argument count or when findValue() reports a
    // wildcard path; errors from checkAndCreateDocument, parse_paths_and_values and
    // write_json_value are propagated.
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                        uint32_t result, size_t input_rows_count) const override {
        if (arguments.size() % 2 != 1 || arguments.size() < 3) {
            return Status::InvalidArgument(
                    "Function {} must have an odd number of arguments and more than 2 arguments, "
                    "but got: {}",
                    name, arguments.size());
        }

        // Number of (path, value) pairs following the document argument.
        const size_t keys_count = (arguments.size() - 1) / 2;

        auto return_data_type = make_nullable(std::make_shared<DataTypeJsonb>());

        auto result_column = return_data_type->create_column();
        auto& result_nullable_col = assert_cast<ColumnNullable&>(*result_column);
        auto& null_map = result_nullable_col.get_null_map_data();
        auto& res_string_column =
                assert_cast<ColumnString&>(result_nullable_col.get_nested_column());
        auto& res_chars = res_string_column.get_chars();
        auto& res_offsets = res_string_column.get_offsets();

        null_map.resize_fill(input_rows_count, 0);
        res_offsets.resize(input_rows_count);
        auto&& [json_data_arg_column, json_data_const] =
                unpack_if_const(block.get_by_position(arguments[0]).column);

        // A constant NULL document makes every row NULL.
        if (json_data_const) {
            if (json_data_arg_column->is_null_at(0)) {
                return create_all_null_result(return_data_type, block, result, input_rows_count);
            }
        }

        // Per-pair views of the path / value argument columns, indexed by pair number.
        std::vector<const ColumnString*> json_path_columns(keys_count);
        std::vector<bool> json_path_constant(keys_count);
        std::vector<const NullMap*> json_path_null_maps(keys_count, nullptr);

        std::vector<const ColumnString*> json_value_columns(keys_count);
        std::vector<bool> json_value_constant(keys_count);
        std::vector<const NullMap*> json_value_null_maps(keys_count, nullptr);

        const NullMap* json_data_null_map = nullptr;
        const ColumnString* json_data_column;
        if (json_data_arg_column->is_nullable()) {
            const auto& nullable_column = assert_cast<const ColumnNullable&>(*json_data_arg_column);
            json_data_null_map = &nullable_column.get_null_map_data();
            const auto& nested_column = nullable_column.get_nested_column();
            json_data_column = assert_cast<const ColumnString*>(&nested_column);
        } else {
            json_data_column = assert_cast<const ColumnString*>(json_data_arg_column.get());
        }

        // Arguments 1, 3, 5, ... are paths and 2, 4, 6, ... are values; `i / 2` is the
        // zero-based pair number for an odd `i`.
        for (size_t i = 1; i < arguments.size(); i += 2) {
            auto&& [path_column, path_const] =
                    unpack_if_const(block.get_by_position(arguments[i]).column);
            auto&& [value_column, value_const] =
                    unpack_if_const(block.get_by_position(arguments[i + 1]).column);

            // A constant NULL path makes every row NULL.
            if (path_const) {
                if (path_column->is_null_at(0)) {
                    return create_all_null_result(return_data_type, block, result,
                                                  input_rows_count);
                }
            }

            json_path_constant[i / 2] = path_const;
            if (path_column->is_nullable()) {
                const auto& nullable_column = assert_cast<const ColumnNullable&>(*path_column);
                json_path_null_maps[i / 2] = &nullable_column.get_null_map_data();
                const auto& nested_column = nullable_column.get_nested_column();
                json_path_columns[i / 2] = assert_cast<const ColumnString*>(&nested_column);
            } else {
                json_path_columns[i / 2] = assert_cast<const ColumnString*>(path_column.get());
            }

            json_value_constant[i / 2] = value_const;
            if (value_column->is_nullable()) {
                const auto& nullable_column = assert_cast<const ColumnNullable&>(*value_column);
                json_value_null_maps[i / 2] = &nullable_column.get_null_map_data();
                const auto& nested_column = nullable_column.get_nested_column();
                json_value_columns[i / 2] = assert_cast<const ColumnString*>(&nested_column);
            } else {
                json_value_columns[i / 2] = assert_cast<const ColumnString*>(value_column.get());
            }
        }

        // Current document of every row; updated as each pair is applied. A nullptr entry
        // means the row has no usable document.
        DorisVector<const JsonbDocument*> json_documents(input_rows_count);
        if (json_data_const) {
            auto json_data_string = json_data_column->get_data_at(0);
            const JsonbDocument* doc = nullptr;
            RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(json_data_string.data,
                                                                  json_data_string.size, &doc));
            if (!doc || !doc->getValue()) [[unlikely]] {
                return create_all_null_result(return_data_type, block, result, input_rows_count);
            }
            for (size_t i = 0; i != input_rows_count; ++i) {
                json_documents[i] = doc;
            }
        } else {
            for (size_t i = 0; i != input_rows_count; ++i) {
                if (json_data_null_map && (*json_data_null_map)[i]) {
                    null_map[i] = 1;
                    json_documents[i] = nullptr;
                    continue;
                }

                auto json_data_string = json_data_column->get_data_at(i);
                const JsonbDocument* doc = nullptr;
                RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(json_data_string.data,
                                                                      json_data_string.size, &doc));
                if (!doc || !doc->getValue()) [[unlikely]] {
                    null_map[i] = 1;
                    continue;
                }
                json_documents[i] = doc;
            }
        }

        DorisVector<DorisVector<JsonbPath>> json_paths(keys_count);
        DorisVector<DorisVector<const JsonbValue*>> json_values(keys_count);

        RETURN_IF_ERROR(parse_paths_and_values(json_paths, json_values, arguments, input_rows_count,
                                               json_path_columns, json_path_constant,
                                               json_path_null_maps, json_value_columns,
                                               json_value_constant, json_value_null_maps));

        JsonbWriter writer;
        // Scratch storage holding the intermediate document of the row being processed.
        struct DocumentBuffer {
            DorisUniqueBufferPtr<char> ptr;
            size_t size = 0;
            size_t capacity = 0;
        };

        DocumentBuffer tmp_buffer;

        for (size_t row_idx = 0; row_idx != input_rows_count; ++row_idx) {
            for (size_t i = 1; i < arguments.size(); i += 2) {
                const size_t index = i / 2;
                auto& json_path = json_paths[index];
                auto& json_value = json_values[index];

                const auto path_index = index_check_const(row_idx, json_path_constant[index]);
                const auto value_index = index_check_const(row_idx, json_value_constant[index]);

                // Once a row is NULL the remaining pairs are ignored.
                if (null_map[row_idx]) {
                    continue;
                }

                if (json_documents[row_idx] == nullptr) {
                    null_map[row_idx] = 1;
                    continue;
                }

                // A NULL path makes the row NULL.
                if (json_path_null_maps[index] && (*json_path_null_maps[index])[path_index]) {
                    null_map[row_idx] = 1;
                    continue;
                }

                auto find_result =
                        json_documents[row_idx]->getValue()->findValue(json_path[path_index]);

                if (find_result.is_wildcard) {
                    return Status::InvalidArgument(
                            " In this situation, path expressions may not contain the * and ** "
                            "tokens or an array range, argument index: {}, row index: {}",
                            i, row_idx);
                }

                if constexpr (modify_type == JsonbModifyType::Insert) {
                    // Insert never overwrites an existing value.
                    if (find_result.value) {
                        continue;
                    }
                } else if constexpr (modify_type == JsonbModifyType::Replace) {
                    // Replace never creates a new value.
                    if (!find_result.value) {
                        continue;
                    }
                }

                // Starts with the document root; build_parents_by_path() is expected to
                // append the values along the path.
                std::vector<const JsonbValue*> parents;

                bool replace = false;
                parents.emplace_back(json_documents[row_idx]->getValue());
                if (find_result.value) {
                    // find target path, replace it with the new value.
                    replace = true;
                    if (!build_parents_by_path(json_documents[row_idx]->getValue(),
                                               json_path[path_index], parents)) {
                        DCHECK(false);
                        continue;
                    }
                } else {
                    // does not find target path, insert the new value.
                    // Build the parent path: every leg of the target path except the last.
                    JsonbPath new_path;
                    const size_t path_legs = json_path[path_index].get_leg_vector_size();
                    // `j + 1 < path_legs` rather than `j < path_legs - 1`: the latter wraps
                    // around for an empty leg vector because the arithmetic is unsigned.
                    for (size_t j = 0; j + 1 < path_legs; ++j) {
                        auto* current_leg = json_path[path_index].get_leg_from_leg_vector(j);
                        std::unique_ptr<leg_info> leg = std::make_unique<leg_info>(
                                current_leg->leg_ptr, current_leg->leg_len,
                                current_leg->array_index, current_leg->type);
                        new_path.add_leg_to_leg_vector(std::move(leg));
                    }

                    // The pair is skipped when the parent path cannot be resolved.
                    if (!build_parents_by_path(json_documents[row_idx]->getValue(), new_path,
                                               parents)) {
                        continue;
                    }
                }

                const auto legs_count = json_path[path_index].get_leg_vector_size();
                leg_info* last_leg =
                        legs_count > 0
                                ? json_path[path_index].get_leg_from_leg_vector(legs_count - 1)
                                : nullptr;
                RETURN_IF_ERROR(write_json_value(json_documents[row_idx]->getValue(), parents, 0,
                                                 json_value[value_index], replace, last_leg,
                                                 writer));

                // Copy the freshly written document into the scratch buffer so the writer
                // can be reset and reused; grow the buffer in 1024-byte steps when needed.
                auto* writer_output = writer.getOutput();
                if (writer_output->getSize() > tmp_buffer.capacity) {
                    tmp_buffer.capacity =
                            ((size_t(writer_output->getSize()) + 1024 - 1) / 1024) * 1024;
                    tmp_buffer.ptr = make_unique_buffer<char>(tmp_buffer.capacity);
                    DCHECK_LE(writer_output->getSize(), tmp_buffer.capacity);
                }

                memcpy(tmp_buffer.ptr.get(), writer_output->getBuffer(), writer_output->getSize());
                tmp_buffer.size = writer_output->getSize();

                writer.reset();

                // The row's current document now points into the scratch buffer.
                RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(
                        tmp_buffer.ptr.get(), tmp_buffer.size, &json_documents[row_idx]));
            }

            // Append the final document of the row (nothing is appended for NULL rows).
            if (!null_map[row_idx]) {
                const auto* jsonb_document = json_documents[row_idx];
                const auto size = jsonb_document->numPackedBytes();
                res_chars.insert(reinterpret_cast<const char*>(jsonb_document),
                                 reinterpret_cast<const char*>(jsonb_document) + size);
            }

            res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size());

            // Re-validate the bytes just appended to the result column.
            // NOTE(review): for row_idx == 0 this reads res_offsets[-1]; presumably the
            // offsets container is padded so that element is 0 - confirm.
            if (!null_map[row_idx]) {
                auto* ptr = res_chars.data() + res_offsets[row_idx - 1];
                auto size = res_offsets[row_idx] - res_offsets[row_idx - 1];
                const JsonbDocument* doc = nullptr;
                THROW_IF_ERROR(JsonbDocument::checkAndCreateDocument(
                        reinterpret_cast<const char*>(ptr), size, &doc));
            }
        }

        block.get_by_position(result).column = std::move(result_column);
        return Status::OK();
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
1871
28
                        uint32_t result, size_t input_rows_count) const override {
1872
28
        if (arguments.size() % 2 != 1 || arguments.size() < 3) {
1873
0
            return Status::InvalidArgument(
1874
0
                    "Function {} must have an odd number of arguments and more than 2 arguments, "
1875
0
                    "but got: {}",
1876
0
                    name, arguments.size());
1877
0
        }
1878
1879
28
        const size_t keys_count = (arguments.size() - 1) / 2;
1880
1881
28
        auto return_data_type = make_nullable(std::make_shared<DataTypeJsonb>());
1882
1883
28
        auto result_column = return_data_type->create_column();
1884
28
        auto& result_nullable_col = assert_cast<ColumnNullable&>(*result_column);
1885
28
        auto& null_map = result_nullable_col.get_null_map_data();
1886
28
        auto& res_string_column =
1887
28
                assert_cast<ColumnString&>(result_nullable_col.get_nested_column());
1888
28
        auto& res_chars = res_string_column.get_chars();
1889
28
        auto& res_offsets = res_string_column.get_offsets();
1890
1891
28
        null_map.resize_fill(input_rows_count, 0);
1892
28
        res_offsets.resize(input_rows_count);
1893
28
        auto&& [json_data_arg_column, json_data_const] =
1894
28
                unpack_if_const(block.get_by_position(arguments[0]).column);
1895
1896
28
        if (json_data_const) {
1897
5
            if (json_data_arg_column->is_null_at(0)) {
1898
0
                return create_all_null_result(return_data_type, block, result, input_rows_count);
1899
0
            }
1900
5
        }
1901
1902
28
        std::vector<const ColumnString*> json_path_columns(keys_count);
1903
28
        std::vector<bool> json_path_constant(keys_count);
1904
28
        std::vector<const NullMap*> json_path_null_maps(keys_count, nullptr);
1905
1906
28
        std::vector<const ColumnString*> json_value_columns(keys_count);
1907
28
        std::vector<bool> json_value_constant(keys_count);
1908
28
        std::vector<const NullMap*> json_value_null_maps(keys_count, nullptr);
1909
1910
28
        const NullMap* json_data_null_map = nullptr;
1911
28
        const ColumnString* json_data_column;
1912
28
        if (json_data_arg_column->is_nullable()) {
1913
28
            const auto& nullable_column = assert_cast<const ColumnNullable&>(*json_data_arg_column);
1914
28
            json_data_null_map = &nullable_column.get_null_map_data();
1915
28
            const auto& nested_column = nullable_column.get_nested_column();
1916
28
            json_data_column = assert_cast<const ColumnString*>(&nested_column);
1917
28
        } else {
1918
0
            json_data_column = assert_cast<const ColumnString*>(json_data_arg_column.get());
1919
0
        }
1920
1921
63
        for (size_t i = 1; i < arguments.size(); i += 2) {
1922
35
            auto&& [path_column, path_const] =
1923
35
                    unpack_if_const(block.get_by_position(arguments[i]).column);
1924
35
            auto&& [value_column, value_const] =
1925
35
                    unpack_if_const(block.get_by_position(arguments[i + 1]).column);
1926
1927
35
            if (path_const) {
1928
7
                if (path_column->is_null_at(0)) {
1929
0
                    return create_all_null_result(return_data_type, block, result,
1930
0
                                                  input_rows_count);
1931
0
                }
1932
7
            }
1933
1934
35
            json_path_constant[i / 2] = path_const;
1935
35
            if (path_column->is_nullable()) {
1936
4
                const auto& nullable_column = assert_cast<const ColumnNullable&>(*path_column);
1937
4
                json_path_null_maps[i / 2] = &nullable_column.get_null_map_data();
1938
4
                const auto& nested_column = nullable_column.get_nested_column();
1939
4
                json_path_columns[i / 2] = assert_cast<const ColumnString*>(&nested_column);
1940
31
            } else {
1941
31
                json_path_columns[i / 2] = assert_cast<const ColumnString*>(path_column.get());
1942
31
            }
1943
1944
35
            json_value_constant[i / 2] = value_const;
1945
35
            if (value_column->is_nullable()) {
1946
16
                const auto& nullable_column = assert_cast<const ColumnNullable&>(*value_column);
1947
16
                json_value_null_maps[i / 2] = &nullable_column.get_null_map_data();
1948
16
                const auto& nested_column = nullable_column.get_nested_column();
1949
16
                json_value_columns[i / 2] = assert_cast<const ColumnString*>(&nested_column);
1950
19
            } else {
1951
19
                json_value_columns[i / 2] = assert_cast<const ColumnString*>(value_column.get());
1952
19
            }
1953
35
        }
1954
1955
28
        DorisVector<const JsonbDocument*> json_documents(input_rows_count);
1956
28
        if (json_data_const) {
1957
5
            auto json_data_string = json_data_column->get_data_at(0);
1958
5
            const JsonbDocument* doc = nullptr;
1959
5
            RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(json_data_string.data,
1960
5
                                                                  json_data_string.size, &doc));
1961
5
            if (!doc || !doc->getValue()) [[unlikely]] {
1962
0
                return create_all_null_result(return_data_type, block, result, input_rows_count);
1963
0
            }
1964
30
            for (size_t i = 0; i != input_rows_count; ++i) {
1965
25
                json_documents[i] = doc;
1966
25
            }
1967
23
        } else {
1968
46
            for (size_t i = 0; i != input_rows_count; ++i) {
1969
23
                if (json_data_null_map && (*json_data_null_map)[i]) {
1970
0
                    null_map[i] = 1;
1971
0
                    json_documents[i] = nullptr;
1972
0
                    continue;
1973
0
                }
1974
1975
23
                auto json_data_string = json_data_column->get_data_at(i);
1976
23
                const JsonbDocument* doc = nullptr;
1977
23
                RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(json_data_string.data,
1978
23
                                                                      json_data_string.size, &doc));
1979
23
                if (!doc || !doc->getValue()) [[unlikely]] {
1980
0
                    null_map[i] = 1;
1981
0
                    continue;
1982
0
                }
1983
23
                json_documents[i] = doc;
1984
23
            }
1985
23
        }
1986
1987
28
        DorisVector<DorisVector<JsonbPath>> json_paths(keys_count);
1988
28
        DorisVector<DorisVector<const JsonbValue*>> json_values(keys_count);
1989
1990
28
        RETURN_IF_ERROR(parse_paths_and_values(json_paths, json_values, arguments, input_rows_count,
1991
28
                                               json_path_columns, json_path_constant,
1992
28
                                               json_path_null_maps, json_value_columns,
1993
28
                                               json_value_constant, json_value_null_maps));
1994
1995
24
        JsonbWriter writer;
1996
24
        struct DocumentBuffer {
1997
24
            DorisUniqueBufferPtr<char> ptr;
1998
24
            size_t size = 0;
1999
24
            size_t capacity = 0;
2000
24
        };
2001
2002
24
        DocumentBuffer tmp_buffer;
2003
2004
77
        for (size_t row_idx = 0; row_idx != input_rows_count; ++row_idx) {
2005
114
            for (size_t i = 1; i < arguments.size(); i += 2) {
2006
61
                const size_t index = i / 2;
2007
61
                auto& json_path = json_paths[index];
2008
61
                auto& json_value = json_values[index];
2009
2010
61
                const auto path_index = index_check_const(row_idx, json_path_constant[index]);
2011
61
                const auto value_index = index_check_const(row_idx, json_value_constant[index]);
2012
2013
61
                if (null_map[row_idx]) {
2014
0
                    continue;
2015
0
                }
2016
2017
61
                if (json_documents[row_idx] == nullptr) {
2018
0
                    null_map[row_idx] = 1;
2019
0
                    continue;
2020
0
                }
2021
2022
61
                if (json_path_null_maps[index] && (*json_path_null_maps[index])[path_index]) {
2023
2
                    null_map[row_idx] = 1;
2024
2
                    continue;
2025
2
                }
2026
2027
59
                auto find_result =
2028
59
                        json_documents[row_idx]->getValue()->findValue(json_path[path_index]);
2029
2030
59
                if (find_result.is_wildcard) {
2031
0
                    return Status::InvalidArgument(
2032
0
                            " In this situation, path expressions may not contain the * and ** "
2033
0
                            "tokens or an array range, argument index: {}, row index: {}",
2034
0
                            i, row_idx);
2035
0
                }
2036
2037
59
                if constexpr (modify_type == JsonbModifyType::Insert) {
2038
59
                    if (find_result.value) {
2039
18
                        continue;
2040
18
                    }
2041
                } else if constexpr (modify_type == JsonbModifyType::Replace) {
2042
                    if (!find_result.value) {
2043
                        continue;
2044
                    }
2045
                }
2046
2047
41
                std::vector<const JsonbValue*> parents;
2048
2049
59
                bool replace = false;
2050
59
                parents.emplace_back(json_documents[row_idx]->getValue());
2051
59
                if (find_result.value) {
2052
                    // find target path, replace it with the new value.
2053
0
                    replace = true;
2054
0
                    if (!build_parents_by_path(json_documents[row_idx]->getValue(),
2055
0
                                               json_path[path_index], parents)) {
2056
0
                        DCHECK(false);
2057
0
                        continue;
2058
0
                    }
2059
59
                } else {
2060
                    // does not find target path, insert the new value.
2061
59
                    JsonbPath new_path;
2062
98
                    for (size_t j = 0; j < json_path[path_index].get_leg_vector_size() - 1; ++j) {
2063
39
                        auto* current_leg = json_path[path_index].get_leg_from_leg_vector(j);
2064
39
                        std::unique_ptr<leg_info> leg = std::make_unique<leg_info>(
2065
39
                                current_leg->leg_ptr, current_leg->leg_len,
2066
39
                                current_leg->array_index, current_leg->type);
2067
39
                        new_path.add_leg_to_leg_vector(std::move(leg));
2068
39
                    }
2069
2070
59
                    if (!build_parents_by_path(json_documents[row_idx]->getValue(), new_path,
2071
59
                                               parents)) {
2072
1
                        continue;
2073
1
                    }
2074
59
                }
2075
2076
58
                const auto legs_count = json_path[path_index].get_leg_vector_size();
2077
58
                leg_info* last_leg =
2078
58
                        legs_count > 0
2079
58
                                ? json_path[path_index].get_leg_from_leg_vector(legs_count - 1)
2080
58
                                : nullptr;
2081
58
                RETURN_IF_ERROR(write_json_value(json_documents[row_idx]->getValue(), parents, 0,
2082
58
                                                 json_value[value_index], replace, last_leg,
2083
58
                                                 writer));
2084
2085
58
                auto* writer_output = writer.getOutput();
2086
58
                if (writer_output->getSize() > tmp_buffer.capacity) {
2087
19
                    tmp_buffer.capacity =
2088
19
                            ((size_t(writer_output->getSize()) + 1024 - 1) / 1024) * 1024;
2089
19
                    tmp_buffer.ptr = make_unique_buffer<char>(tmp_buffer.capacity);
2090
19
                    DCHECK_LE(writer_output->getSize(), tmp_buffer.capacity);
2091
19
                }
2092
2093
58
                memcpy(tmp_buffer.ptr.get(), writer_output->getBuffer(), writer_output->getSize());
2094
58
                tmp_buffer.size = writer_output->getSize();
2095
2096
58
                writer.reset();
2097
2098
58
                RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(
2099
58
                        tmp_buffer.ptr.get(), tmp_buffer.size, &json_documents[row_idx]));
2100
58
            }
2101
2102
53
            if (!null_map[row_idx]) {
2103
33
                const auto* jsonb_document = json_documents[row_idx];
2104
33
                const auto size = jsonb_document->numPackedBytes();
2105
33
                res_chars.insert(reinterpret_cast<const char*>(jsonb_document),
2106
33
                                 reinterpret_cast<const char*>(jsonb_document) + size);
2107
33
            }
2108
2109
53
            res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size());
2110
2111
53
            if (!null_map[row_idx]) {
2112
33
                auto* ptr = res_chars.data() + res_offsets[row_idx - 1];
2113
33
                auto size = res_offsets[row_idx] - res_offsets[row_idx - 1];
2114
33
                const JsonbDocument* doc = nullptr;
2115
33
                THROW_IF_ERROR(JsonbDocument::checkAndCreateDocument(
2116
33
                        reinterpret_cast<const char*>(ptr), size, &doc));
2117
33
            }
2118
53
        }
2119
2120
42
        block.get_by_position(result).column = std::move(result_column);
2121
42
        return Status::OK();
2122
24
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
1871
27
                        uint32_t result, size_t input_rows_count) const override {
1872
27
        if (arguments.size() % 2 != 1 || arguments.size() < 3) {
1873
0
            return Status::InvalidArgument(
1874
0
                    "Function {} must have an odd number of arguments and more than 2 arguments, "
1875
0
                    "but got: {}",
1876
0
                    name, arguments.size());
1877
0
        }
1878
1879
27
        const size_t keys_count = (arguments.size() - 1) / 2;
1880
1881
27
        auto return_data_type = make_nullable(std::make_shared<DataTypeJsonb>());
1882
1883
27
        auto result_column = return_data_type->create_column();
1884
27
        auto& result_nullable_col = assert_cast<ColumnNullable&>(*result_column);
1885
27
        auto& null_map = result_nullable_col.get_null_map_data();
1886
27
        auto& res_string_column =
1887
27
                assert_cast<ColumnString&>(result_nullable_col.get_nested_column());
1888
27
        auto& res_chars = res_string_column.get_chars();
1889
27
        auto& res_offsets = res_string_column.get_offsets();
1890
1891
27
        null_map.resize_fill(input_rows_count, 0);
1892
27
        res_offsets.resize(input_rows_count);
1893
27
        auto&& [json_data_arg_column, json_data_const] =
1894
27
                unpack_if_const(block.get_by_position(arguments[0]).column);
1895
1896
27
        if (json_data_const) {
1897
3
            if (json_data_arg_column->is_null_at(0)) {
1898
0
                return create_all_null_result(return_data_type, block, result, input_rows_count);
1899
0
            }
1900
3
        }
1901
1902
27
        std::vector<const ColumnString*> json_path_columns(keys_count);
1903
27
        std::vector<bool> json_path_constant(keys_count);
1904
27
        std::vector<const NullMap*> json_path_null_maps(keys_count, nullptr);
1905
1906
27
        std::vector<const ColumnString*> json_value_columns(keys_count);
1907
27
        std::vector<bool> json_value_constant(keys_count);
1908
27
        std::vector<const NullMap*> json_value_null_maps(keys_count, nullptr);
1909
1910
27
        const NullMap* json_data_null_map = nullptr;
1911
27
        const ColumnString* json_data_column;
1912
27
        if (json_data_arg_column->is_nullable()) {
1913
27
            const auto& nullable_column = assert_cast<const ColumnNullable&>(*json_data_arg_column);
1914
27
            json_data_null_map = &nullable_column.get_null_map_data();
1915
27
            const auto& nested_column = nullable_column.get_nested_column();
1916
27
            json_data_column = assert_cast<const ColumnString*>(&nested_column);
1917
27
        } else {
1918
0
            json_data_column = assert_cast<const ColumnString*>(json_data_arg_column.get());
1919
0
        }
1920
1921
62
        for (size_t i = 1; i < arguments.size(); i += 2) {
1922
35
            auto&& [path_column, path_const] =
1923
35
                    unpack_if_const(block.get_by_position(arguments[i]).column);
1924
35
            auto&& [value_column, value_const] =
1925
35
                    unpack_if_const(block.get_by_position(arguments[i + 1]).column);
1926
1927
35
            if (path_const) {
1928
9
                if (path_column->is_null_at(0)) {
1929
0
                    return create_all_null_result(return_data_type, block, result,
1930
0
                                                  input_rows_count);
1931
0
                }
1932
9
            }
1933
1934
35
            json_path_constant[i / 2] = path_const;
1935
35
            if (path_column->is_nullable()) {
1936
1
                const auto& nullable_column = assert_cast<const ColumnNullable&>(*path_column);
1937
1
                json_path_null_maps[i / 2] = &nullable_column.get_null_map_data();
1938
1
                const auto& nested_column = nullable_column.get_nested_column();
1939
1
                json_path_columns[i / 2] = assert_cast<const ColumnString*>(&nested_column);
1940
34
            } else {
1941
34
                json_path_columns[i / 2] = assert_cast<const ColumnString*>(path_column.get());
1942
34
            }
1943
1944
35
            json_value_constant[i / 2] = value_const;
1945
35
            if (value_column->is_nullable()) {
1946
16
                const auto& nullable_column = assert_cast<const ColumnNullable&>(*value_column);
1947
16
                json_value_null_maps[i / 2] = &nullable_column.get_null_map_data();
1948
16
                const auto& nested_column = nullable_column.get_nested_column();
1949
16
                json_value_columns[i / 2] = assert_cast<const ColumnString*>(&nested_column);
1950
19
            } else {
1951
19
                json_value_columns[i / 2] = assert_cast<const ColumnString*>(value_column.get());
1952
19
            }
1953
35
        }
1954
1955
27
        DorisVector<const JsonbDocument*> json_documents(input_rows_count);
1956
27
        if (json_data_const) {
1957
3
            auto json_data_string = json_data_column->get_data_at(0);
1958
3
            const JsonbDocument* doc = nullptr;
1959
3
            RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(json_data_string.data,
1960
3
                                                                  json_data_string.size, &doc));
1961
3
            if (!doc || !doc->getValue()) [[unlikely]] {
1962
0
                return create_all_null_result(return_data_type, block, result, input_rows_count);
1963
0
            }
1964
17
            for (size_t i = 0; i != input_rows_count; ++i) {
1965
14
                json_documents[i] = doc;
1966
14
            }
1967
24
        } else {
1968
48
            for (size_t i = 0; i != input_rows_count; ++i) {
1969
24
                if (json_data_null_map && (*json_data_null_map)[i]) {
1970
0
                    null_map[i] = 1;
1971
0
                    json_documents[i] = nullptr;
1972
0
                    continue;
1973
0
                }
1974
1975
24
                auto json_data_string = json_data_column->get_data_at(i);
1976
24
                const JsonbDocument* doc = nullptr;
1977
24
                RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(json_data_string.data,
1978
24
                                                                      json_data_string.size, &doc));
1979
24
                if (!doc || !doc->getValue()) [[unlikely]] {
1980
0
                    null_map[i] = 1;
1981
0
                    continue;
1982
0
                }
1983
24
                json_documents[i] = doc;
1984
24
            }
1985
24
        }
1986
1987
27
        DorisVector<DorisVector<JsonbPath>> json_paths(keys_count);
1988
27
        DorisVector<DorisVector<const JsonbValue*>> json_values(keys_count);
1989
1990
27
        RETURN_IF_ERROR(parse_paths_and_values(json_paths, json_values, arguments, input_rows_count,
1991
27
                                               json_path_columns, json_path_constant,
1992
27
                                               json_path_null_maps, json_value_columns,
1993
27
                                               json_value_constant, json_value_null_maps));
1994
1995
25
        JsonbWriter writer;
1996
25
        struct DocumentBuffer {
1997
25
            DorisUniqueBufferPtr<char> ptr;
1998
25
            size_t size = 0;
1999
25
            size_t capacity = 0;
2000
25
        };
2001
2002
25
        DocumentBuffer tmp_buffer;
2003
2004
61
        for (size_t row_idx = 0; row_idx != input_rows_count; ++row_idx) {
2005
101
            for (size_t i = 1; i < arguments.size(); i += 2) {
2006
65
                const size_t index = i / 2;
2007
65
                auto& json_path = json_paths[index];
2008
65
                auto& json_value = json_values[index];
2009
2010
65
                const auto path_index = index_check_const(row_idx, json_path_constant[index]);
2011
65
                const auto value_index = index_check_const(row_idx, json_value_constant[index]);
2012
2013
65
                if (null_map[row_idx]) {
2014
0
                    continue;
2015
0
                }
2016
2017
65
                if (json_documents[row_idx] == nullptr) {
2018
0
                    null_map[row_idx] = 1;
2019
0
                    continue;
2020
0
                }
2021
2022
65
                if (json_path_null_maps[index] && (*json_path_null_maps[index])[path_index]) {
2023
1
                    null_map[row_idx] = 1;
2024
1
                    continue;
2025
1
                }
2026
2027
64
                auto find_result =
2028
64
                        json_documents[row_idx]->getValue()->findValue(json_path[path_index]);
2029
2030
64
                if (find_result.is_wildcard) {
2031
0
                    return Status::InvalidArgument(
2032
0
                            " In this situation, path expressions may not contain the * and ** "
2033
0
                            "tokens or an array range, argument index: {}, row index: {}",
2034
0
                            i, row_idx);
2035
0
                }
2036
2037
                if constexpr (modify_type == JsonbModifyType::Insert) {
2038
                    if (find_result.value) {
2039
                        continue;
2040
                    }
2041
64
                } else if constexpr (modify_type == JsonbModifyType::Replace) {
2042
64
                    if (!find_result.value) {
2043
64
                        continue;
2044
64
                    }
2045
64
                }
2046
2047
64
                std::vector<const JsonbValue*> parents;
2048
2049
64
                bool replace = false;
2050
64
                parents.emplace_back(json_documents[row_idx]->getValue());
2051
64
                if (find_result.value) {
2052
                    // find target path, replace it with the new value.
2053
44
                    replace = true;
2054
44
                    if (!build_parents_by_path(json_documents[row_idx]->getValue(),
2055
44
                                               json_path[path_index], parents)) {
2056
0
                        DCHECK(false);
2057
0
                        continue;
2058
0
                    }
2059
44
                } else {
2060
                    // does not find target path, insert the new value.
2061
20
                    JsonbPath new_path;
2062
37
                    for (size_t j = 0; j < json_path[path_index].get_leg_vector_size() - 1; ++j) {
2063
17
                        auto* current_leg = json_path[path_index].get_leg_from_leg_vector(j);
2064
17
                        std::unique_ptr<leg_info> leg = std::make_unique<leg_info>(
2065
17
                                current_leg->leg_ptr, current_leg->leg_len,
2066
17
                                current_leg->array_index, current_leg->type);
2067
17
                        new_path.add_leg_to_leg_vector(std::move(leg));
2068
17
                    }
2069
2070
20
                    if (!build_parents_by_path(json_documents[row_idx]->getValue(), new_path,
2071
20
                                               parents)) {
2072
11
                        continue;
2073
11
                    }
2074
20
                }
2075
2076
53
                const auto legs_count = json_path[path_index].get_leg_vector_size();
2077
53
                leg_info* last_leg =
2078
53
                        legs_count > 0
2079
53
                                ? json_path[path_index].get_leg_from_leg_vector(legs_count - 1)
2080
53
                                : nullptr;
2081
53
                RETURN_IF_ERROR(write_json_value(json_documents[row_idx]->getValue(), parents, 0,
2082
53
                                                 json_value[value_index], replace, last_leg,
2083
53
                                                 writer));
2084
2085
53
                auto* writer_output = writer.getOutput();
2086
53
                if (writer_output->getSize() > tmp_buffer.capacity) {
2087
23
                    tmp_buffer.capacity =
2088
23
                            ((size_t(writer_output->getSize()) + 1024 - 1) / 1024) * 1024;
2089
23
                    tmp_buffer.ptr = make_unique_buffer<char>(tmp_buffer.capacity);
2090
23
                    DCHECK_LE(writer_output->getSize(), tmp_buffer.capacity);
2091
23
                }
2092
2093
53
                memcpy(tmp_buffer.ptr.get(), writer_output->getBuffer(), writer_output->getSize());
2094
53
                tmp_buffer.size = writer_output->getSize();
2095
2096
53
                writer.reset();
2097
2098
53
                RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(
2099
53
                        tmp_buffer.ptr.get(), tmp_buffer.size, &json_documents[row_idx]));
2100
53
            }
2101
2102
36
            if (!null_map[row_idx]) {
2103
35
                const auto* jsonb_document = json_documents[row_idx];
2104
35
                const auto size = jsonb_document->numPackedBytes();
2105
35
                res_chars.insert(reinterpret_cast<const char*>(jsonb_document),
2106
35
                                 reinterpret_cast<const char*>(jsonb_document) + size);
2107
35
            }
2108
2109
36
            res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size());
2110
2111
36
            if (!null_map[row_idx]) {
2112
35
                auto* ptr = res_chars.data() + res_offsets[row_idx - 1];
2113
35
                auto size = res_offsets[row_idx] - res_offsets[row_idx - 1];
2114
35
                const JsonbDocument* doc = nullptr;
2115
35
                THROW_IF_ERROR(JsonbDocument::checkAndCreateDocument(
2116
35
                        reinterpret_cast<const char*>(ptr), size, &doc));
2117
35
            }
2118
36
        }
2119
2120
25
        block.get_by_position(result).column = std::move(result_column);
2121
25
        return Status::OK();
2122
25
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
1871
28
                        uint32_t result, size_t input_rows_count) const override {
1872
28
        if (arguments.size() % 2 != 1 || arguments.size() < 3) {
1873
0
            return Status::InvalidArgument(
1874
0
                    "Function {} must have an odd number of arguments and more than 2 arguments, "
1875
0
                    "but got: {}",
1876
0
                    name, arguments.size());
1877
0
        }
1878
1879
28
        const size_t keys_count = (arguments.size() - 1) / 2;
1880
1881
28
        auto return_data_type = make_nullable(std::make_shared<DataTypeJsonb>());
1882
1883
28
        auto result_column = return_data_type->create_column();
1884
28
        auto& result_nullable_col = assert_cast<ColumnNullable&>(*result_column);
1885
28
        auto& null_map = result_nullable_col.get_null_map_data();
1886
28
        auto& res_string_column =
1887
28
                assert_cast<ColumnString&>(result_nullable_col.get_nested_column());
1888
28
        auto& res_chars = res_string_column.get_chars();
1889
28
        auto& res_offsets = res_string_column.get_offsets();
1890
1891
28
        null_map.resize_fill(input_rows_count, 0);
1892
28
        res_offsets.resize(input_rows_count);
1893
28
        auto&& [json_data_arg_column, json_data_const] =
1894
28
                unpack_if_const(block.get_by_position(arguments[0]).column);
1895
1896
28
        if (json_data_const) {
1897
3
            if (json_data_arg_column->is_null_at(0)) {
1898
0
                return create_all_null_result(return_data_type, block, result, input_rows_count);
1899
0
            }
1900
3
        }
1901
1902
28
        std::vector<const ColumnString*> json_path_columns(keys_count);
1903
28
        std::vector<bool> json_path_constant(keys_count);
1904
28
        std::vector<const NullMap*> json_path_null_maps(keys_count, nullptr);
1905
1906
28
        std::vector<const ColumnString*> json_value_columns(keys_count);
1907
28
        std::vector<bool> json_value_constant(keys_count);
1908
28
        std::vector<const NullMap*> json_value_null_maps(keys_count, nullptr);
1909
1910
28
        const NullMap* json_data_null_map = nullptr;
1911
28
        const ColumnString* json_data_column;
1912
28
        if (json_data_arg_column->is_nullable()) {
1913
28
            const auto& nullable_column = assert_cast<const ColumnNullable&>(*json_data_arg_column);
1914
28
            json_data_null_map = &nullable_column.get_null_map_data();
1915
28
            const auto& nested_column = nullable_column.get_nested_column();
1916
28
            json_data_column = assert_cast<const ColumnString*>(&nested_column);
1917
28
        } else {
1918
0
            json_data_column = assert_cast<const ColumnString*>(json_data_arg_column.get());
1919
0
        }
1920
1921
66
        for (size_t i = 1; i < arguments.size(); i += 2) {
1922
38
            auto&& [path_column, path_const] =
1923
38
                    unpack_if_const(block.get_by_position(arguments[i]).column);
1924
38
            auto&& [value_column, value_const] =
1925
38
                    unpack_if_const(block.get_by_position(arguments[i + 1]).column);
1926
1927
38
            if (path_const) {
1928
11
                if (path_column->is_null_at(0)) {
1929
0
                    return create_all_null_result(return_data_type, block, result,
1930
0
                                                  input_rows_count);
1931
0
                }
1932
11
            }
1933
1934
38
            json_path_constant[i / 2] = path_const;
1935
38
            if (path_column->is_nullable()) {
1936
1
                const auto& nullable_column = assert_cast<const ColumnNullable&>(*path_column);
1937
1
                json_path_null_maps[i / 2] = &nullable_column.get_null_map_data();
1938
1
                const auto& nested_column = nullable_column.get_nested_column();
1939
1
                json_path_columns[i / 2] = assert_cast<const ColumnString*>(&nested_column);
1940
37
            } else {
1941
37
                json_path_columns[i / 2] = assert_cast<const ColumnString*>(path_column.get());
1942
37
            }
1943
1944
38
            json_value_constant[i / 2] = value_const;
1945
38
            if (value_column->is_nullable()) {
1946
19
                const auto& nullable_column = assert_cast<const ColumnNullable&>(*value_column);
1947
19
                json_value_null_maps[i / 2] = &nullable_column.get_null_map_data();
1948
19
                const auto& nested_column = nullable_column.get_nested_column();
1949
19
                json_value_columns[i / 2] = assert_cast<const ColumnString*>(&nested_column);
1950
19
            } else {
1951
19
                json_value_columns[i / 2] = assert_cast<const ColumnString*>(value_column.get());
1952
19
            }
1953
38
        }
1954
1955
28
        DorisVector<const JsonbDocument*> json_documents(input_rows_count);
1956
28
        if (json_data_const) {
1957
3
            auto json_data_string = json_data_column->get_data_at(0);
1958
3
            const JsonbDocument* doc = nullptr;
1959
3
            RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(json_data_string.data,
1960
3
                                                                  json_data_string.size, &doc));
1961
3
            if (!doc || !doc->getValue()) [[unlikely]] {
1962
0
                return create_all_null_result(return_data_type, block, result, input_rows_count);
1963
0
            }
1964
15
            for (size_t i = 0; i != input_rows_count; ++i) {
1965
12
                json_documents[i] = doc;
1966
12
            }
1967
25
        } else {
1968
50
            for (size_t i = 0; i != input_rows_count; ++i) {
1969
25
                if (json_data_null_map && (*json_data_null_map)[i]) {
1970
0
                    null_map[i] = 1;
1971
0
                    json_documents[i] = nullptr;
1972
0
                    continue;
1973
0
                }
1974
1975
25
                auto json_data_string = json_data_column->get_data_at(i);
1976
25
                const JsonbDocument* doc = nullptr;
1977
25
                RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(json_data_string.data,
1978
25
                                                                      json_data_string.size, &doc));
1979
25
                if (!doc || !doc->getValue()) [[unlikely]] {
1980
0
                    null_map[i] = 1;
1981
0
                    continue;
1982
0
                }
1983
25
                json_documents[i] = doc;
1984
25
            }
1985
25
        }
1986
1987
28
        DorisVector<DorisVector<JsonbPath>> json_paths(keys_count);
1988
28
        DorisVector<DorisVector<const JsonbValue*>> json_values(keys_count);
1989
1990
28
        RETURN_IF_ERROR(parse_paths_and_values(json_paths, json_values, arguments, input_rows_count,
1991
28
                                               json_path_columns, json_path_constant,
1992
28
                                               json_path_null_maps, json_value_columns,
1993
28
                                               json_value_constant, json_value_null_maps));
1994
1995
26
        JsonbWriter writer;
1996
26
        struct DocumentBuffer {
1997
26
            DorisUniqueBufferPtr<char> ptr;
1998
26
            size_t size = 0;
1999
26
            size_t capacity = 0;
2000
26
        };
2001
2002
26
        DocumentBuffer tmp_buffer;
2003
2004
72
        for (size_t row_idx = 0; row_idx != input_rows_count; ++row_idx) {
2005
114
            for (size_t i = 1; i < arguments.size(); i += 2) {
2006
68
                const size_t index = i / 2;
2007
68
                auto& json_path = json_paths[index];
2008
68
                auto& json_value = json_values[index];
2009
2010
68
                const auto path_index = index_check_const(row_idx, json_path_constant[index]);
2011
68
                const auto value_index = index_check_const(row_idx, json_value_constant[index]);
2012
2013
68
                if (null_map[row_idx]) {
2014
0
                    continue;
2015
0
                }
2016
2017
68
                if (json_documents[row_idx] == nullptr) {
2018
0
                    null_map[row_idx] = 1;
2019
0
                    continue;
2020
0
                }
2021
2022
68
                if (json_path_null_maps[index] && (*json_path_null_maps[index])[path_index]) {
2023
1
                    null_map[row_idx] = 1;
2024
1
                    continue;
2025
1
                }
2026
2027
67
                auto find_result =
2028
67
                        json_documents[row_idx]->getValue()->findValue(json_path[path_index]);
2029
2030
67
                if (find_result.is_wildcard) {
2031
0
                    return Status::InvalidArgument(
2032
0
                            " In this situation, path expressions may not contain the * and ** "
2033
0
                            "tokens or an array range, argument index: {}, row index: {}",
2034
0
                            i, row_idx);
2035
0
                }
2036
2037
                if constexpr (modify_type == JsonbModifyType::Insert) {
2038
                    if (find_result.value) {
2039
                        continue;
2040
                    }
2041
67
                } else if constexpr (modify_type == JsonbModifyType::Replace) {
2042
67
                    if (!find_result.value) {
2043
11
                        continue;
2044
11
                    }
2045
67
                }
2046
2047
56
                std::vector<const JsonbValue*> parents;
2048
2049
67
                bool replace = false;
2050
67
                parents.emplace_back(json_documents[row_idx]->getValue());
2051
67
                if (find_result.value) {
2052
                    // find target path, replace it with the new value.
2053
56
                    replace = true;
2054
56
                    if (!build_parents_by_path(json_documents[row_idx]->getValue(),
2055
56
                                               json_path[path_index], parents)) {
2056
0
                        DCHECK(false);
2057
0
                        continue;
2058
0
                    }
2059
56
                } else {
2060
                    // does not find target path, insert the new value.
2061
11
                    JsonbPath new_path;
2062
11
                    for (size_t j = 0; j < json_path[path_index].get_leg_vector_size() - 1; ++j) {
2063
0
                        auto* current_leg = json_path[path_index].get_leg_from_leg_vector(j);
2064
0
                        std::unique_ptr<leg_info> leg = std::make_unique<leg_info>(
2065
0
                                current_leg->leg_ptr, current_leg->leg_len,
2066
0
                                current_leg->array_index, current_leg->type);
2067
0
                        new_path.add_leg_to_leg_vector(std::move(leg));
2068
0
                    }
2069
2070
11
                    if (!build_parents_by_path(json_documents[row_idx]->getValue(), new_path,
2071
11
                                               parents)) {
2072
0
                        continue;
2073
0
                    }
2074
11
                }
2075
2076
67
                const auto legs_count = json_path[path_index].get_leg_vector_size();
2077
67
                leg_info* last_leg =
2078
67
                        legs_count > 0
2079
67
                                ? json_path[path_index].get_leg_from_leg_vector(legs_count - 1)
2080
67
                                : nullptr;
2081
67
                RETURN_IF_ERROR(write_json_value(json_documents[row_idx]->getValue(), parents, 0,
2082
67
                                                 json_value[value_index], replace, last_leg,
2083
67
                                                 writer));
2084
2085
67
                auto* writer_output = writer.getOutput();
2086
67
                if (writer_output->getSize() > tmp_buffer.capacity) {
2087
23
                    tmp_buffer.capacity =
2088
23
                            ((size_t(writer_output->getSize()) + 1024 - 1) / 1024) * 1024;
2089
23
                    tmp_buffer.ptr = make_unique_buffer<char>(tmp_buffer.capacity);
2090
23
                    DCHECK_LE(writer_output->getSize(), tmp_buffer.capacity);
2091
23
                }
2092
2093
67
                memcpy(tmp_buffer.ptr.get(), writer_output->getBuffer(), writer_output->getSize());
2094
67
                tmp_buffer.size = writer_output->getSize();
2095
2096
67
                writer.reset();
2097
2098
67
                RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(
2099
67
                        tmp_buffer.ptr.get(), tmp_buffer.size, &json_documents[row_idx]));
2100
67
            }
2101
2102
46
            if (!null_map[row_idx]) {
2103
34
                const auto* jsonb_document = json_documents[row_idx];
2104
34
                const auto size = jsonb_document->numPackedBytes();
2105
34
                res_chars.insert(reinterpret_cast<const char*>(jsonb_document),
2106
34
                                 reinterpret_cast<const char*>(jsonb_document) + size);
2107
34
            }
2108
2109
46
            res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size());
2110
2111
46
            if (!null_map[row_idx]) {
2112
34
                auto* ptr = res_chars.data() + res_offsets[row_idx - 1];
2113
34
                auto size = res_offsets[row_idx] - res_offsets[row_idx - 1];
2114
34
                const JsonbDocument* doc = nullptr;
2115
34
                THROW_IF_ERROR(JsonbDocument::checkAndCreateDocument(
2116
34
                        reinterpret_cast<const char*>(ptr), size, &doc));
2117
34
            }
2118
46
        }
2119
2120
37
        block.get_by_position(result).column = std::move(result_column);
2121
37
        return Status::OK();
2122
26
    }
2123
2124
    bool build_parents_by_path(const JsonbValue* root, const JsonbPath& path,
2125
342
                               std::vector<const JsonbValue*>& parents) const {
2126
342
        const size_t index = parents.size() - 1;
2127
342
        if (index == path.get_leg_vector_size()) {
2128
143
            return true;
2129
143
        }
2130
2131
199
        JsonbPath current;
2132
199
        auto* current_leg = path.get_leg_from_leg_vector(index);
2133
199
        std::unique_ptr<leg_info> leg =
2134
199
                std::make_unique<leg_info>(current_leg->leg_ptr, current_leg->leg_len,
2135
199
                                           current_leg->array_index, current_leg->type);
2136
199
        current.add_leg_to_leg_vector(std::move(leg));
2137
2138
199
        auto find_result = root->findValue(current);
2139
199
        if (!find_result.value) {
2140
12
            std::string path_string;
2141
12
            current.to_string(&path_string);
2142
12
            return false;
2143
187
        } else if (find_result.value == root) {
2144
6
            return true;
2145
181
        } else {
2146
181
            parents.emplace_back(find_result.value);
2147
181
        }
2148
2149
181
        return build_parents_by_path(find_result.value, path, parents);
2150
199
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE21build_parents_by_pathEPKNS_10JsonbValueERKNS_9JsonbPathERSt6vectorIS5_SaIS5_EE
Line
Count
Source
2125
79
                               std::vector<const JsonbValue*>& parents) const {
2126
79
        const size_t index = parents.size() - 1;
2127
79
        if (index == path.get_leg_vector_size()) {
2128
40
            return true;
2129
40
        }
2130
2131
39
        JsonbPath current;
2132
39
        auto* current_leg = path.get_leg_from_leg_vector(index);
2133
39
        std::unique_ptr<leg_info> leg =
2134
39
                std::make_unique<leg_info>(current_leg->leg_ptr, current_leg->leg_len,
2135
39
                                           current_leg->array_index, current_leg->type);
2136
39
        current.add_leg_to_leg_vector(std::move(leg));
2137
2138
39
        auto find_result = root->findValue(current);
2139
39
        if (!find_result.value) {
2140
1
            std::string path_string;
2141
1
            current.to_string(&path_string);
2142
1
            return false;
2143
38
        } else if (find_result.value == root) {
2144
0
            return true;
2145
38
        } else {
2146
38
            parents.emplace_back(find_result.value);
2147
38
        }
2148
2149
38
        return build_parents_by_path(find_result.value, path, parents);
2150
39
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE21build_parents_by_pathEPKNS_10JsonbValueERKNS_9JsonbPathERSt6vectorIS5_SaIS5_EE
Line
Count
Source
2125
132
                               std::vector<const JsonbValue*>& parents) const {
2126
132
        const size_t index = parents.size() - 1;
2127
132
        if (index == path.get_leg_vector_size()) {
2128
50
            return true;
2129
50
        }
2130
2131
82
        JsonbPath current;
2132
82
        auto* current_leg = path.get_leg_from_leg_vector(index);
2133
82
        std::unique_ptr<leg_info> leg =
2134
82
                std::make_unique<leg_info>(current_leg->leg_ptr, current_leg->leg_len,
2135
82
                                           current_leg->array_index, current_leg->type);
2136
82
        current.add_leg_to_leg_vector(std::move(leg));
2137
2138
82
        auto find_result = root->findValue(current);
2139
82
        if (!find_result.value) {
2140
11
            std::string path_string;
2141
11
            current.to_string(&path_string);
2142
11
            return false;
2143
71
        } else if (find_result.value == root) {
2144
3
            return true;
2145
68
        } else {
2146
68
            parents.emplace_back(find_result.value);
2147
68
        }
2148
2149
68
        return build_parents_by_path(find_result.value, path, parents);
2150
82
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE21build_parents_by_pathEPKNS_10JsonbValueERKNS_9JsonbPathERSt6vectorIS5_SaIS5_EE
Line
Count
Source
2125
131
                               std::vector<const JsonbValue*>& parents) const {
2126
131
        const size_t index = parents.size() - 1;
2127
131
        if (index == path.get_leg_vector_size()) {
2128
53
            return true;
2129
53
        }
2130
2131
78
        JsonbPath current;
2132
78
        auto* current_leg = path.get_leg_from_leg_vector(index);
2133
78
        std::unique_ptr<leg_info> leg =
2134
78
                std::make_unique<leg_info>(current_leg->leg_ptr, current_leg->leg_len,
2135
78
                                           current_leg->array_index, current_leg->type);
2136
78
        current.add_leg_to_leg_vector(std::move(leg));
2137
2138
78
        auto find_result = root->findValue(current);
2139
78
        if (!find_result.value) {
2140
0
            std::string path_string;
2141
0
            current.to_string(&path_string);
2142
0
            return false;
2143
78
        } else if (find_result.value == root) {
2144
3
            return true;
2145
75
        } else {
2146
75
            parents.emplace_back(find_result.value);
2147
75
        }
2148
2149
75
        return build_parents_by_path(find_result.value, path, parents);
2150
78
    }
2151
2152
    /// Re-serializes `root` into `writer`, following the chain in `parents` and
    /// putting `value` at the end of that chain.
    ///
    /// @param root         value to serialize; must be `parents[parent_index]`
    /// @param parents      chain of values from the document root down to the
    ///                     container (or, when `replace` is set, the target itself)
    /// @param parent_index position of `root` inside `parents`
    /// @param value        value to write; nullptr is written as a JSON null
    /// @param replace      true: the last entry of `parents` is overwritten by
    ///                     `value`; false: `value` is added to the last entry
    /// @param last_leg     final leg of the target path (key or array index);
    ///                     required whenever a value has to be inserted
    /// @param writer       output writer, shared by the whole recursion
    /// @return OK on success, InvalidArgument when the arguments are inconsistent
    ///         or the value could not be placed.
    Status write_json_value(const JsonbValue* root, const std::vector<const JsonbValue*>& parents,
                            const size_t parent_index, const JsonbValue* value, const bool replace,
                            const leg_info* last_leg, JsonbWriter& writer) const {
        if (parent_index >= parents.size()) {
            return Status::InvalidArgument(
                    "JsonbModify: parent_index {} is out of bounds for parents size {}",
                    parent_index, parents.size());
        }

        if (parents[parent_index] != root) {
            return Status::InvalidArgument(
                    "JsonbModify: parent value does not match root value, parent_index: {}, "
                    "parents size: {}",
                    parent_index, parents.size());
        }

        // Single place that emits the new value, so that a null `value` is
        // written as a JSON null on the insert paths as well as on replace.
        auto write_new_value = [&writer, value]() {
            if (value == nullptr) {
                writer.writeNull();
            } else {
                writer.writeValue(value);
            }
        };

        const bool is_last_parent = (parent_index == parents.size() - 1);
        if (is_last_parent && replace) {
            // We are at the last parent, write the value directly
            write_new_value();
            return Status::OK();
        }

        // Past this point the last parent is a container that receives a new
        // element, which is addressed through the final path leg.
        if (is_last_parent && last_leg == nullptr) {
            return Status::InvalidArgument(
                    "JsonbModify: missing path leg for insertion, parent_index: {}, "
                    "parents size: {}",
                    parent_index, parents.size());
        }

        bool value_written = false;
        const auto* next_parent = is_last_parent ? nullptr : parents[parent_index + 1];
        if (root->isArray()) {
            writer.writeStartArray();
            const auto* array_val = root->unpack<ArrayVal>();
            for (int i = 0; i != array_val->numElem(); ++i) {
                auto* it = array_val->get(i);

                if (is_last_parent && last_leg->array_index == i) {
                    value_written = true;
                    write_new_value();
                } else if (it == next_parent) {
                    // This element lies on the path: descend instead of copying it.
                    value_written = true;
                    RETURN_IF_ERROR(write_json_value(it, parents, parent_index + 1, value, replace,
                                                     last_leg, writer));
                } else {
                    writer.writeValue(it);
                }
            }
            if (is_last_parent && !value_written) {
                // The index is not present in the array: append at the end.
                value_written = true;
                write_new_value();
            }

            writer.writeEndArray();

        } else {
            /**
                Because even for a non-array object, `$[0]` can still point to that object:
                ```
                select json_extract('{"key": "value"}', '$[0]');
                +------------------------------------------+
                | json_extract('{"key": "value"}', '$[0]') |
                +------------------------------------------+
                | {"key": "value"}                         |
                +------------------------------------------+
                ```
                So when inserting an element into `$[1]`, even if '$' does not represent an array,
                it should be converted to an array before insertion:
                ```
                select json_insert('123','$[1]', null);
                +---------------------------------+
                | json_insert('123','$[1]', null) |
                +---------------------------------+
                | [123, null]                     |
                +---------------------------------+
                ```
             */
            if (is_last_parent && last_leg->type == ARRAY_CODE) {
                writer.writeStartArray();
                writer.writeValue(root);
                write_new_value();
                writer.writeEndArray();
                return Status::OK();
            } else if (root->isObject()) {
                writer.writeStartObject();
                const auto* object_val = root->unpack<ObjectVal>();
                for (const auto& it : *object_val) {
                    writer.writeKey(it.getKeyStr(), it.klen());
                    if (it.value() == next_parent) {
                        // This member lies on the path: descend instead of copying it.
                        value_written = true;
                        RETURN_IF_ERROR(write_json_value(it.value(), parents, parent_index + 1,
                                                         value, replace, last_leg, writer));
                    } else {
                        writer.writeValue(it.value());
                    }
                }

                if (is_last_parent && !value_written) {
                    // Refuse keys whose length would be silently truncated by the
                    // 8-bit cast below, which would store a different key.
                    const auto key_len = static_cast<uint8_t>(last_leg->leg_len);
                    if (key_len != last_leg->leg_len) {
                        return Status::InvalidArgument(
                                "JsonbModify: key length {} is too long to be written",
                                last_leg->leg_len);
                    }

                    // The new member is appended to the object that is still open;
                    // no nested start/end pair is emitted, because the
                    // writeEndObject() below is the one that closes this object.
                    value_written = true;
                    writer.writeKey(last_leg->leg_ptr, key_len);
                    write_new_value();
                }
                writer.writeEndObject();

            } else {
                return Status::InvalidArgument("Cannot insert value into this type");
            }
        }

        if (!value_written) {
            return Status::InvalidArgument(
                    "JsonbModify: value not written, parent_index: {}, parents size: {}",
                    parent_index, parents.size());
        }

        return Status::OK();
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE16write_json_valueEPKNS_10JsonbValueERKSt6vectorIS5_SaIS5_EEmS5_bPKNS_8leg_infoERNS_12JsonbWriterTINS_14JsonbOutStreamEEE
Line
Count
Source
2154
78
                            const leg_info* last_leg, JsonbWriter& writer) const {
2155
78
        if (parent_index >= parents.size()) {
2156
0
            return Status::InvalidArgument(
2157
0
                    "JsonbModify: parent_index {} is out of bounds for parents size {}",
2158
0
                    parent_index, parents.size());
2159
0
        }
2160
2161
78
        if (parents[parent_index] != root) {
2162
0
            return Status::InvalidArgument(
2163
0
                    "JsonbModify: parent value does not match root value, parent_index: {}, "
2164
0
                    "parents size: {}",
2165
0
                    parent_index, parents.size());
2166
0
        }
2167
2168
78
        if (parent_index == parents.size() - 1 && replace) {
2169
            // We are at the last parent, write the value directly
2170
0
            if (value == nullptr) {
2171
0
                writer.writeNull();
2172
0
            } else {
2173
0
                writer.writeValue(value);
2174
0
            }
2175
0
            return Status::OK();
2176
0
        }
2177
2178
78
        bool value_written = false;
2179
78
        bool is_last_parent = (parent_index == parents.size() - 1);
2180
78
        const auto* next_parent = is_last_parent ? nullptr : parents[parent_index + 1];
2181
78
        if (root->isArray()) {
2182
4
            writer.writeStartArray();
2183
4
            const auto* array_val = root->unpack<ArrayVal>();
2184
12
            for (int i = 0; i != array_val->numElem(); ++i) {
2185
8
                auto* it = array_val->get(i);
2186
2187
8
                if (is_last_parent && last_leg->array_index == i) {
2188
0
                    value_written = true;
2189
0
                    writer.writeValue(value);
2190
8
                } else if (it == next_parent) {
2191
0
                    value_written = true;
2192
0
                    RETURN_IF_ERROR(write_json_value(it, parents, parent_index + 1, value, replace,
2193
0
                                                     last_leg, writer));
2194
8
                } else {
2195
8
                    writer.writeValue(it);
2196
8
                }
2197
8
            }
2198
4
            if (is_last_parent && !value_written) {
2199
4
                value_written = true;
2200
4
                writer.writeValue(value);
2201
4
            }
2202
2203
4
            writer.writeEndArray();
2204
2205
74
        } else {
2206
            /**
2207
                Because even for a non-array object, `$[0]` can still point to that object:
2208
                ```
2209
                select json_extract('{"key": "value"}', '$[0]');
2210
                +------------------------------------------+
2211
                | json_extract('{"key": "value"}', '$[0]') |
2212
                +------------------------------------------+
2213
                | {"key": "value"}                         |
2214
                +------------------------------------------+
2215
                ```
2216
                So when inserting an element into `$[1]`, even if '$' does not represent an array,
2217
                it should be converted to an array before insertion:
2218
                ```
2219
                select json_insert('123','$[1]', null);
2220
                +---------------------------------+
2221
                | json_insert('123','$[1]', null) |
2222
                +---------------------------------+
2223
                | [123, null]                     |
2224
                +---------------------------------+
2225
                ```
2226
             */
2227
74
            if (is_last_parent && last_leg && last_leg->type == ARRAY_CODE) {
2228
4
                writer.writeStartArray();
2229
4
                writer.writeValue(root);
2230
4
                writer.writeValue(value);
2231
4
                writer.writeEndArray();
2232
4
                return Status::OK();
2233
70
            } else if (root->isObject()) {
2234
70
                writer.writeStartObject();
2235
70
                const auto* object_val = root->unpack<ObjectVal>();
2236
70
                for (const auto& it : *object_val) {
2237
68
                    writer.writeKey(it.getKeyStr(), it.klen());
2238
68
                    if (it.value() == next_parent) {
2239
38
                        value_written = true;
2240
38
                        RETURN_IF_ERROR(write_json_value(it.value(), parents, parent_index + 1,
2241
38
                                                         value, replace, last_leg, writer));
2242
38
                    } else {
2243
30
                        writer.writeValue(it.value());
2244
30
                    }
2245
68
                }
2246
2247
70
                if (is_last_parent && !value_written) {
2248
32
                    value_written = true;
2249
32
                    writer.writeStartObject();
2250
32
                    writer.writeKey(last_leg->leg_ptr, static_cast<uint8_t>(last_leg->leg_len));
2251
32
                    writer.writeValue(value);
2252
32
                    writer.writeEndObject();
2253
32
                }
2254
70
                writer.writeEndObject();
2255
2256
70
            } else {
2257
0
                return Status::InvalidArgument("Cannot insert value into this type");
2258
0
            }
2259
74
        }
2260
2261
74
        if (!value_written) {
2262
0
            return Status::InvalidArgument(
2263
0
                    "JsonbModify: value not written, parent_index: {}, parents size: {}",
2264
0
                    parent_index, parents.size());
2265
0
        }
2266
2267
74
        return Status::OK();
2268
74
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE16write_json_valueEPKNS_10JsonbValueERKSt6vectorIS5_SaIS5_EEmS5_bPKNS_8leg_infoERNS_12JsonbWriterTINS_14JsonbOutStreamEEE
Line
Count
Source
2154
121
                            const leg_info* last_leg, JsonbWriter& writer) const {
2155
121
        if (parent_index >= parents.size()) {
2156
0
            return Status::InvalidArgument(
2157
0
                    "JsonbModify: parent_index {} is out of bounds for parents size {}",
2158
0
                    parent_index, parents.size());
2159
0
        }
2160
2161
121
        if (parents[parent_index] != root) {
2162
0
            return Status::InvalidArgument(
2163
0
                    "JsonbModify: parent value does not match root value, parent_index: {}, "
2164
0
                    "parents size: {}",
2165
0
                    parent_index, parents.size());
2166
0
        }
2167
2168
121
        if (parent_index == parents.size() - 1 && replace) {
2169
            // We are at the last parent, write the value directly
2170
44
            if (value == nullptr) {
2171
10
                writer.writeNull();
2172
34
            } else {
2173
34
                writer.writeValue(value);
2174
34
            }
2175
44
            return Status::OK();
2176
44
        }
2177
2178
77
        bool value_written = false;
2179
77
        bool is_last_parent = (parent_index == parents.size() - 1);
2180
77
        const auto* next_parent = is_last_parent ? nullptr : parents[parent_index + 1];
2181
77
        if (root->isArray()) {
2182
9
            writer.writeStartArray();
2183
9
            const auto* array_val = root->unpack<ArrayVal>();
2184
27
            for (int i = 0; i != array_val->numElem(); ++i) {
2185
18
                auto* it = array_val->get(i);
2186
2187
18
                if (is_last_parent && last_leg->array_index == i) {
2188
0
                    value_written = true;
2189
0
                    writer.writeValue(value);
2190
18
                } else if (it == next_parent) {
2191
5
                    value_written = true;
2192
5
                    RETURN_IF_ERROR(write_json_value(it, parents, parent_index + 1, value, replace,
2193
5
                                                     last_leg, writer));
2194
13
                } else {
2195
13
                    writer.writeValue(it);
2196
13
                }
2197
18
            }
2198
9
            if (is_last_parent && !value_written) {
2199
4
                value_written = true;
2200
4
                writer.writeValue(value);
2201
4
            }
2202
2203
9
            writer.writeEndArray();
2204
2205
68
        } else {
2206
            /**
2207
                Because even for a non-array object, `$[0]` can still point to that object:
2208
                ```
2209
                select json_extract('{"key": "value"}', '$[0]');
2210
                +------------------------------------------+
2211
                | json_extract('{"key": "value"}', '$[0]') |
2212
                +------------------------------------------+
2213
                | {"key": "value"}                         |
2214
                +------------------------------------------+
2215
                ```
2216
                So when inserting an element into `$[1]`, even if '$' does not represent an array,
2217
                it should be converted to an array before insertion:
2218
                ```
2219
                select json_insert('123','$[1]', null);
2220
                +---------------------------------+
2221
                | json_insert('123','$[1]', null) |
2222
                +---------------------------------+
2223
                | [123, null]                     |
2224
                +---------------------------------+
2225
                ```
2226
             */
2227
68
            if (is_last_parent && last_leg && last_leg->type == ARRAY_CODE) {
2228
4
                writer.writeStartArray();
2229
4
                writer.writeValue(root);
2230
4
                writer.writeValue(value);
2231
4
                writer.writeEndArray();
2232
4
                return Status::OK();
2233
64
            } else if (root->isObject()) {
2234
64
                writer.writeStartObject();
2235
64
                const auto* object_val = root->unpack<ObjectVal>();
2236
156
                for (const auto& it : *object_val) {
2237
156
                    writer.writeKey(it.getKeyStr(), it.klen());
2238
156
                    if (it.value() == next_parent) {
2239
63
                        value_written = true;
2240
63
                        RETURN_IF_ERROR(write_json_value(it.value(), parents, parent_index + 1,
2241
63
                                                         value, replace, last_leg, writer));
2242
93
                    } else {
2243
93
                        writer.writeValue(it.value());
2244
93
                    }
2245
156
                }
2246
2247
64
                if (is_last_parent && !value_written) {
2248
1
                    value_written = true;
2249
1
                    writer.writeStartObject();
2250
1
                    writer.writeKey(last_leg->leg_ptr, static_cast<uint8_t>(last_leg->leg_len));
2251
1
                    writer.writeValue(value);
2252
1
                    writer.writeEndObject();
2253
1
                }
2254
64
                writer.writeEndObject();
2255
2256
64
            } else {
2257
0
                return Status::InvalidArgument("Cannot insert value into this type");
2258
0
            }
2259
68
        }
2260
2261
73
        if (!value_written) {
2262
0
            return Status::InvalidArgument(
2263
0
                    "JsonbModify: value not written, parent_index: {}, parents size: {}",
2264
0
                    parent_index, parents.size());
2265
0
        }
2266
2267
73
        return Status::OK();
2268
73
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE16write_json_valueEPKNS_10JsonbValueERKSt6vectorIS5_SaIS5_EEmS5_bPKNS_8leg_infoERNS_12JsonbWriterTINS_14JsonbOutStreamEEE
Line
Count
Source
2154
131
                            const leg_info* last_leg, JsonbWriter& writer) const {
2155
131
        if (parent_index >= parents.size()) {
2156
0
            return Status::InvalidArgument(
2157
0
                    "JsonbModify: parent_index {} is out of bounds for parents size {}",
2158
0
                    parent_index, parents.size());
2159
0
        }
2160
2161
131
        if (parents[parent_index] != root) {
2162
0
            return Status::InvalidArgument(
2163
0
                    "JsonbModify: parent value does not match root value, parent_index: {}, "
2164
0
                    "parents size: {}",
2165
0
                    parent_index, parents.size());
2166
0
        }
2167
2168
131
        if (parent_index == parents.size() - 1 && replace) {
2169
            // We are at the last parent, write the value directly
2170
56
            if (value == nullptr) {
2171
14
                writer.writeNull();
2172
42
            } else {
2173
42
                writer.writeValue(value);
2174
42
            }
2175
56
            return Status::OK();
2176
56
        }
2177
2178
75
        bool value_written = false;
2179
75
        bool is_last_parent = (parent_index == parents.size() - 1);
2180
75
        const auto* next_parent = is_last_parent ? nullptr : parents[parent_index + 1];
2181
75
        if (root->isArray()) {
2182
8
            writer.writeStartArray();
2183
8
            const auto* array_val = root->unpack<ArrayVal>();
2184
24
            for (int i = 0; i != array_val->numElem(); ++i) {
2185
16
                auto* it = array_val->get(i);
2186
2187
16
                if (is_last_parent && last_leg->array_index == i) {
2188
0
                    value_written = true;
2189
0
                    writer.writeValue(value);
2190
16
                } else if (it == next_parent) {
2191
8
                    value_written = true;
2192
8
                    RETURN_IF_ERROR(write_json_value(it, parents, parent_index + 1, value, replace,
2193
8
                                                     last_leg, writer));
2194
8
                } else {
2195
8
                    writer.writeValue(it);
2196
8
                }
2197
16
            }
2198
8
            if (is_last_parent && !value_written) {
2199
0
                value_written = true;
2200
0
                writer.writeValue(value);
2201
0
            }
2202
2203
8
            writer.writeEndArray();
2204
2205
67
        } else {
2206
            /**
2207
                Because even for a non-array object, `$[0]` can still point to that object:
2208
                ```
2209
                select json_extract('{"key": "value"}', '$[0]');
2210
                +------------------------------------------+
2211
                | json_extract('{"key": "value"}', '$[0]') |
2212
                +------------------------------------------+
2213
                | {"key": "value"}                         |
2214
                +------------------------------------------+
2215
                ```
2216
                So when inserting an element into `$[1]`, even if '$' does not represent an array,
2217
                it should be converted to an array before insertion:
2218
                ```
2219
                select json_insert('123','$[1]', null);
2220
                +---------------------------------+
2221
                | json_insert('123','$[1]', null) |
2222
                +---------------------------------+
2223
                | [123, null]                     |
2224
                +---------------------------------+
2225
                ```
2226
             */
2227
67
            if (is_last_parent && last_leg && last_leg->type == ARRAY_CODE) {
2228
0
                writer.writeStartArray();
2229
0
                writer.writeValue(root);
2230
0
                writer.writeValue(value);
2231
0
                writer.writeEndArray();
2232
0
                return Status::OK();
2233
67
            } else if (root->isObject()) {
2234
67
                writer.writeStartObject();
2235
67
                const auto* object_val = root->unpack<ObjectVal>();
2236
175
                for (const auto& it : *object_val) {
2237
175
                    writer.writeKey(it.getKeyStr(), it.klen());
2238
175
                    if (it.value() == next_parent) {
2239
67
                        value_written = true;
2240
67
                        RETURN_IF_ERROR(write_json_value(it.value(), parents, parent_index + 1,
2241
67
                                                         value, replace, last_leg, writer));
2242
108
                    } else {
2243
108
                        writer.writeValue(it.value());
2244
108
                    }
2245
175
                }
2246
2247
67
                if (is_last_parent && !value_written) {
2248
0
                    value_written = true;
2249
0
                    writer.writeStartObject();
2250
0
                    writer.writeKey(last_leg->leg_ptr, static_cast<uint8_t>(last_leg->leg_len));
2251
0
                    writer.writeValue(value);
2252
0
                    writer.writeEndObject();
2253
0
                }
2254
67
                writer.writeEndObject();
2255
2256
67
            } else {
2257
0
                return Status::InvalidArgument("Cannot insert value into this type");
2258
0
            }
2259
67
        }
2260
2261
75
        if (!value_written) {
2262
0
            return Status::InvalidArgument(
2263
0
                    "JsonbModify: value not written, parent_index: {}, parents size: {}",
2264
0
                    parent_index, parents.size());
2265
0
        }
2266
2267
75
        return Status::OK();
2268
75
    }
2269
2270
    Status parse_paths_and_values(DorisVector<DorisVector<JsonbPath>>& json_paths,
2271
                                  DorisVector<DorisVector<const JsonbValue*>>& json_values,
2272
                                  const ColumnNumbers& arguments, const size_t input_rows_count,
2273
                                  const std::vector<const ColumnString*>& json_path_columns,
2274
                                  const std::vector<bool>& json_path_constant,
2275
                                  const std::vector<const NullMap*>& json_path_null_maps,
2276
                                  const std::vector<const ColumnString*>& json_value_columns,
2277
                                  const std::vector<bool>& json_value_constant,
2278
83
                                  const std::vector<const NullMap*>& json_value_null_maps) const {
2279
183
        for (size_t i = 1; i < arguments.size(); i += 2) {
2280
108
            const size_t index = i / 2;
2281
108
            const auto* json_path_column = json_path_columns[index];
2282
108
            const auto* value_column = json_value_columns[index];
2283
2284
108
            json_paths[index].resize(json_path_constant[index] ? 1 : input_rows_count);
2285
108
            json_values[index].resize(json_value_constant[index] ? 1 : input_rows_count, nullptr);
2286
2287
217
            for (size_t row_idx = 0; row_idx != json_paths[index].size(); ++row_idx) {
2288
117
                if (json_path_null_maps[index] && (*json_path_null_maps[index])[row_idx]) {
2289
6
                    continue;
2290
6
                }
2291
2292
111
                auto path_string = json_path_column->get_data_at(row_idx);
2293
111
                if (!json_paths[index][row_idx].seek(path_string.data, path_string.size)) {
2294
3
                    return Status::InvalidArgument(
2295
3
                            "Json path error: Invalid Json Path for value: {}, "
2296
3
                            "argument "
2297
3
                            "index: {}, row index: {}",
2298
3
                            std::string_view(path_string.data, path_string.size), i, row_idx);
2299
3
                }
2300
2301
108
                if (json_paths[index][row_idx].is_wildcard()) {
2302
5
                    return Status::InvalidArgument(
2303
5
                            "In this situation, path expressions may not contain the * and ** "
2304
5
                            "tokens, argument index: {}, row index: {}",
2305
5
                            i, row_idx);
2306
5
                }
2307
108
            }
2308
2309
294
            for (size_t row_idx = 0; row_idx != json_values[index].size(); ++row_idx) {
2310
194
                if (json_value_null_maps[index] && (*json_value_null_maps[index])[row_idx]) {
2311
48
                    continue;
2312
48
                }
2313
2314
146
                auto value_string = value_column->get_data_at(row_idx);
2315
146
                const JsonbDocument* doc = nullptr;
2316
146
                RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(value_string.data,
2317
146
                                                                      value_string.size, &doc));
2318
146
                if (doc) {
2319
146
                    json_values[index][row_idx] = doc->getValue();
2320
146
                }
2321
146
            }
2322
100
        }
2323
2324
75
        return Status::OK();
2325
83
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE22parse_paths_and_valuesERSt6vectorIS3_INS_9JsonbPathENS_18CustomStdAllocatorIS4_NS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEEEEENS5_ISA_S8_EEERS3_IS3_IPKNS_10JsonbValueENS5_ISG_S8_EEENS5_ISI_S8_EEERKS3_IjSaIjEEmRKS3_IPKNS_9ColumnStrIjEESaIST_EERKS3_IbSaIbEERKS3_IPKNS_8PODArrayIhLm4096ES8_Lm16ELm15EEESaIS15_EESX_S11_S19_
Line
Count
Source
2278
28
                                  const std::vector<const NullMap*>& json_value_null_maps) const {
2279
59
        for (size_t i = 1; i < arguments.size(); i += 2) {
2280
35
            const size_t index = i / 2;
2281
35
            const auto* json_path_column = json_path_columns[index];
2282
35
            const auto* value_column = json_value_columns[index];
2283
2284
35
            json_paths[index].resize(json_path_constant[index] ? 1 : input_rows_count);
2285
35
            json_values[index].resize(json_value_constant[index] ? 1 : input_rows_count, nullptr);
2286
2287
75
            for (size_t row_idx = 0; row_idx != json_paths[index].size(); ++row_idx) {
2288
44
                if (json_path_null_maps[index] && (*json_path_null_maps[index])[row_idx]) {
2289
4
                    continue;
2290
4
                }
2291
2292
40
                auto path_string = json_path_column->get_data_at(row_idx);
2293
40
                if (!json_paths[index][row_idx].seek(path_string.data, path_string.size)) {
2294
1
                    return Status::InvalidArgument(
2295
1
                            "Json path error: Invalid Json Path for value: {}, "
2296
1
                            "argument "
2297
1
                            "index: {}, row index: {}",
2298
1
                            std::string_view(path_string.data, path_string.size), i, row_idx);
2299
1
                }
2300
2301
39
                if (json_paths[index][row_idx].is_wildcard()) {
2302
3
                    return Status::InvalidArgument(
2303
3
                            "In this situation, path expressions may not contain the * and ** "
2304
3
                            "tokens, argument index: {}, row index: {}",
2305
3
                            i, row_idx);
2306
3
                }
2307
39
            }
2308
2309
92
            for (size_t row_idx = 0; row_idx != json_values[index].size(); ++row_idx) {
2310
61
                if (json_value_null_maps[index] && (*json_value_null_maps[index])[row_idx]) {
2311
14
                    continue;
2312
14
                }
2313
2314
47
                auto value_string = value_column->get_data_at(row_idx);
2315
47
                const JsonbDocument* doc = nullptr;
2316
47
                RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(value_string.data,
2317
47
                                                                      value_string.size, &doc));
2318
47
                if (doc) {
2319
47
                    json_values[index][row_idx] = doc->getValue();
2320
47
                }
2321
47
            }
2322
31
        }
2323
2324
24
        return Status::OK();
2325
28
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE22parse_paths_and_valuesERSt6vectorIS3_INS_9JsonbPathENS_18CustomStdAllocatorIS4_NS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEEEEENS5_ISA_S8_EEERS3_IS3_IPKNS_10JsonbValueENS5_ISG_S8_EEENS5_ISI_S8_EEERKS3_IjSaIjEEmRKS3_IPKNS_9ColumnStrIjEESaIST_EERKS3_IbSaIbEERKS3_IPKNS_8PODArrayIhLm4096ES8_Lm16ELm15EEESaIS15_EESX_S11_S19_
Line
Count
Source
2278
27
                                  const std::vector<const NullMap*>& json_value_null_maps) const {
2279
60
        for (size_t i = 1; i < arguments.size(); i += 2) {
2280
35
            const size_t index = i / 2;
2281
35
            const auto* json_path_column = json_path_columns[index];
2282
35
            const auto* value_column = json_value_columns[index];
2283
2284
35
            json_paths[index].resize(json_path_constant[index] ? 1 : input_rows_count);
2285
35
            json_values[index].resize(json_value_constant[index] ? 1 : input_rows_count, nullptr);
2286
2287
68
            for (size_t row_idx = 0; row_idx != json_paths[index].size(); ++row_idx) {
2288
35
                if (json_path_null_maps[index] && (*json_path_null_maps[index])[row_idx]) {
2289
1
                    continue;
2290
1
                }
2291
2292
34
                auto path_string = json_path_column->get_data_at(row_idx);
2293
34
                if (!json_paths[index][row_idx].seek(path_string.data, path_string.size)) {
2294
1
                    return Status::InvalidArgument(
2295
1
                            "Json path error: Invalid Json Path for value: {}, "
2296
1
                            "argument "
2297
1
                            "index: {}, row index: {}",
2298
1
                            std::string_view(path_string.data, path_string.size), i, row_idx);
2299
1
                }
2300
2301
33
                if (json_paths[index][row_idx].is_wildcard()) {
2302
1
                    return Status::InvalidArgument(
2303
1
                            "In this situation, path expressions may not contain the * and ** "
2304
1
                            "tokens, argument index: {}, row index: {}",
2305
1
                            i, row_idx);
2306
1
                }
2307
33
            }
2308
2309
98
            for (size_t row_idx = 0; row_idx != json_values[index].size(); ++row_idx) {
2310
65
                if (json_value_null_maps[index] && (*json_value_null_maps[index])[row_idx]) {
2311
16
                    continue;
2312
16
                }
2313
2314
49
                auto value_string = value_column->get_data_at(row_idx);
2315
49
                const JsonbDocument* doc = nullptr;
2316
49
                RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(value_string.data,
2317
49
                                                                      value_string.size, &doc));
2318
49
                if (doc) {
2319
49
                    json_values[index][row_idx] = doc->getValue();
2320
49
                }
2321
49
            }
2322
33
        }
2323
2324
25
        return Status::OK();
2325
27
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE22parse_paths_and_valuesERSt6vectorIS3_INS_9JsonbPathENS_18CustomStdAllocatorIS4_NS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEEEEENS5_ISA_S8_EEERS3_IS3_IPKNS_10JsonbValueENS5_ISG_S8_EEENS5_ISI_S8_EEERKS3_IjSaIjEEmRKS3_IPKNS_9ColumnStrIjEESaIST_EERKS3_IbSaIbEERKS3_IPKNS_8PODArrayIhLm4096ES8_Lm16ELm15EEESaIS15_EESX_S11_S19_
Line
Count
Source
2278
28
                                  const std::vector<const NullMap*>& json_value_null_maps) const {
2279
64
        for (size_t i = 1; i < arguments.size(); i += 2) {
2280
38
            const size_t index = i / 2;
2281
38
            const auto* json_path_column = json_path_columns[index];
2282
38
            const auto* value_column = json_value_columns[index];
2283
2284
38
            json_paths[index].resize(json_path_constant[index] ? 1 : input_rows_count);
2285
38
            json_values[index].resize(json_value_constant[index] ? 1 : input_rows_count, nullptr);
2286
2287
74
            for (size_t row_idx = 0; row_idx != json_paths[index].size(); ++row_idx) {
2288
38
                if (json_path_null_maps[index] && (*json_path_null_maps[index])[row_idx]) {
2289
1
                    continue;
2290
1
                }
2291
2292
37
                auto path_string = json_path_column->get_data_at(row_idx);
2293
37
                if (!json_paths[index][row_idx].seek(path_string.data, path_string.size)) {
2294
1
                    return Status::InvalidArgument(
2295
1
                            "Json path error: Invalid Json Path for value: {}, "
2296
1
                            "argument "
2297
1
                            "index: {}, row index: {}",
2298
1
                            std::string_view(path_string.data, path_string.size), i, row_idx);
2299
1
                }
2300
2301
36
                if (json_paths[index][row_idx].is_wildcard()) {
2302
1
                    return Status::InvalidArgument(
2303
1
                            "In this situation, path expressions may not contain the * and ** "
2304
1
                            "tokens, argument index: {}, row index: {}",
2305
1
                            i, row_idx);
2306
1
                }
2307
36
            }
2308
2309
104
            for (size_t row_idx = 0; row_idx != json_values[index].size(); ++row_idx) {
2310
68
                if (json_value_null_maps[index] && (*json_value_null_maps[index])[row_idx]) {
2311
18
                    continue;
2312
18
                }
2313
2314
50
                auto value_string = value_column->get_data_at(row_idx);
2315
50
                const JsonbDocument* doc = nullptr;
2316
50
                RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(value_string.data,
2317
50
                                                                      value_string.size, &doc));
2318
50
                if (doc) {
2319
50
                    json_values[index][row_idx] = doc->getValue();
2320
50
                }
2321
50
            }
2322
36
        }
2323
2324
26
        return Status::OK();
2325
28
    }
2326
};
2327
2328
struct JsonbContainsAndPathImpl {
2329
57
    static DataTypes get_variadic_argument_types() {
2330
57
        return {std::make_shared<DataTypeJsonb>(), std::make_shared<DataTypeJsonb>(),
2331
57
                std::make_shared<DataTypeString>()};
2332
57
    }
2333
2334
    static Status execute_impl(FunctionContext* context, Block& block,
2335
                               const ColumnNumbers& arguments, uint32_t result,
2336
126
                               size_t input_rows_count) {
2337
126
        return JsonbContainsUtil::jsonb_contains_execute(context, block, arguments, result,
2338
126
                                                         input_rows_count);
2339
126
    }
2340
};
2341
2342
class FunctionJsonSearch : public IFunction {
2343
private:
2344
    using OneFun = std::function<Status(size_t, bool*)>;
2345
65
    static Status always_one(size_t i, bool* res) {
2346
65
        *res = true;
2347
65
        return Status::OK();
2348
65
    }
2349
48
    static Status always_all(size_t i, bool* res) {
2350
48
        *res = false;
2351
48
        return Status::OK();
2352
48
    }
2353
2354
    using CheckNullFun = std::function<bool(size_t)>;
2355
294
    static bool always_not_null(size_t) { return false; }
2356
2357
    using GetJsonStringRefFun = std::function<StringRef(size_t)>;
2358
2359
329
    Status matched(const std::string_view& str, LikeState* state, unsigned char* res) const {
2360
329
        StringRef pattern; // not used
2361
329
        StringRef value_val(str.data(), str.size());
2362
329
        return (state->scalar_function)(&state->search_state, value_val, pattern, res);
2363
329
    }
2364
2365
    /**
2366
     * Recursive search for matching string, if found, the result will be added to a vector
2367
     * @param element json element
2368
     * @param one_match
2369
     * @param search_str
2370
     * @param cur_path
2371
     * @param matches The path that has already been matched
2372
     * @return true if matched else false
2373
     */
2374
    bool find_matches(const JsonbValue* element, const bool& one_match, LikeState* state,
2375
717
                      JsonbPath* cur_path, std::unordered_set<std::string>* matches) const {
2376
717
        if (element->isString()) {
2377
329
            const auto* json_string = element->unpack<JsonbStringVal>();
2378
329
            const std::string_view element_str(json_string->getBlob(), json_string->length());
2379
329
            unsigned char res;
2380
329
            RETURN_IF_ERROR(matched(element_str, state, &res));
2381
329
            if (res) {
2382
206
                std::string str;
2383
206
                auto valid = cur_path->to_string(&str);
2384
206
                if (!valid) {
2385
0
                    return false;
2386
0
                }
2387
206
                return matches->insert(str).second;
2388
206
            } else {
2389
123
                return false;
2390
123
            }
2391
388
        } else if (element->isObject()) {
2392
195
            const auto* object = element->unpack<ObjectVal>();
2393
195
            bool find = false;
2394
201
            for (const auto& item : *object) {
2395
201
                Slice key(item.getKeyStr(), item.klen());
2396
201
                const auto* child_element = item.value();
2397
                // construct an object member path leg.
2398
201
                auto leg = std::make_unique<leg_info>(key.data, key.size, 0, MEMBER_CODE);
2399
201
                cur_path->add_leg_to_leg_vector(std::move(leg));
2400
201
                find |= find_matches(child_element, one_match, state, cur_path, matches);
2401
201
                cur_path->pop_leg_from_leg_vector();
2402
201
                if (one_match && find) {
2403
3
                    return true;
2404
3
                }
2405
201
            }
2406
192
            return find;
2407
195
        } else if (element->isArray()) {
2408
193
            const auto* array = element->unpack<ArrayVal>();
2409
193
            bool find = false;
2410
512
            for (int i = 0; i < array->numElem(); ++i) {
2411
385
                auto leg = std::make_unique<leg_info>(nullptr, 0, i, ARRAY_CODE);
2412
385
                cur_path->add_leg_to_leg_vector(std::move(leg));
2413
385
                const auto* child_element = array->get(i);
2414
                // construct an array cell path leg.
2415
385
                find |= find_matches(child_element, one_match, state, cur_path, matches);
2416
385
                cur_path->pop_leg_from_leg_vector();
2417
385
                if (one_match && find) {
2418
66
                    return true;
2419
66
                }
2420
385
            }
2421
127
            return find;
2422
193
        } else {
2423
0
            return false;
2424
0
        }
2425
717
    }
2426
2427
    void make_result_str(JsonbWriter& writer, std::unordered_set<std::string>& matches,
2428
117
                         ColumnString* result_col) const {
2429
117
        if (matches.size() == 1) {
2430
86
            for (const auto& str_ref : matches) {
2431
86
                writer.writeStartString();
2432
86
                writer.writeString(str_ref);
2433
86
                writer.writeEndString();
2434
86
            }
2435
86
        } else {
2436
31
            writer.writeStartArray();
2437
120
            for (const auto& str_ref : matches) {
2438
120
                writer.writeStartString();
2439
120
                writer.writeString(str_ref);
2440
120
                writer.writeEndString();
2441
120
            }
2442
31
            writer.writeEndArray();
2443
31
        }
2444
2445
117
        result_col->insert_data(writer.getOutput()->getBuffer(),
2446
117
                                (size_t)writer.getOutput()->getSize());
2447
117
    }
2448
2449
    template <bool search_is_const>
2450
    Status execute_vector(Block& block, size_t input_rows_count, CheckNullFun json_null_check,
2451
                          GetJsonStringRefFun col_json_string, CheckNullFun one_null_check,
2452
                          OneFun one_check, CheckNullFun search_null_check,
2453
                          const ColumnString* col_search_string, FunctionContext* context,
2454
47
                          size_t result) const {
2455
47
        auto result_col = ColumnString::create();
2456
47
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
2457
2458
47
        std::shared_ptr<LikeState> state_ptr;
2459
47
        LikeState* state = nullptr;
2460
47
        if (search_is_const) {
2461
8
            state = reinterpret_cast<LikeState*>(
2462
8
                    context->get_function_state(FunctionContext::THREAD_LOCAL));
2463
8
        }
2464
2465
47
        bool is_one = false;
2466
2467
47
        JsonbWriter writer;
2468
204
        for (size_t i = 0; i < input_rows_count; ++i) {
2469
            // an error occurs if the json_doc argument is not a valid json document.
2470
159
            if (json_null_check(i)) {
2471
12
                null_map->get_data()[i] = 1;
2472
12
                result_col->insert_data("", 0);
2473
12
                continue;
2474
12
            }
2475
147
            const auto& json_doc_str = col_json_string(i);
2476
147
            const JsonbDocument* json_doc = nullptr;
2477
147
            auto st = JsonbDocument::checkAndCreateDocument(json_doc_str.data, json_doc_str.size,
2478
147
                                                            &json_doc);
2479
147
            if (!st.ok()) {
2480
0
                return Status::InvalidArgument(
2481
0
                        "the json_doc argument at row {} is not a valid json document: {}", i,
2482
0
                        st.to_string());
2483
0
            }
2484
2485
147
            if (!one_null_check(i)) {
2486
145
                RETURN_IF_ERROR(one_check(i, &is_one));
2487
145
            }
2488
2489
145
            if (one_null_check(i) || search_null_check(i)) {
2490
14
                null_map->get_data()[i] = 1;
2491
14
                result_col->insert_data("", 0);
2492
14
                continue;
2493
14
            }
2494
2495
            // an error occurs if any path argument is not a valid path expression.
2496
131
            std::string root_path_str = "$";
2497
131
            JsonbPath root_path;
2498
131
            root_path.seek(root_path_str.c_str(), root_path_str.size());
2499
131
            std::vector<JsonbPath*> paths;
2500
131
            paths.push_back(&root_path);
2501
2502
131
            if (!search_is_const) {
2503
99
                state_ptr = std::make_shared<LikeState>();
2504
99
                state_ptr->is_like_pattern = true;
2505
99
                const auto& search_str = col_search_string->get_data_at(i);
2506
99
                RETURN_IF_ERROR(FunctionLike::construct_like_const_state(context, search_str,
2507
99
                                                                         state_ptr, false));
2508
99
                state = state_ptr.get();
2509
99
            }
2510
2511
            // maintain a hashset to deduplicate matches.
2512
131
            std::unordered_set<std::string> matches;
2513
131
            for (const auto& item : paths) {
2514
131
                auto* cur_path = item;
2515
131
                auto find = find_matches(json_doc->getValue(), is_one, state, cur_path, &matches);
2516
131
                if (is_one && find) {
2517
66
                    break;
2518
66
                }
2519
131
            }
2520
131
            if (matches.empty()) {
2521
                // returns NULL if the search_str is not found in the document.
2522
14
                null_map->get_data()[i] = 1;
2523
14
                result_col->insert_data("", 0);
2524
14
                continue;
2525
14
            }
2526
2527
117
            writer.reset();
2528
117
            make_result_str(writer, matches, result_col.get());
2529
117
        }
2530
45
        auto result_col_nullable =
2531
45
                ColumnNullable::create(std::move(result_col), std::move(null_map));
2532
45
        block.replace_by_position(result, std::move(result_col_nullable));
2533
45
        return Status::OK();
2534
47
    }
_ZNK5doris18FunctionJsonSearch14execute_vectorILb1EEENS_6StatusERNS_5BlockEmSt8functionIFbmEES5_IFNS_9StringRefEmEES7_S5_IFS2_mPbEES7_PKNS_9ColumnStrIjEEPNS_15FunctionContextEm
Line
Count
Source
2454
8
                          size_t result) const {
2455
8
        auto result_col = ColumnString::create();
2456
8
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
2457
2458
8
        std::shared_ptr<LikeState> state_ptr;
2459
8
        LikeState* state = nullptr;
2460
8
        if (search_is_const) {
2461
8
            state = reinterpret_cast<LikeState*>(
2462
8
                    context->get_function_state(FunctionContext::THREAD_LOCAL));
2463
8
        }
2464
2465
8
        bool is_one = false;
2466
2467
8
        JsonbWriter writer;
2468
44
        for (size_t i = 0; i < input_rows_count; ++i) {
2469
            // an error occurs if the json_doc argument is not a valid json document.
2470
36
            if (json_null_check(i)) {
2471
4
                null_map->get_data()[i] = 1;
2472
4
                result_col->insert_data("", 0);
2473
4
                continue;
2474
4
            }
2475
32
            const auto& json_doc_str = col_json_string(i);
2476
32
            const JsonbDocument* json_doc = nullptr;
2477
32
            auto st = JsonbDocument::checkAndCreateDocument(json_doc_str.data, json_doc_str.size,
2478
32
                                                            &json_doc);
2479
32
            if (!st.ok()) {
2480
0
                return Status::InvalidArgument(
2481
0
                        "the json_doc argument at row {} is not a valid json document: {}", i,
2482
0
                        st.to_string());
2483
0
            }
2484
2485
32
            if (!one_null_check(i)) {
2486
32
                RETURN_IF_ERROR(one_check(i, &is_one));
2487
32
            }
2488
2489
32
            if (one_null_check(i) || search_null_check(i)) {
2490
0
                null_map->get_data()[i] = 1;
2491
0
                result_col->insert_data("", 0);
2492
0
                continue;
2493
0
            }
2494
2495
            // an error occurs if any path argument is not a valid path expression.
2496
32
            std::string root_path_str = "$";
2497
32
            JsonbPath root_path;
2498
32
            root_path.seek(root_path_str.c_str(), root_path_str.size());
2499
32
            std::vector<JsonbPath*> paths;
2500
32
            paths.push_back(&root_path);
2501
2502
32
            if (!search_is_const) {
2503
0
                state_ptr = std::make_shared<LikeState>();
2504
0
                state_ptr->is_like_pattern = true;
2505
0
                const auto& search_str = col_search_string->get_data_at(i);
2506
0
                RETURN_IF_ERROR(FunctionLike::construct_like_const_state(context, search_str,
2507
0
                                                                         state_ptr, false));
2508
0
                state = state_ptr.get();
2509
0
            }
2510
2511
            // maintain a hashset to deduplicate matches.
2512
32
            std::unordered_set<std::string> matches;
2513
32
            for (const auto& item : paths) {
2514
32
                auto* cur_path = item;
2515
32
                auto find = find_matches(json_doc->getValue(), is_one, state, cur_path, &matches);
2516
32
                if (is_one && find) {
2517
16
                    break;
2518
16
                }
2519
32
            }
2520
32
            if (matches.empty()) {
2521
                // returns NULL if the search_str is not found in the document.
2522
0
                null_map->get_data()[i] = 1;
2523
0
                result_col->insert_data("", 0);
2524
0
                continue;
2525
0
            }
2526
2527
32
            writer.reset();
2528
32
            make_result_str(writer, matches, result_col.get());
2529
32
        }
2530
8
        auto result_col_nullable =
2531
8
                ColumnNullable::create(std::move(result_col), std::move(null_map));
2532
8
        block.replace_by_position(result, std::move(result_col_nullable));
2533
8
        return Status::OK();
2534
8
    }
_ZNK5doris18FunctionJsonSearch14execute_vectorILb0EEENS_6StatusERNS_5BlockEmSt8functionIFbmEES5_IFNS_9StringRefEmEES7_S5_IFS2_mPbEES7_PKNS_9ColumnStrIjEEPNS_15FunctionContextEm
Line
Count
Source
2454
39
                          size_t result) const {
2455
39
        auto result_col = ColumnString::create();
2456
39
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
2457
2458
39
        std::shared_ptr<LikeState> state_ptr;
2459
39
        LikeState* state = nullptr;
2460
39
        if (search_is_const) {
2461
0
            state = reinterpret_cast<LikeState*>(
2462
0
                    context->get_function_state(FunctionContext::THREAD_LOCAL));
2463
0
        }
2464
2465
39
        bool is_one = false;
2466
2467
39
        JsonbWriter writer;
2468
160
        for (size_t i = 0; i < input_rows_count; ++i) {
2469
            // an error occurs if the json_doc argument is not a valid json document.
2470
123
            if (json_null_check(i)) {
2471
8
                null_map->get_data()[i] = 1;
2472
8
                result_col->insert_data("", 0);
2473
8
                continue;
2474
8
            }
2475
115
            const auto& json_doc_str = col_json_string(i);
2476
115
            const JsonbDocument* json_doc = nullptr;
2477
115
            auto st = JsonbDocument::checkAndCreateDocument(json_doc_str.data, json_doc_str.size,
2478
115
                                                            &json_doc);
2479
115
            if (!st.ok()) {
2480
0
                return Status::InvalidArgument(
2481
0
                        "the json_doc argument at row {} is not a valid json document: {}", i,
2482
0
                        st.to_string());
2483
0
            }
2484
2485
115
            if (!one_null_check(i)) {
2486
113
                RETURN_IF_ERROR(one_check(i, &is_one));
2487
113
            }
2488
2489
113
            if (one_null_check(i) || search_null_check(i)) {
2490
14
                null_map->get_data()[i] = 1;
2491
14
                result_col->insert_data("", 0);
2492
14
                continue;
2493
14
            }
2494
2495
            // an error occurs if any path argument is not a valid path expression.
2496
99
            std::string root_path_str = "$";
2497
99
            JsonbPath root_path;
2498
99
            root_path.seek(root_path_str.c_str(), root_path_str.size());
2499
99
            std::vector<JsonbPath*> paths;
2500
99
            paths.push_back(&root_path);
2501
2502
99
            if (!search_is_const) {
2503
99
                state_ptr = std::make_shared<LikeState>();
2504
99
                state_ptr->is_like_pattern = true;
2505
99
                const auto& search_str = col_search_string->get_data_at(i);
2506
99
                RETURN_IF_ERROR(FunctionLike::construct_like_const_state(context, search_str,
2507
99
                                                                         state_ptr, false));
2508
99
                state = state_ptr.get();
2509
99
            }
2510
2511
            // maintain a hashset to deduplicate matches.
2512
99
            std::unordered_set<std::string> matches;
2513
99
            for (const auto& item : paths) {
2514
99
                auto* cur_path = item;
2515
99
                auto find = find_matches(json_doc->getValue(), is_one, state, cur_path, &matches);
2516
99
                if (is_one && find) {
2517
50
                    break;
2518
50
                }
2519
99
            }
2520
99
            if (matches.empty()) {
2521
                // returns NULL if the search_str is not found in the document.
2522
14
                null_map->get_data()[i] = 1;
2523
14
                result_col->insert_data("", 0);
2524
14
                continue;
2525
14
            }
2526
2527
85
            writer.reset();
2528
85
            make_result_str(writer, matches, result_col.get());
2529
85
        }
2530
37
        auto result_col_nullable =
2531
37
                ColumnNullable::create(std::move(result_col), std::move(null_map));
2532
37
        block.replace_by_position(result, std::move(result_col_nullable));
2533
37
        return Status::OK();
2534
39
    }
2535
2536
    static constexpr auto one = "one";
2537
    static constexpr auto all = "all";
2538
2539
public:
2540
    static constexpr auto name = "json_search";
2541
53
    static FunctionPtr create() { return std::make_shared<FunctionJsonSearch>(); }
2542
2543
1
    String get_name() const override { return name; }
2544
45
    bool is_variadic() const override { return false; }
2545
44
    size_t get_number_of_arguments() const override { return 3; }
2546
2547
44
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2548
44
        return make_nullable(std::make_shared<DataTypeJsonb>());
2549
44
    }
2550
2551
102
    bool use_default_implementation_for_nulls() const override { return false; }
2552
2553
191
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
2554
191
        if (scope != FunctionContext::THREAD_LOCAL) {
2555
44
            return Status::OK();
2556
44
        }
2557
147
        if (context->is_col_constant(2)) {
2558
67
            std::shared_ptr<LikeState> state = std::make_shared<LikeState>();
2559
67
            state->is_like_pattern = true;
2560
67
            const auto pattern_col = context->get_constant_col(2)->column_ptr;
2561
67
            const auto& pattern = pattern_col->get_data_at(0);
2562
67
            RETURN_IF_ERROR(
2563
67
                    FunctionLike::construct_like_const_state(context, pattern, state, false));
2564
67
            context->set_function_state(scope, state);
2565
67
        }
2566
147
        return Status::OK();
2567
147
    }
2568
2569
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
2570
58
                        uint32_t result, size_t input_rows_count) const override {
2571
        // the json_doc, one_or_all, and search_str must be given.
2572
        // and we require the positions are static.
2573
58
        if (arguments.size() < 3) {
2574
0
            return Status::InvalidArgument("too few arguments for function {}", name);
2575
0
        }
2576
58
        if (arguments.size() > 3) {
2577
0
            return Status::NotSupported("escape and path params are not support now");
2578
0
        }
2579
2580
58
        CheckNullFun json_null_check = always_not_null;
2581
58
        GetJsonStringRefFun get_json_fun;
2582
        // prepare jsonb data column
2583
58
        auto&& [col_json, json_is_const] =
2584
58
                unpack_if_const(block.get_by_position(arguments[0]).column);
2585
58
        const auto* col_json_string = check_and_get_column<ColumnString>(col_json.get());
2586
58
        if (const auto* nullable = check_and_get_column<ColumnNullable>(col_json.get())) {
2587
58
            col_json_string =
2588
58
                    check_and_get_column<ColumnString>(nullable->get_nested_column_ptr().get());
2589
58
        }
2590
2591
58
        if (!col_json_string) {
2592
0
            return Status::RuntimeError("Illegal arg json {} should be ColumnString",
2593
0
                                        col_json->get_name());
2594
0
        }
2595
2596
58
        auto create_all_null_result = [&]() {
2597
6
            auto res_str = ColumnString::create();
2598
6
            res_str->insert_default();
2599
6
            auto res = ColumnNullable::create(std::move(res_str), ColumnUInt8::create(1, 1));
2600
6
            if (input_rows_count > 1) {
2601
6
                block.get_by_position(result).column =
2602
6
                        ColumnConst::create(std::move(res), input_rows_count);
2603
6
            } else {
2604
0
                block.get_by_position(result).column = std::move(res);
2605
0
            }
2606
6
            return Status::OK();
2607
6
        };
2608
2609
58
        if (json_is_const) {
2610
11
            if (col_json->is_null_at(0)) {
2611
2
                return create_all_null_result();
2612
9
            } else {
2613
9
                const auto& json_str = col_json_string->get_data_at(0);
2614
36
                get_json_fun = [json_str](size_t i) { return json_str; };
2615
9
            }
2616
47
        } else {
2617
123
            json_null_check = [col_json](size_t i) { return col_json->is_null_at(i); };
2618
111
            get_json_fun = [col_json_string](size_t i) { return col_json_string->get_data_at(i); };
2619
47
        }
2620
2621
        // one_or_all
2622
56
        CheckNullFun one_null_check = always_not_null;
2623
56
        OneFun one_check = always_one;
2624
56
        auto&& [col_one, one_is_const] =
2625
56
                unpack_if_const(block.get_by_position(arguments[1]).column);
2626
56
        one_is_const |= input_rows_count == 1;
2627
56
        const auto* col_one_string = check_and_get_column<ColumnString>(col_one.get());
2628
56
        if (const auto* nullable = check_and_get_column<ColumnNullable>(col_one.get())) {
2629
9
            col_one_string = check_and_get_column<ColumnString>(*nullable->get_nested_column_ptr());
2630
9
        }
2631
56
        if (!col_one_string) {
2632
0
            return Status::RuntimeError("Illegal arg one {} should be ColumnString",
2633
0
                                        col_one->get_name());
2634
0
        }
2635
56
        if (one_is_const) {
2636
46
            if (col_one->is_null_at(0)) {
2637
4
                return create_all_null_result();
2638
42
            } else {
2639
42
                const auto& one_or_all = col_one_string->get_data_at(0);
2640
42
                std::string one_or_all_str = one_or_all.to_string();
2641
42
                if (strcasecmp(one_or_all_str.c_str(), all) == 0) {
2642
17
                    one_check = always_all;
2643
25
                } else if (strcasecmp(one_or_all_str.c_str(), one) == 0) {
2644
                    // nothing
2645
20
                } else {
2646
                    // an error occurs if the one_or_all argument is not 'one' nor 'all'.
2647
5
                    return Status::InvalidArgument(
2648
5
                            "the one_or_all argument {} is not 'one' not 'all'", one_or_all_str);
2649
5
                }
2650
42
            }
2651
46
        } else {
2652
66
            one_null_check = [col_one](size_t i) { return col_one->is_null_at(i); };
2653
32
            one_check = [col_one_string](size_t i, bool* is_one) {
2654
32
                const auto& one_or_all = col_one_string->get_data_at(i);
2655
32
                std::string one_or_all_str = one_or_all.to_string();
2656
32
                if (strcasecmp(one_or_all_str.c_str(), all) == 0) {
2657
18
                    *is_one = false;
2658
18
                } else if (strcasecmp(one_or_all_str.c_str(), one) == 0) {
2659
12
                    *is_one = true;
2660
12
                } else {
2661
                    // an error occurs if the one_or_all argument is not 'one' nor 'all'.
2662
2
                    return Status::InvalidArgument(
2663
2
                            "the one_or_all argument {} is not 'one' not 'all'", one_or_all_str);
2664
2
                }
2665
30
                return Status::OK();
2666
32
            };
2667
10
        }
2668
2669
        // search_str
2670
47
        auto&& [col_search, search_is_const] =
2671
47
                unpack_if_const(block.get_by_position(arguments[2]).column);
2672
2673
47
        const auto* col_search_string = check_and_get_column<ColumnString>(col_search.get());
2674
47
        if (const auto* nullable = check_and_get_column<ColumnNullable>(col_search.get())) {
2675
26
            col_search_string =
2676
26
                    check_and_get_column<ColumnString>(*nullable->get_nested_column_ptr());
2677
26
        }
2678
47
        if (!col_search_string) {
2679
0
            return Status::RuntimeError("Illegal arg pattern {} should be ColumnString",
2680
0
                                        col_search->get_name());
2681
0
        }
2682
47
        if (search_is_const) {
2683
8
            CheckNullFun search_null_check = always_not_null;
2684
8
            if (col_search->is_null_at(0)) {
2685
0
                return create_all_null_result();
2686
0
            }
2687
8
            RETURN_IF_ERROR(execute_vector<true>(
2688
8
                    block, input_rows_count, json_null_check, get_json_fun, one_null_check,
2689
8
                    one_check, search_null_check, col_search_string, context, result));
2690
39
        } else {
2691
111
            CheckNullFun search_null_check = [col_search](size_t i) {
2692
111
                return col_search->is_null_at(i);
2693
111
            };
2694
39
            RETURN_IF_ERROR(execute_vector<false>(
2695
39
                    block, input_rows_count, json_null_check, get_json_fun, one_null_check,
2696
39
                    one_check, search_null_check, col_search_string, context, result));
2697
39
        }
2698
45
        return Status::OK();
2699
47
    }
2700
};
2701
2702
struct DocumentBuffer {
2703
    std::unique_ptr<char[]> ptr;
2704
    size_t size = 0;
2705
    size_t capacity = 0;
2706
};
2707
2708
class FunctionJsonbRemove : public IFunction {
2709
public:
2710
    static constexpr auto name = "jsonb_remove";
2711
    static constexpr auto alias = "json_remove";
2712
2713
31
    static FunctionPtr create() { return std::make_shared<FunctionJsonbRemove>(); }
2714
2715
0
    String get_name() const override { return name; }
2716
2717
0
    size_t get_number_of_arguments() const override { return 0; }
2718
23
    bool is_variadic() const override { return true; }
2719
2720
44
    bool use_default_implementation_for_nulls() const override { return false; }
2721
2722
22
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2723
22
        return make_nullable(std::make_shared<DataTypeJsonb>());
2724
22
    }
2725
2726
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
2727
22
                        uint32_t result, size_t input_rows_count) const override {
2728
22
        DCHECK_GE(arguments.size(), 2);
2729
2730
        // Check if arguments count is valid (json_doc + at least one path)
2731
22
        if (arguments.size() < 2) {
2732
0
            return Status::InvalidArgument("json_remove requires at least 2 arguments");
2733
0
        }
2734
2735
22
        auto return_data_type = make_nullable(std::make_shared<DataTypeJsonb>());
2736
22
        auto result_column = return_data_type->create_column();
2737
22
        auto& nullable_column = assert_cast<ColumnNullable&>(*result_column);
2738
22
        auto& res_chars =
2739
22
                assert_cast<ColumnString&>(nullable_column.get_nested_column()).get_chars();
2740
22
        auto& res_offsets =
2741
22
                assert_cast<ColumnString&>(nullable_column.get_nested_column()).get_offsets();
2742
22
        auto& null_map = nullable_column.get_null_map_data();
2743
2744
22
        res_chars.reserve(input_rows_count * 64);
2745
22
        res_offsets.resize(input_rows_count);
2746
22
        null_map.resize_fill(input_rows_count, 0);
2747
2748
        // Get JSON document column
2749
22
        auto [json_column, json_const] =
2750
22
                unpack_if_const(block.get_by_position(arguments[0]).column);
2751
22
        const auto* json_nullable = check_and_get_column<ColumnNullable>(json_column.get());
2752
22
        const ColumnString* json_data_column = nullptr;
2753
22
        const NullMap* json_null_map = nullptr;
2754
2755
22
        if (json_nullable) {
2756
22
            json_null_map = &json_nullable->get_null_map_data();
2757
22
            json_data_column =
2758
22
                    check_and_get_column<ColumnString>(&json_nullable->get_nested_column());
2759
22
        } else {
2760
0
            json_data_column = check_and_get_column<ColumnString>(json_column.get());
2761
0
        }
2762
2763
22
        if (!json_data_column) {
2764
0
            return Status::InvalidArgument("First argument must be a JSON document");
2765
0
        }
2766
2767
        // Parse paths
2768
22
        std::vector<const ColumnString*> path_columns;
2769
22
        std::vector<const NullMap*> path_null_maps;
2770
22
        std::vector<bool> path_constants;
2771
2772
51
        for (size_t i = 1; i < arguments.size(); ++i) {
2773
29
            auto [path_column, path_const] =
2774
29
                    unpack_if_const(block.get_by_position(arguments[i]).column);
2775
29
            const auto* path_nullable = check_and_get_column<ColumnNullable>(path_column.get());
2776
2777
29
            if (path_nullable) {
2778
6
                path_null_maps.push_back(&path_nullable->get_null_map_data());
2779
6
                path_columns.push_back(
2780
6
                        check_and_get_column<ColumnString>(&path_nullable->get_nested_column()));
2781
23
            } else {
2782
23
                path_null_maps.push_back(nullptr);
2783
23
                path_columns.push_back(check_and_get_column<ColumnString>(path_column.get()));
2784
23
            }
2785
2786
29
            if (!path_columns.back()) {
2787
0
                return Status::InvalidArgument(
2788
0
                        fmt::format("Argument {} must be a string path", i + 1));
2789
0
            }
2790
2791
29
            path_constants.push_back(path_const);
2792
29
        }
2793
2794
        // Reusable JsonbWriter for performance
2795
22
        JsonbWriter writer;
2796
2797
48
        for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) {
2798
28
            size_t json_idx = index_check_const(row_idx, json_const);
2799
2800
            // Check if JSON document is null
2801
28
            if (json_null_map && (*json_null_map)[json_idx]) {
2802
2
                null_map[row_idx] = 1;
2803
2
                res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size());
2804
2
                continue;
2805
2
            }
2806
2807
            // Parse JSON document
2808
26
            const auto& json_data = json_data_column->get_data_at(json_idx);
2809
26
            const JsonbDocument* json_doc = nullptr;
2810
26
            Status parse_status = JsonbDocument::checkAndCreateDocument(json_data.data,
2811
26
                                                                        json_data.size, &json_doc);
2812
2813
26
            if (!parse_status.ok() || !json_doc) {
2814
0
                null_map[row_idx] = 1;
2815
0
                res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size());
2816
0
                continue;
2817
0
            }
2818
2819
            // Check if any path is null
2820
26
            bool has_null_path = false;
2821
59
            for (size_t path_idx = 0; path_idx < path_columns.size(); ++path_idx) {
2822
35
                size_t idx = index_check_const(row_idx, path_constants[path_idx]);
2823
35
                if (path_null_maps[path_idx] && (*path_null_maps[path_idx])[idx]) {
2824
2
                    has_null_path = true;
2825
2
                    break;
2826
2
                }
2827
35
            }
2828
2829
26
            if (has_null_path) {
2830
2
                null_map[row_idx] = 1;
2831
2
                res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size());
2832
2
                continue;
2833
2
            }
2834
2835
24
            std::vector<JsonbPath> paths;
2836
24
            std::vector<bool> path_constants_vec;
2837
2838
54
            for (size_t path_idx = 0; path_idx < path_columns.size(); ++path_idx) {
2839
32
                size_t idx = index_check_const(row_idx, path_constants[path_idx]);
2840
32
                const auto& path_data = path_columns[path_idx]->get_data_at(idx);
2841
2842
32
                JsonbPath path;
2843
32
                if (!path.seek(path_data.data, path_data.size)) {
2844
1
                    return Status::InvalidArgument(
2845
1
                            "Json path error: Invalid Json Path for value: {} at row: {}",
2846
1
                            std::string_view(path_data.data, path_data.size), row_idx);
2847
1
                }
2848
2849
31
                if (path.is_wildcard() || path.is_supper_wildcard()) {
2850
1
                    return Status::InvalidArgument(
2851
1
                            "In this situation, path expressions may not contain the * and ** "
2852
1
                            "tokens or an array range, argument index: {}, row index: {}",
2853
1
                            path_idx + 1, row_idx);
2854
1
                }
2855
2856
30
                paths.push_back(std::move(path));
2857
30
                path_constants_vec.push_back(path_constants[path_idx]);
2858
30
            }
2859
2860
22
            const JsonbValue* current_value = json_doc->getValue();
2861
2862
22
            DocumentBuffer tmp_buffer;
2863
2864
52
            for (size_t path_idx = 0; path_idx < paths.size(); ++path_idx) {
2865
30
                writer.reset();
2866
2867
30
                auto find_result = current_value->findValue(paths[path_idx]);
2868
2869
30
                if (find_result.is_wildcard) {
2870
0
                    continue;
2871
0
                }
2872
2873
30
                if (find_result.value) {
2874
24
                    RETURN_IF_ERROR(clone_without_path(current_value, paths[path_idx], writer));
2875
2876
24
                    auto* writer_output = writer.getOutput();
2877
24
                    if (writer_output->getSize() > tmp_buffer.capacity) {
2878
17
                        tmp_buffer.capacity =
2879
17
                                ((size_t(writer_output->getSize()) + 1024 - 1) / 1024) * 1024;
2880
17
                        tmp_buffer.ptr = std::make_unique<char[]>(tmp_buffer.capacity);
2881
17
                        DCHECK_LE(writer_output->getSize(), tmp_buffer.capacity);
2882
17
                    }
2883
2884
24
                    memcpy(tmp_buffer.ptr.get(), writer_output->getBuffer(),
2885
24
                           writer_output->getSize());
2886
24
                    tmp_buffer.size = writer_output->getSize();
2887
2888
24
                    const JsonbDocument* new_doc = nullptr;
2889
24
                    RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(
2890
24
                            tmp_buffer.ptr.get(), tmp_buffer.size, &new_doc));
2891
2892
24
                    current_value = new_doc->getValue();
2893
24
                }
2894
30
            }
2895
2896
22
            const JsonbDocument* modified_doc = nullptr;
2897
22
            if (current_value != json_doc->getValue()) {
2898
17
                RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(
2899
17
                        tmp_buffer.ptr.get(), tmp_buffer.size, &modified_doc));
2900
17
            } else {
2901
5
                modified_doc = json_doc;
2902
5
            }
2903
2904
            // Write the final result
2905
22
            const auto size = modified_doc->numPackedBytes();
2906
22
            res_chars.insert(reinterpret_cast<const char*>(modified_doc),
2907
22
                             reinterpret_cast<const char*>(modified_doc) + size);
2908
22
            res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size());
2909
22
        }
2910
2911
20
        block.get_by_position(result).column = std::move(result_column);
2912
20
        return Status::OK();
2913
22
    }
2914
2915
private:
2916
    Status clone_without_path(const JsonbValue* root, const JsonbPath& path,
2917
24
                              JsonbWriter& writer) const {
2918
        // Start writing at the root level
2919
24
        if (root->isObject()) {
2920
15
            writer.writeStartObject();
2921
15
            RETURN_IF_ERROR(clone_object_without_path(root, path, 0, writer));
2922
15
            writer.writeEndObject();
2923
15
        } else if (root->isArray()) {
2924
9
            writer.writeStartArray();
2925
9
            RETURN_IF_ERROR(clone_array_without_path(root, path, 0, writer));
2926
9
            writer.writeEndArray();
2927
9
        } else {
2928
            // Primitive value - can't remove anything from it
2929
0
            writer.writeValue(root);
2930
0
        }
2931
24
        return Status::OK();
2932
24
    }
2933
2934
    Status clone_object_without_path(const JsonbValue* obj_value, const JsonbPath& path,
2935
20
                                     size_t depth, JsonbWriter& writer) const {
2936
20
        const auto* obj = obj_value->unpack<ObjectVal>();
2937
2938
40
        for (const auto& kv : *obj) {
2939
40
            std::string key(kv.getKeyStr(), kv.klen());
2940
2941
40
            if (depth < path.get_leg_vector_size()) {
2942
40
                const auto* leg = path.get_leg_from_leg_vector(depth);
2943
40
                if (leg->type == MEMBER_CODE) {
2944
40
                    std::string target_key(leg->leg_ptr, leg->leg_len);
2945
2946
40
                    if (key == target_key) {
2947
20
                        if (depth == path.get_leg_vector_size() - 1) {
2948
12
                            continue;
2949
12
                        } else {
2950
8
                            writer.writeKey(kv.getKeyStr(), kv.klen());
2951
8
                            if (kv.value()->isObject()) {
2952
3
                                writer.writeStartObject();
2953
3
                                RETURN_IF_ERROR(clone_object_without_path(kv.value(), path,
2954
3
                                                                          depth + 1, writer));
2955
3
                                writer.writeEndObject();
2956
5
                            } else if (kv.value()->isArray()) {
2957
5
                                writer.writeStartArray();
2958
5
                                RETURN_IF_ERROR(clone_array_without_path(kv.value(), path,
2959
5
                                                                         depth + 1, writer));
2960
5
                                writer.writeEndArray();
2961
5
                            } else {
2962
0
                                writer.writeValue(kv.value());
2963
0
                            }
2964
8
                        }
2965
20
                    } else {
2966
20
                        writer.writeKey(kv.getKeyStr(), kv.klen());
2967
20
                        writer.writeValue(kv.value());
2968
20
                    }
2969
40
                } else {
2970
0
                    writer.writeKey(kv.getKeyStr(), kv.klen());
2971
0
                    writer.writeValue(kv.value());
2972
0
                }
2973
40
            } else {
2974
0
                writer.writeKey(kv.getKeyStr(), kv.klen());
2975
0
                writer.writeValue(kv.value());
2976
0
            }
2977
40
        }
2978
2979
20
        return Status::OK();
2980
20
    }
2981
2982
    Status clone_array_without_path(const JsonbValue* arr_value, const JsonbPath& path,
2983
17
                                    size_t depth, JsonbWriter& writer) const {
2984
17
        const auto* arr = arr_value->unpack<ArrayVal>();
2985
2986
17
        int index = 0;
2987
52
        for (const auto& element : *arr) {
2988
52
            if (depth < path.get_leg_vector_size()) {
2989
52
                const auto* leg = path.get_leg_from_leg_vector(depth);
2990
52
                if (leg->type == ARRAY_CODE) {
2991
52
                    int target_index = leg->array_index;
2992
2993
52
                    if (index == target_index) {
2994
17
                        if (depth == path.get_leg_vector_size() - 1) {
2995
                            // This is the target element to remove - skip it
2996
12
                        } else {
2997
5
                            if (element.isObject()) {
2998
2
                                writer.writeStartObject();
2999
2
                                RETURN_IF_ERROR(clone_object_without_path(&element, path, depth + 1,
3000
2
                                                                          writer));
3001
2
                                writer.writeEndObject();
3002
3
                            } else if (element.isArray()) {
3003
3
                                writer.writeStartArray();
3004
3
                                RETURN_IF_ERROR(clone_array_without_path(&element, path, depth + 1,
3005
3
                                                                         writer));
3006
3
                                writer.writeEndArray();
3007
3
                            } else {
3008
0
                                writer.writeValue(&element);
3009
0
                            }
3010
5
                        }
3011
35
                    } else {
3012
35
                        writer.writeValue(&element);
3013
35
                    }
3014
52
                } else {
3015
0
                    writer.writeValue(&element);
3016
0
                }
3017
52
            } else {
3018
0
                writer.writeValue(&element);
3019
0
            }
3020
52
            index++;
3021
52
        }
3022
3023
17
        return Status::OK();
3024
17
    }
3025
};
3026
3027
class FunctionStripNullValue : public IFunction {
3028
public:
3029
    static constexpr auto name = "strip_null_value";
3030
24
    static FunctionPtr create() { return std::make_shared<FunctionStripNullValue>(); }
3031
3032
1
    String get_name() const override { return name; }
3033
16
    bool is_variadic() const override { return false; }
3034
15
    size_t get_number_of_arguments() const override { return 1; }
3035
3036
30
    bool use_default_implementation_for_nulls() const override { return false; }
3037
3038
15
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
3039
15
        return make_nullable(std::make_shared<DataTypeJsonb>());
3040
15
    }
3041
3042
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
3043
15
                        uint32_t result, size_t input_rows_count) const override {
3044
15
        const auto& arg_column = block.get_by_position(arguments[0]).column;
3045
15
        const ColumnString* json_column = nullptr;
3046
15
        const NullMap* json_null_map = nullptr;
3047
15
        if (arg_column->is_nullable()) {
3048
15
            const auto& nullable_col = assert_cast<const ColumnNullable&>(*arg_column);
3049
15
            json_column = assert_cast<const ColumnString*>(&nullable_col.get_nested_column());
3050
15
            json_null_map = &nullable_col.get_null_map_data();
3051
15
        } else {
3052
0
            json_column = assert_cast<const ColumnString*>(arg_column.get());
3053
0
        }
3054
3055
15
        auto return_data_type = make_nullable(std::make_shared<DataTypeJsonb>());
3056
15
        auto result_column = return_data_type->create_column();
3057
3058
15
        auto& result_nullmap = assert_cast<ColumnNullable&>(*result_column).get_null_map_data();
3059
15
        auto& result_data_col = assert_cast<ColumnString&>(
3060
15
                assert_cast<ColumnNullable&>(*result_column).get_nested_column());
3061
3062
15
        result_nullmap.resize_fill(input_rows_count, 0);
3063
60
        for (size_t i = 0; i != input_rows_count; ++i) {
3064
45
            if (json_null_map && (*json_null_map)[i]) {
3065
13
                result_nullmap[i] = 1;
3066
13
                result_data_col.insert_default();
3067
13
                continue;
3068
13
            }
3069
32
            const JsonbDocument* json_doc = nullptr;
3070
32
            const auto& json_str = json_column->get_data_at(i);
3071
32
            RETURN_IF_ERROR(
3072
32
                    JsonbDocument::checkAndCreateDocument(json_str.data, json_str.size, &json_doc));
3073
32
            if (json_doc) [[likely]] {
3074
32
                if (json_doc->getValue()->isNull()) {
3075
9
                    result_nullmap[i] = 1;
3076
9
                    result_data_col.insert_default();
3077
23
                } else {
3078
23
                    result_nullmap[i] = 0;
3079
23
                    result_data_col.insert_data(json_str.data, json_str.size);
3080
23
                }
3081
32
            } else {
3082
0
                result_nullmap[i] = 1;
3083
0
                result_data_col.insert_default();
3084
0
            }
3085
32
        }
3086
3087
15
        block.get_by_position(result).column = std::move(result_column);
3088
15
        return Status::OK();
3089
15
    }
3090
};
3091
3092
8
// Registers the JSONB-related function classes with `factory`.
//
// Every class is added through SimpleFunctionFactory::register_function<>().
// For most of them register_alias() is called right afterwards with the name
// just registered as the first argument and a second name as the second
// argument (either the class's static `name`/`alias` members or explicit
// "json_*"/"jsonb_*" string literals).
// NOTE(review): the exact lookup semantics of register_alias() live in
// SimpleFunctionFactory and are not visible here -- confirm there.
void register_function_jsonb(SimpleFunctionFactory& factory) {
3093
8
    // Parse family. FunctionJsonbParse is registered under its static `name`;
    // the ErrorNull / ErrorValue classes are registered under literal
    // "json_parse_error_to_*" names, each paired with a "jsonb_*" second name.
    factory.register_function<FunctionJsonbParse>(FunctionJsonbParse::name);
3094
8
    factory.register_alias(FunctionJsonbParse::name, FunctionJsonbParse::alias);
3095
8
    factory.register_function<FunctionJsonbParseErrorNull>("json_parse_error_to_null");
3096
8
    factory.register_alias("json_parse_error_to_null", "jsonb_parse_error_to_null");
3097
8
    factory.register_function<FunctionJsonbParseErrorValue>("json_parse_error_to_value");
3098
8
    factory.register_alias("json_parse_error_to_value", "jsonb_parse_error_to_value");
3099
3100
8
    // From here on, register_function<>() is mostly called without a name
    // argument; the matching register_alias() call uses the class's static
    // `name` and `alias` members.
    factory.register_function<FunctionJsonbExists>();
3101
8
    factory.register_alias(FunctionJsonbExists::name, FunctionJsonbExists::alias);
3102
8
    factory.register_function<FunctionJsonbType>();
3103
8
    factory.register_alias(FunctionJsonbType::name, FunctionJsonbType::alias);
3104
3105
8
    factory.register_function<FunctionJsonbKeys>();
3106
8
    factory.register_alias(FunctionJsonbKeys::name, FunctionJsonbKeys::alias);
3107
3108
8
    factory.register_function<FunctionJsonbExtractIsnull>();
3109
8
    factory.register_alias(FunctionJsonbExtractIsnull::name, FunctionJsonbExtractIsnull::alias);
3110
3111
8
    factory.register_function<FunctionJsonbExtractJsonb>();
3112
8
    factory.register_alias(FunctionJsonbExtractJsonb::name, FunctionJsonbExtractJsonb::alias);
3113
8
    factory.register_function<FunctionJsonbExtractJsonbNoQuotes>();
3114
8
    factory.register_alias(FunctionJsonbExtractJsonbNoQuotes::name,
3115
8
                           FunctionJsonbExtractJsonbNoQuotes::alias);
3116
3117
8
    // The next three registrations (length, contains, search) have no
    // accompanying register_alias() call.
    factory.register_function<FunctionJsonbLength<JsonbLengthAndPathImpl>>();
3118
8
    factory.register_function<FunctionJsonbContains<JsonbContainsAndPathImpl>>();
3119
3120
8
    factory.register_function<FunctionJsonSearch>();
3121
3122
8
    // FunctionJsonbArray is registered once per value of its bool template
    // argument. The <false> instantiation uses its static name/alias members.
    factory.register_function<FunctionJsonbArray<false>>();
3123
8
    factory.register_alias(FunctionJsonbArray<false>::name, FunctionJsonbArray<false>::alias);
3124
3125
8
    // The <true> instantiation is registered under explicit literal names.
    // NOTE(review): going by those names the flag presumably selects the
    // "ignore null" behaviour -- confirm against the template definition.
    factory.register_function<FunctionJsonbArray<true>>("json_array_ignore_null");
3126
8
    factory.register_alias("json_array_ignore_null", "jsonb_array_ignore_null");
3127
3128
8
    factory.register_function<FunctionJsonbObject>();
3129
8
    factory.register_alias(FunctionJsonbObject::name, FunctionJsonbObject::alias);
3130
3131
8
    // FunctionJsonbModify is registered once per JsonbModifyType used here
    // (Insert, Set, Replace), each with its own static name/alias pair.
    factory.register_function<FunctionJsonbModify<JsonbModifyType::Insert>>();
3132
8
    factory.register_alias(FunctionJsonbModify<JsonbModifyType::Insert>::name,
3133
8
                           FunctionJsonbModify<JsonbModifyType::Insert>::alias);
3134
8
    factory.register_function<FunctionJsonbModify<JsonbModifyType::Set>>();
3135
8
    factory.register_alias(FunctionJsonbModify<JsonbModifyType::Set>::name,
3136
8
                           FunctionJsonbModify<JsonbModifyType::Set>::alias);
3137
8
    factory.register_function<FunctionJsonbModify<JsonbModifyType::Replace>>();
3138
8
    factory.register_alias(FunctionJsonbModify<JsonbModifyType::Replace>::name,
3139
8
                           FunctionJsonbModify<JsonbModifyType::Replace>::alias);
3140
3141
8
    factory.register_function<FunctionJsonbRemove>();
3142
8
    factory.register_alias(FunctionJsonbRemove::name, FunctionJsonbRemove::alias);
3143
3144
8
    // Registered without a second name.
    factory.register_function<FunctionStripNullValue>();
3145
8
}
3146
3147
} // namespace doris