Coverage Report

Created: 2026-06-13 22:56

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/function_jsonb.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include <glog/logging.h>
19
20
#include <algorithm>
21
#include <cstdlib>
22
#include <memory>
23
#include <string>
24
#include <string_view>
25
#include <tuple>
26
#include <type_traits>
27
#include <utility>
28
#include <variant>
29
30
#include "common/compiler_util.h" // IWYU pragma: keep
31
#include "common/status.h"
32
#include "core/assert_cast.h"
33
#include "core/block/block.h"
34
#include "core/block/column_numbers.h"
35
#include "core/block/column_with_type_and_name.h"
36
#include "core/column/column.h"
37
#include "core/column/column_array.h"
38
#include "core/column/column_const.h"
39
#include "core/column/column_nullable.h"
40
#include "core/column/column_string.h"
41
#include "core/column/column_vector.h"
42
#include "core/custom_allocator.h"
43
#include "core/data_type/data_type.h"
44
#include "core/data_type/data_type_array.h"
45
#include "core/data_type/data_type_jsonb.h"
46
#include "core/data_type/data_type_nullable.h"
47
#include "core/data_type/data_type_string.h"
48
#include "core/data_type/define_primitive_type.h"
49
#include "core/data_type/primitive_type.h"
50
#include "core/string_ref.h"
51
#include "core/types.h"
52
#include "core/value/jsonb_value.h"
53
#include "exec/common/stringop_substring.h"
54
#include "exec/common/template_helpers.hpp"
55
#include "exec/common/util.hpp"
56
#include "exprs/aggregate/aggregate_function.h"
57
#include "exprs/function/function.h"
58
#include "exprs/function/like.h"
59
#include "exprs/function/simple_function_factory.h"
60
#include "exprs/function_context.h"
61
#include "util/jsonb_document.h"
62
#include "util/jsonb_stream.h"
63
#include "util/jsonb_utils.h"
64
#include "util/jsonb_writer.h"
65
#include "util/simd/bits.h"
66
67
namespace doris {
68
69
// Selects how the nullability of a parse function's result is decided:
//   NULLABLE     - the result is always nullable.
//   FOLLOW_INPUT - the result is nullable iff at least one argument is nullable.
// NOTE(review): "Nullalbe" is a misspelling of "Nullable"; the identifier is kept
// as-is because the rest of this file refers to it by this name.
enum class NullalbeMode {
    NULLABLE = 0,
    FOLLOW_INPUT = 1,
};
70
71
// What a parse function does with a row whose input is not valid JSON:
//   FAIL         - abort the call with an InvalidArgument status.
//   RETURN_NULL  - emit NULL for that row.
//   RETURN_VALUE - emit a default value for that row (caller-supplied, or '{}').
enum class JsonbParseErrorMode {
    FAIL = 0,
    RETURN_NULL = 1,
    RETURN_VALUE = 2,
};
72
73
// func(string[, jsonb default]) -> jsonb; the optional default is accepted only in RETURN_VALUE mode
74
template <NullalbeMode nullable_mode, JsonbParseErrorMode parse_error_handle_mode>
75
class FunctionJsonbParseBase : public IFunction {
76
private:
77
    struct FunctionJsonbParseState {
78
        StringRef default_value;
79
        JsonBinaryValue default_value_parser;
80
        bool has_const_default_value = false;
81
        bool default_is_null = false;
82
    };
83
84
public:
85
    // Base function name; get_name() appends a mode-specific suffix to it.
    static constexpr auto name = "json_parse";
86
    // NOTE(review): presumably an alternate name under which the same function is
    // registered; the registration code is outside this view — confirm.
    static constexpr auto alias = "jsonb_parse";
87
88
    // Factory: builds a shared instance of this exact template specialisation and
    // hands it back as a FunctionPtr.
    static FunctionPtr create() {
        return std::make_shared<FunctionJsonbParseBase>();
    }
88
89
4
    // Returns the function name for this specialisation:
    //   FAIL         -> "json_parse"
    //   RETURN_NULL  -> "json_parse_error_to_null"
    //   RETURN_VALUE -> "json_parse_error_to_value"
    String get_name() const override {
        String full_name = name;
        if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_NULL) {
            full_name += "_error_to_null";
        } else if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) {
            full_name += "_error_to_value";
        }
        // FAIL keeps the bare base name.
        return full_name;
    }
104
105
65
    bool is_variadic() const override {
106
65
        return parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE;
107
65
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE11is_variadicEv
Line
Count
Source
105
19
    bool is_variadic() const override {
106
19
        return parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE;
107
19
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE11is_variadicEv
Line
Count
Source
105
31
    bool is_variadic() const override {
106
31
        return parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE;
107
31
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE11is_variadicEv
Line
Count
Source
105
15
    bool is_variadic() const override {
106
15
        return parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE;
107
15
    }
108
109
49
    size_t get_number_of_arguments() const override {
110
49
        switch (parse_error_handle_mode) {
111
18
        case JsonbParseErrorMode::FAIL:
112
18
            return 1;
113
30
        case JsonbParseErrorMode::RETURN_NULL:
114
30
            return 1;
115
1
        case JsonbParseErrorMode::RETURN_VALUE:
116
1
            return 0;
117
49
        }
118
49
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE23get_number_of_argumentsEv
Line
Count
Source
109
18
    size_t get_number_of_arguments() const override {
110
18
        switch (parse_error_handle_mode) {
111
18
        case JsonbParseErrorMode::FAIL:
112
18
            return 1;
113
0
        case JsonbParseErrorMode::RETURN_NULL:
114
0
            return 1;
115
0
        case JsonbParseErrorMode::RETURN_VALUE:
116
0
            return 0;
117
18
        }
118
18
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE23get_number_of_argumentsEv
Line
Count
Source
109
30
    size_t get_number_of_arguments() const override {
110
30
        switch (parse_error_handle_mode) {
111
0
        case JsonbParseErrorMode::FAIL:
112
0
            return 1;
113
30
        case JsonbParseErrorMode::RETURN_NULL:
114
30
            return 1;
115
0
        case JsonbParseErrorMode::RETURN_VALUE:
116
0
            return 0;
117
30
        }
118
30
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE23get_number_of_argumentsEv
Line
Count
Source
109
1
    size_t get_number_of_arguments() const override {
110
1
        switch (parse_error_handle_mode) {
111
0
        case JsonbParseErrorMode::FAIL:
112
0
            return 1;
113
0
        case JsonbParseErrorMode::RETURN_NULL:
114
0
            return 1;
115
1
        case JsonbParseErrorMode::RETURN_VALUE:
116
1
            return 0;
117
1
        }
118
1
    }
119
120
61
    // Result type is JSONB, wrapped in Nullable when:
    //   - nullable_mode is NULLABLE, or
    //   - nullable_mode is FOLLOW_INPUT and at least one argument type is nullable.
    //
    // @param arguments  data types of the call's arguments
    // @return           DataTypeJsonb or Nullable(DataTypeJsonb)
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
        bool is_nullable = (nullable_mode == NullalbeMode::NULLABLE);
        if constexpr (nullable_mode == NullalbeMode::FOLLOW_INPUT) {
            // Take each type by const reference so the shared pointers are not copied.
            is_nullable = std::any_of(
                    arguments.begin(), arguments.end(),
                    [](const auto& arg_type) { return arg_type->is_nullable(); });
        }

        DataTypePtr jsonb_type = std::make_shared<DataTypeJsonb>();
        if (is_nullable) {
            return make_nullable(jsonb_type);
        }
        return jsonb_type;
    }
137
138
137
    // Opt out of default NULL handling: execute_impl() inspects the null maps of its
    // arguments itself and also produces NULLs of its own on parse failure.
    bool use_default_implementation_for_nulls() const override {
        return false;
    }
139
140
334
    // Prepares per-fragment state for the function.
    //
    // Under FRAGMENT_LOCAL scope a FunctionJsonbParseState is created and installed
    // on the context. In RETURN_VALUE mode the state additionally caches the default
    // value when it is known up front:
    //   - two arguments, second is a non-NULL constant: its bytes are cached;
    //   - two arguments, second is a NULL constant: only default_is_null is set.
    //     execute_impl() consults the cache only when has_const_default_value is
    //     true, so this case is re-derived from the block on every call;
    //   - one argument: the implicit default '{}' is encoded once and cached.
    //
    // NOTE(review): unlike the per-block path in execute_impl(), the cached constant
    // default is neither type-checked as JSONB nor validated as a JSONB document
    // here — confirm the planner guarantees both.
    //
    // @param context  function context giving access to argument metadata and state
    // @param scope    scope this call is made for
    // @return         OK, an error from encoding '{}', or InvalidArgument when the
    //                 RETURN_VALUE flavour is called with other than 1 or 2 arguments
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
        if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) {
            // Keep the typed pointer and fill the state through it instead of
            // fetching it back from the context and casting.
            auto state = std::make_shared<FunctionJsonbParseState>();
            context->set_function_state(FunctionContext::FRAGMENT_LOCAL, state);

            if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) {
                if (context->get_num_args() == 2) {
                    if (context->is_col_constant(1)) {
                        const auto default_value_col = context->get_constant_col(1)->column_ptr;
                        if (default_value_col->is_null_at(0)) {
                            state->default_is_null = true;
                        } else {
                            state->default_value = default_value_col->get_data_at(0);
                            state->has_const_default_value = true;
                        }
                    }
                } else if (context->get_num_args() == 1) {
                    RETURN_IF_ERROR(
                            state->default_value_parser.from_json_string(std::string("{}")));
                    state->default_value = StringRef(state->default_value_parser.value(),
                                                     state->default_value_parser.size());
                    state->has_const_default_value = true;
                }
            }
        }

        if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) {
            // Checked for every scope, as before.
            if (context->get_num_args() != 1 && context->get_num_args() != 2) {
                return Status::InvalidArgument(
                        "{} function should have 1 or 2 arguments, "
                        "but got {}",
                        get_name(), context->get_num_args());
            }
        }
        return Status::OK();
    }
182
183
    // Parses every row of the first argument (a string column) into JSONB and stores
    // the resulting column at position `result` of `block`.
    //
    // Row handling:
    //   - a constant NULL input yields a constant NULL result for the whole block;
    //   - a NULL input row yields a NULL output row;
    //   - a row that fails to parse is handled according to parse_error_handle_mode:
    //       FAIL         -> the call returns InvalidArgument,
    //       RETURN_NULL  -> the row becomes NULL,
    //       RETURN_VALUE -> the row takes the default value (constant, per-row, or
    //                       NULL when the default itself is NULL).
    //
    // @param context           used to read the FRAGMENT_LOCAL state set up by open()
    // @param block             holds the argument columns and receives the result
    // @param arguments         positions of the argument columns in `block`
    // @param result            position of the result column in `block`
    // @param input_rows_count  number of rows to process
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                        uint32_t result, size_t input_rows_count) const override {
        auto&& [json_col, json_col_is_const] =
                unpack_if_const(block.get_by_position(arguments[0]).column);

        // Constant NULL input: the answer is a constant NULL, nothing to parse.
        if (json_col_is_const && json_col->is_null_at(0)) {
            auto placeholder = ColumnString::create();
            placeholder->insert_default();
            auto all_null = ColumnUInt8::create(1, 1);
            auto null_result =
                    ColumnNullable::create(std::move(placeholder), std::move(all_null));
            block.get_by_position(result).column =
                    ColumnConst::create(std::move(null_result), input_rows_count);
            return Status::OK();
        }

        // Decide whether the result carries a null map.
        bool is_nullable = false;
        if constexpr (nullable_mode == NullalbeMode::NULLABLE) {
            is_nullable = true;
        } else if constexpr (nullable_mode == NullalbeMode::FOLLOW_INPUT) {
            for (auto arg_pos : arguments) {
                is_nullable |= block.get_by_position(arg_pos).type->is_nullable();
            }
        }

        // One zero-initialised flag per row when nullable, otherwise an empty map.
        auto null_map = ColumnUInt8::create(is_nullable ? input_rows_count : 0, 0);

        // Peel the Nullable wrapper off the input, folding its null flags into ours.
        const ColumnString* json_strings = nullptr;
        if (const auto* nullable_input = check_and_get_column<ColumnNullable>(json_col.get())) {
            VectorizedUtils::update_null_map(null_map->get_data(),
                                             nullable_input->get_null_map_data(),
                                             json_col_is_const);
            json_strings = assert_cast<const ColumnString*>(
                    nullable_input->get_nested_column_ptr().get());
        } else {
            json_strings = assert_cast<const ColumnString*>(json_col.get());
        }

        // Default-value bookkeeping; only populated in RETURN_VALUE mode.
        StringRef const_default_bytes;
        bool default_is_const = false;
        bool default_is_const_null = false;
        ColumnPtr default_col;
        JsonBinaryValue empty_object_encoder;
        const ColumnString* default_strings = nullptr;
        const NullMap* default_null_flags = nullptr;
        if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) {
            auto* cached = reinterpret_cast<FunctionJsonbParseState*>(
                    context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
            if (cached && cached->has_const_default_value) {
                // open() already resolved a constant default.
                const_default_bytes = cached->default_value;
                default_is_const_null = cached->default_is_null;
                default_is_const = true;
            } else if (arguments.size() > 1) {
                if (block.get_by_position(arguments[1]).type->get_primitive_type() !=
                    PrimitiveType::TYPE_JSONB) {
                    return Status::InvalidArgument(
                            "{} second argument should be jsonb type, but got {}", get_name(),
                            block.get_by_position(arguments[1]).type->get_name());
                }
                std::tie(default_col, default_is_const) =
                        unpack_if_const(block.get_by_position(arguments[1]).column);
                if (default_is_const) {
                    if (default_col->is_null_at(0)) {
                        default_is_const_null = true;
                    } else {
                        // Validate the constant once before using it for every row.
                        const JsonbDocument* checked_doc = nullptr;
                        auto raw = default_col->get_data_at(0);
                        RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(raw.data, raw.size,
                                                                              &checked_doc));
                        const_default_bytes = raw;
                    }
                } else if (const auto* nullable_default =
                                   check_and_get_column<ColumnNullable>(default_col.get())) {
                    default_strings = assert_cast<const ColumnString*>(
                            nullable_default->get_nested_column_ptr().get());
                    default_null_flags = &(nullable_default->get_null_map_data());
                } else {
                    default_strings = assert_cast<const ColumnString*>(default_col.get());
                }
            } else if (arguments.size() == 1) {
                // parse default value '{}' should always success.
                RETURN_IF_ERROR(empty_object_encoder.from_json_string(std::string("{}")));
                default_is_const = true;
                const_default_bytes.data = empty_object_encoder.value();
                const_default_bytes.size = empty_object_encoder.size();
            }
        }

        auto out_col = ColumnString::create();
        out_col->reserve(input_rows_count);

        auto& null_flags = null_map->get_data();

        // A single parser instance is reused across rows for performance.
        JsonBinaryValue parser;

        for (size_t row = 0; row < input_rows_count; ++row) {
            if (is_nullable && null_flags[row]) {
                out_col->insert_default();
                continue;
            }

            auto src_row = index_check_const(row, json_col_is_const);
            const auto& text = json_strings->get_data_at(src_row);
            auto parse_status = parser.from_json_string(text.data, text.size);
            if (parse_status.ok()) {
                // Store the binary JSONB encoding.
                out_col->insert_data(parser.value(), parser.size());
                continue;
            }

            // Parse failure: behaviour depends on the configured error mode.
            if constexpr (parse_error_handle_mode == JsonbParseErrorMode::FAIL) {
                return Status::InvalidArgument("Parse json document failed at row {}, error: {}",
                                               row, parse_status.to_string());
            } else if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_NULL) {
                null_flags[row] = 1;
                out_col->insert_default();
            } else {
                if (default_is_const) {
                    if (default_is_const_null) {
                        null_flags[row] = 1;
                        out_col->insert_default();
                    } else {
                        out_col->insert_data(const_default_bytes.data, const_default_bytes.size);
                    }
                } else if (default_null_flags && (*default_null_flags)[row]) {
                    null_flags[row] = 1;
                    out_col->insert_default();
                } else {
                    auto fallback = default_strings->get_data_at(row);
                    out_col->insert_data(fallback.data, fallback.size);
                }
            }
        }

        if (is_nullable) {
            block.replace_by_position(
                    result, ColumnNullable::create(std::move(out_col), std::move(null_map)));
        } else {
            block.replace_by_position(result, std::move(out_col));
        }

        return Status::OK();
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
184
26
                        uint32_t result, size_t input_rows_count) const override {
185
26
        auto&& [col_from, col_from_is_const] =
186
26
                unpack_if_const(block.get_by_position(arguments[0]).column);
187
188
26
        if (col_from_is_const && col_from->is_null_at(0)) {
189
0
            auto col_str = ColumnString::create();
190
0
            col_str->insert_default();
191
0
            auto null_map = ColumnUInt8::create(1, 1);
192
0
            auto nullable_col = ColumnNullable::create(std::move(col_str), std::move(null_map));
193
0
            block.get_by_position(result).column =
194
0
                    ColumnConst::create(std::move(nullable_col), input_rows_count);
195
0
            return Status::OK();
196
0
        }
197
198
26
        auto null_map = ColumnUInt8::create(0, 0);
199
26
        bool is_nullable = false;
200
201
26
        switch (nullable_mode) {
202
0
        case NullalbeMode::NULLABLE: {
203
0
            is_nullable = true;
204
0
            break;
205
0
        }
206
26
        case NullalbeMode::FOLLOW_INPUT: {
207
26
            for (auto arg : arguments) {
208
26
                is_nullable |= block.get_by_position(arg).type->is_nullable();
209
26
            }
210
26
            break;
211
0
        }
212
26
        }
213
214
26
        if (is_nullable) {
215
17
            null_map = ColumnUInt8::create(input_rows_count, 0);
216
17
        }
217
218
26
        const ColumnString* col_from_string = nullptr;
219
26
        if (const auto* nullable_col = check_and_get_column<ColumnNullable>(col_from.get())) {
220
17
            VectorizedUtils::update_null_map(null_map->get_data(),
221
17
                                             nullable_col->get_null_map_data(), col_from_is_const);
222
17
            col_from_string =
223
17
                    assert_cast<const ColumnString*>(nullable_col->get_nested_column_ptr().get());
224
17
        } else {
225
9
            col_from_string = assert_cast<const ColumnString*>(col_from.get());
226
9
        }
227
228
26
        StringRef constant_default_value;
229
26
        bool default_value_const = false;
230
26
        bool default_value_null_const = false;
231
26
        ColumnPtr default_value_col;
232
26
        JsonBinaryValue default_jsonb_value_parser;
233
26
        const ColumnString* default_value_str_col = nullptr;
234
26
        const NullMap* default_value_nullmap = nullptr;
235
        if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) {
236
            auto* state = reinterpret_cast<FunctionJsonbParseState*>(
237
                    context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
238
            if (state && state->has_const_default_value) {
239
                constant_default_value = state->default_value;
240
                default_value_null_const = state->default_is_null;
241
                default_value_const = true;
242
            } else if (arguments.size() > 1) {
243
                if (block.get_by_position(arguments[1]).type->get_primitive_type() !=
244
                    PrimitiveType::TYPE_JSONB) {
245
                    return Status::InvalidArgument(
246
                            "{} second argument should be jsonb type, but got {}", get_name(),
247
                            block.get_by_position(arguments[1]).type->get_name());
248
                }
249
                std::tie(default_value_col, default_value_const) =
250
                        unpack_if_const(block.get_by_position(arguments[1]).column);
251
                if (default_value_const) {
252
                    const JsonbDocument* default_value_doc = nullptr;
253
                    if (default_value_col->is_null_at(0)) {
254
                        default_value_null_const = true;
255
                    } else {
256
                        auto data = default_value_col->get_data_at(0);
257
                        RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(data.data, data.size,
258
                                                                              &default_value_doc));
259
                        constant_default_value = data;
260
                    }
261
                } else {
262
                    if (const auto* nullable_col =
263
                                check_and_get_column<ColumnNullable>(default_value_col.get())) {
264
                        default_value_str_col = assert_cast<const ColumnString*>(
265
                                nullable_col->get_nested_column_ptr().get());
266
                        default_value_nullmap = &(nullable_col->get_null_map_data());
267
                    } else {
268
                        default_value_str_col =
269
                                assert_cast<const ColumnString*>(default_value_col.get());
270
                    }
271
                }
272
            } else if (arguments.size() == 1) {
273
                // Parsing the default value '{}' should always succeed.
274
                RETURN_IF_ERROR(default_jsonb_value_parser.from_json_string(std::string("{}")));
275
                default_value_const = true;
276
                constant_default_value.data = default_jsonb_value_parser.value();
277
                constant_default_value.size = default_jsonb_value_parser.size();
278
            }
279
        }
280
281
26
        auto col_to = ColumnString::create();
282
283
26
        col_to->reserve(input_rows_count);
284
285
26
        auto& null_map_data = null_map->get_data();
286
287
        // parser can be reused for performance
288
26
        JsonBinaryValue jsonb_value;
289
290
68
        for (size_t i = 0; i < input_rows_count; ++i) {
291
42
            if (is_nullable && null_map_data[i]) {
292
1
                col_to->insert_default();
293
1
                continue;
294
1
            }
295
296
41
            auto index = index_check_const(i, col_from_is_const);
297
41
            const auto& val = col_from_string->get_data_at(index);
298
41
            auto st = jsonb_value.from_json_string(val.data, val.size);
299
41
            if (st.ok()) {
300
                // insert jsonb format data
301
35
                col_to->insert_data(jsonb_value.value(), jsonb_value.size());
302
35
            } else {
303
6
                if constexpr (parse_error_handle_mode == JsonbParseErrorMode::FAIL) {
304
6
                    return Status::InvalidArgument(
305
6
                            "Parse json document failed at row {}, error: {}", i, st.to_string());
306
                } else if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_NULL) {
307
                    null_map_data[i] = 1;
308
                    col_to->insert_default();
309
                } else {
310
                    if (default_value_const) {
311
                        if (default_value_null_const) {
312
                            null_map_data[i] = 1;
313
                            col_to->insert_default();
314
                        } else {
315
                            col_to->insert_data(constant_default_value.data,
316
                                                constant_default_value.size);
317
                        }
318
                    } else {
319
                        if (default_value_nullmap && (*default_value_nullmap)[i]) {
320
                            null_map_data[i] = 1;
321
                            col_to->insert_default();
322
                            continue;
323
                        }
324
                        auto value = default_value_str_col->get_data_at(i);
325
                        col_to->insert_data(value.data, value.size);
326
                    }
327
                }
328
6
            }
329
41
        }
330
331
26
        if (is_nullable) {
332
11
            block.replace_by_position(
333
11
                    result, ColumnNullable::create(std::move(col_to), std::move(null_map)));
334
15
        } else {
335
15
            block.replace_by_position(result, std::move(col_to));
336
15
        }
337
338
26
        return Status::OK();
339
26
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
184
34
                        uint32_t result, size_t input_rows_count) const override {
185
34
        auto&& [col_from, col_from_is_const] =
186
34
                unpack_if_const(block.get_by_position(arguments[0]).column);
187
188
34
        if (col_from_is_const && col_from->is_null_at(0)) {
189
0
            auto col_str = ColumnString::create();
190
0
            col_str->insert_default();
191
0
            auto null_map = ColumnUInt8::create(1, 1);
192
0
            auto nullable_col = ColumnNullable::create(std::move(col_str), std::move(null_map));
193
0
            block.get_by_position(result).column =
194
0
                    ColumnConst::create(std::move(nullable_col), input_rows_count);
195
0
            return Status::OK();
196
0
        }
197
198
34
        auto null_map = ColumnUInt8::create(0, 0);
199
34
        bool is_nullable = false;
200
201
34
        switch (nullable_mode) {
202
34
        case NullalbeMode::NULLABLE: {
203
34
            is_nullable = true;
204
34
            break;
205
0
        }
206
0
        case NullalbeMode::FOLLOW_INPUT: {
207
0
            for (auto arg : arguments) {
208
0
                is_nullable |= block.get_by_position(arg).type->is_nullable();
209
0
            }
210
0
            break;
211
0
        }
212
34
        }
213
214
34
        if (is_nullable) {
215
34
            null_map = ColumnUInt8::create(input_rows_count, 0);
216
34
        }
217
218
34
        const ColumnString* col_from_string = nullptr;
219
34
        if (const auto* nullable_col = check_and_get_column<ColumnNullable>(col_from.get())) {
220
11
            VectorizedUtils::update_null_map(null_map->get_data(),
221
11
                                             nullable_col->get_null_map_data(), col_from_is_const);
222
11
            col_from_string =
223
11
                    assert_cast<const ColumnString*>(nullable_col->get_nested_column_ptr().get());
224
23
        } else {
225
23
            col_from_string = assert_cast<const ColumnString*>(col_from.get());
226
23
        }
227
228
34
        StringRef constant_default_value;
229
34
        bool default_value_const = false;
230
34
        bool default_value_null_const = false;
231
34
        ColumnPtr default_value_col;
232
34
        JsonBinaryValue default_jsonb_value_parser;
233
34
        const ColumnString* default_value_str_col = nullptr;
234
34
        const NullMap* default_value_nullmap = nullptr;
235
        if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) {
236
            auto* state = reinterpret_cast<FunctionJsonbParseState*>(
237
                    context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
238
            if (state && state->has_const_default_value) {
239
                constant_default_value = state->default_value;
240
                default_value_null_const = state->default_is_null;
241
                default_value_const = true;
242
            } else if (arguments.size() > 1) {
243
                if (block.get_by_position(arguments[1]).type->get_primitive_type() !=
244
                    PrimitiveType::TYPE_JSONB) {
245
                    return Status::InvalidArgument(
246
                            "{} second argument should be jsonb type, but got {}", get_name(),
247
                            block.get_by_position(arguments[1]).type->get_name());
248
                }
249
                std::tie(default_value_col, default_value_const) =
250
                        unpack_if_const(block.get_by_position(arguments[1]).column);
251
                if (default_value_const) {
252
                    const JsonbDocument* default_value_doc = nullptr;
253
                    if (default_value_col->is_null_at(0)) {
254
                        default_value_null_const = true;
255
                    } else {
256
                        auto data = default_value_col->get_data_at(0);
257
                        RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(data.data, data.size,
258
                                                                              &default_value_doc));
259
                        constant_default_value = data;
260
                    }
261
                } else {
262
                    if (const auto* nullable_col =
263
                                check_and_get_column<ColumnNullable>(default_value_col.get())) {
264
                        default_value_str_col = assert_cast<const ColumnString*>(
265
                                nullable_col->get_nested_column_ptr().get());
266
                        default_value_nullmap = &(nullable_col->get_null_map_data());
267
                    } else {
268
                        default_value_str_col =
269
                                assert_cast<const ColumnString*>(default_value_col.get());
270
                    }
271
                }
272
            } else if (arguments.size() == 1) {
273
                // Parsing the default value '{}' should always succeed.
274
                RETURN_IF_ERROR(default_jsonb_value_parser.from_json_string(std::string("{}")));
275
                default_value_const = true;
276
                constant_default_value.data = default_jsonb_value_parser.value();
277
                constant_default_value.size = default_jsonb_value_parser.size();
278
            }
279
        }
280
281
34
        auto col_to = ColumnString::create();
282
283
34
        col_to->reserve(input_rows_count);
284
285
34
        auto& null_map_data = null_map->get_data();
286
287
        // parser can be reused for performance
288
34
        JsonBinaryValue jsonb_value;
289
290
99
        for (size_t i = 0; i < input_rows_count; ++i) {
291
65
            if (is_nullable && null_map_data[i]) {
292
6
                col_to->insert_default();
293
6
                continue;
294
6
            }
295
296
59
            auto index = index_check_const(i, col_from_is_const);
297
59
            const auto& val = col_from_string->get_data_at(index);
298
59
            auto st = jsonb_value.from_json_string(val.data, val.size);
299
59
            if (st.ok()) {
300
                // insert jsonb format data
301
42
                col_to->insert_data(jsonb_value.value(), jsonb_value.size());
302
42
            } else {
303
                if constexpr (parse_error_handle_mode == JsonbParseErrorMode::FAIL) {
304
                    return Status::InvalidArgument(
305
                            "Parse json document failed at row {}, error: {}", i, st.to_string());
306
17
                } else if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_NULL) {
307
17
                    null_map_data[i] = 1;
308
17
                    col_to->insert_default();
309
                } else {
310
                    if (default_value_const) {
311
                        if (default_value_null_const) {
312
                            null_map_data[i] = 1;
313
                            col_to->insert_default();
314
                        } else {
315
                            col_to->insert_data(constant_default_value.data,
316
                                                constant_default_value.size);
317
                        }
318
                    } else {
319
                        if (default_value_nullmap && (*default_value_nullmap)[i]) {
320
                            null_map_data[i] = 1;
321
                            col_to->insert_default();
322
                            continue;
323
                        }
324
                        auto value = default_value_str_col->get_data_at(i);
325
                        col_to->insert_data(value.data, value.size);
326
                    }
327
                }
328
17
            }
329
59
        }
330
331
34
        if (is_nullable) {
332
34
            block.replace_by_position(
333
34
                    result, ColumnNullable::create(std::move(col_to), std::move(null_map)));
334
34
        } else {
335
0
            block.replace_by_position(result, std::move(col_to));
336
0
        }
337
338
34
        return Status::OK();
339
34
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
184
16
                        uint32_t result, size_t input_rows_count) const override {
185
16
        auto&& [col_from, col_from_is_const] =
186
16
                unpack_if_const(block.get_by_position(arguments[0]).column);
187
188
16
        if (col_from_is_const && col_from->is_null_at(0)) {
189
1
            auto col_str = ColumnString::create();
190
1
            col_str->insert_default();
191
1
            auto null_map = ColumnUInt8::create(1, 1);
192
1
            auto nullable_col = ColumnNullable::create(std::move(col_str), std::move(null_map));
193
1
            block.get_by_position(result).column =
194
1
                    ColumnConst::create(std::move(nullable_col), input_rows_count);
195
1
            return Status::OK();
196
1
        }
197
198
15
        auto null_map = ColumnUInt8::create(0, 0);
199
15
        bool is_nullable = false;
200
201
15
        switch (nullable_mode) {
202
0
        case NullalbeMode::NULLABLE: {
203
0
            is_nullable = true;
204
0
            break;
205
0
        }
206
15
        case NullalbeMode::FOLLOW_INPUT: {
207
28
            for (auto arg : arguments) {
208
28
                is_nullable |= block.get_by_position(arg).type->is_nullable();
209
28
            }
210
15
            break;
211
0
        }
212
15
        }
213
214
15
        if (is_nullable) {
215
14
            null_map = ColumnUInt8::create(input_rows_count, 0);
216
14
        }
217
218
15
        const ColumnString* col_from_string = nullptr;
219
15
        if (const auto* nullable_col = check_and_get_column<ColumnNullable>(col_from.get())) {
220
12
            VectorizedUtils::update_null_map(null_map->get_data(),
221
12
                                             nullable_col->get_null_map_data(), col_from_is_const);
222
12
            col_from_string =
223
12
                    assert_cast<const ColumnString*>(nullable_col->get_nested_column_ptr().get());
224
12
        } else {
225
3
            col_from_string = assert_cast<const ColumnString*>(col_from.get());
226
3
        }
227
228
15
        StringRef constant_default_value;
229
15
        bool default_value_const = false;
230
15
        bool default_value_null_const = false;
231
15
        ColumnPtr default_value_col;
232
15
        JsonBinaryValue default_jsonb_value_parser;
233
15
        const ColumnString* default_value_str_col = nullptr;
234
15
        const NullMap* default_value_nullmap = nullptr;
235
15
        if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) {
236
15
            auto* state = reinterpret_cast<FunctionJsonbParseState*>(
237
15
                    context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
238
15
            if (state && state->has_const_default_value) {
239
7
                constant_default_value = state->default_value;
240
7
                default_value_null_const = state->default_is_null;
241
7
                default_value_const = true;
242
8
            } else if (arguments.size() > 1) {
243
8
                if (block.get_by_position(arguments[1]).type->get_primitive_type() !=
244
8
                    PrimitiveType::TYPE_JSONB) {
245
1
                    return Status::InvalidArgument(
246
1
                            "{} second argument should be jsonb type, but got {}", get_name(),
247
1
                            block.get_by_position(arguments[1]).type->get_name());
248
1
                }
249
7
                std::tie(default_value_col, default_value_const) =
250
7
                        unpack_if_const(block.get_by_position(arguments[1]).column);
251
7
                if (default_value_const) {
252
1
                    const JsonbDocument* default_value_doc = nullptr;
253
1
                    if (default_value_col->is_null_at(0)) {
254
1
                        default_value_null_const = true;
255
1
                    } else {
256
0
                        auto data = default_value_col->get_data_at(0);
257
0
                        RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(data.data, data.size,
258
0
                                                                              &default_value_doc));
259
0
                        constant_default_value = data;
260
0
                    }
261
6
                } else {
262
6
                    if (const auto* nullable_col =
263
6
                                check_and_get_column<ColumnNullable>(default_value_col.get())) {
264
4
                        default_value_str_col = assert_cast<const ColumnString*>(
265
4
                                nullable_col->get_nested_column_ptr().get());
266
4
                        default_value_nullmap = &(nullable_col->get_null_map_data());
267
4
                    } else {
268
2
                        default_value_str_col =
269
2
                                assert_cast<const ColumnString*>(default_value_col.get());
270
2
                    }
271
6
                }
272
7
            } else if (arguments.size() == 1) {
273
                // Parsing the default value '{}' should always succeed.
274
0
                RETURN_IF_ERROR(default_jsonb_value_parser.from_json_string(std::string("{}")));
275
0
                default_value_const = true;
276
0
                constant_default_value.data = default_jsonb_value_parser.value();
277
0
                constant_default_value.size = default_jsonb_value_parser.size();
278
0
            }
279
15
        }
280
281
14
        auto col_to = ColumnString::create();
282
283
15
        col_to->reserve(input_rows_count);
284
285
15
        auto& null_map_data = null_map->get_data();
286
287
        // parser can be reused for performance
288
15
        JsonBinaryValue jsonb_value;
289
290
1.14k
        for (size_t i = 0; i < input_rows_count; ++i) {
291
1.12k
            if (is_nullable && null_map_data[i]) {
292
6
                col_to->insert_default();
293
6
                continue;
294
6
            }
295
296
1.11k
            auto index = index_check_const(i, col_from_is_const);
297
1.11k
            const auto& val = col_from_string->get_data_at(index);
298
1.11k
            auto st = jsonb_value.from_json_string(val.data, val.size);
299
1.11k
            if (st.ok()) {
300
                // insert jsonb format data
301
1.08k
                col_to->insert_data(jsonb_value.value(), jsonb_value.size());
302
1.08k
            } else {
303
                if constexpr (parse_error_handle_mode == JsonbParseErrorMode::FAIL) {
304
                    return Status::InvalidArgument(
305
                            "Parse json document failed at row {}, error: {}", i, st.to_string());
306
                } else if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_NULL) {
307
                    null_map_data[i] = 1;
308
                    col_to->insert_default();
309
31
                } else {
310
31
                    if (default_value_const) {
311
9
                        if (default_value_null_const) {
312
3
                            null_map_data[i] = 1;
313
3
                            col_to->insert_default();
314
6
                        } else {
315
6
                            col_to->insert_data(constant_default_value.data,
316
6
                                                constant_default_value.size);
317
6
                        }
318
22
                    } else {
319
22
                        if (default_value_nullmap && (*default_value_nullmap)[i]) {
320
3
                            null_map_data[i] = 1;
321
3
                            col_to->insert_default();
322
3
                            continue;
323
3
                        }
324
19
                        auto value = default_value_str_col->get_data_at(i);
325
19
                        col_to->insert_data(value.data, value.size);
326
19
                    }
327
31
                }
328
31
            }
329
1.11k
        }
330
331
18
        if (is_nullable) {
332
14
            block.replace_by_position(
333
14
                    result, ColumnNullable::create(std::move(col_to), std::move(null_map)));
334
14
        } else {
335
4
            block.replace_by_position(result, std::move(col_to));
336
4
        }
337
338
18
        return Status::OK();
339
15
    }
340
};
341
342
// jsonb_parse: result nullability follows the inputs (FOLLOW_INPUT) -- execute_impl wraps
// the output in ColumnNullable only when some argument type is nullable. A row that fails
// to parse aborts the call with Status::InvalidArgument (FAIL).
343
using FunctionJsonbParse =
344
        FunctionJsonbParseBase<NullalbeMode::FOLLOW_INPUT, JsonbParseErrorMode::FAIL>;
345
// Result is always nullable (NULLABLE); a row that fails to parse is marked NULL in the
// null map and gets a default (empty) entry in the data column (RETURN_NULL).
using FunctionJsonbParseErrorNull =
346
        FunctionJsonbParseBase<NullalbeMode::NULLABLE, JsonbParseErrorMode::RETURN_NULL>;
347
// Result nullability follows the inputs (FOLLOW_INPUT); a row that fails to parse takes the
// default value instead (RETURN_VALUE): the constant cached in the function state, the
// second argument (constant or per-row), or '{}' when only one argument is given.
using FunctionJsonbParseErrorValue =
348
        FunctionJsonbParseBase<NullalbeMode::FOLLOW_INPUT, JsonbParseErrorMode::RETURN_VALUE>;
349
350
// func(jsonb, [varchar, varchar, ...]) -> nullable(type)
351
template <typename Impl>
352
class FunctionJsonbExtract : public IFunction {
353
public:
354
    static constexpr auto name = Impl::name;
355
    static constexpr auto alias = Impl::alias;
356
1.73k
    static FunctionPtr create() { return std::make_shared<FunctionJsonbExtract>(); }
_ZN5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE6createEv
Line
Count
Source
356
149
    static FunctionPtr create() { return std::make_shared<FunctionJsonbExtract>(); }
_ZN5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE6createEv
Line
Count
Source
356
148
    static FunctionPtr create() { return std::make_shared<FunctionJsonbExtract>(); }
_ZN5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE6createEv
Line
Count
Source
356
1.41k
    static FunctionPtr create() { return std::make_shared<FunctionJsonbExtract>(); }
_ZN5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE6createEv
Line
Count
Source
356
18
    static FunctionPtr create() { return std::make_shared<FunctionJsonbExtract>(); }
357
0
    String get_name() const override { return name; }
Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE8get_nameB5cxx11Ev
358
1.70k
    bool is_variadic() const override { return true; }
_ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE11is_variadicEv
Line
Count
Source
358
141
    bool is_variadic() const override { return true; }
_ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE11is_variadicEv
Line
Count
Source
358
140
    bool is_variadic() const override { return true; }
_ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE11is_variadicEv
Line
Count
Source
358
1.41k
    bool is_variadic() const override { return true; }
_ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE11is_variadicEv
Line
Count
Source
358
10
    bool is_variadic() const override { return true; }
359
1
    size_t get_number_of_arguments() const override { return 0; }
Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE23get_number_of_argumentsEv
_ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE23get_number_of_argumentsEv
Line
Count
Source
359
1
    size_t get_number_of_arguments() const override { return 0; }
Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE23get_number_of_argumentsEv
360
14.3k
    bool use_default_implementation_for_nulls() const override { return false; }
_ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE36use_default_implementation_for_nullsEv
Line
Count
Source
360
1.46k
    bool use_default_implementation_for_nulls() const override { return false; }
_ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE36use_default_implementation_for_nullsEv
Line
Count
Source
360
1.46k
    bool use_default_implementation_for_nulls() const override { return false; }
_ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE36use_default_implementation_for_nullsEv
Line
Count
Source
360
11.4k
    bool use_default_implementation_for_nulls() const override { return false; }
_ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE36use_default_implementation_for_nullsEv
Line
Count
Source
360
18
    bool use_default_implementation_for_nulls() const override { return false; }
361
1.69k
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
362
1.69k
        return make_nullable(std::make_shared<typename Impl::ReturnType>());
363
1.69k
    }
_ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
361
140
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
362
140
        return make_nullable(std::make_shared<typename Impl::ReturnType>());
363
140
    }
_ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
361
139
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
362
139
        return make_nullable(std::make_shared<typename Impl::ReturnType>());
363
139
    }
_ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
361
1.40k
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
362
1.40k
        return make_nullable(std::make_shared<typename Impl::ReturnType>());
363
1.40k
    }
_ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
361
9
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
362
9
        return make_nullable(std::make_shared<typename Impl::ReturnType>());
363
9
    }
364
32
    DataTypes get_variadic_argument_types_impl() const override {
365
        if constexpr (HasGetVariadicArgumentTypesImpl<Impl>) {
366
            return Impl::get_variadic_argument_types_impl();
367
32
        } else {
368
32
            return {};
369
32
        }
370
32
    }
_ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE32get_variadic_argument_types_implEv
Line
Count
Source
364
8
    DataTypes get_variadic_argument_types_impl() const override {
365
        if constexpr (HasGetVariadicArgumentTypesImpl<Impl>) {
366
            return Impl::get_variadic_argument_types_impl();
367
8
        } else {
368
8
            return {};
369
8
        }
370
8
    }
_ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE32get_variadic_argument_types_implEv
Line
Count
Source
364
8
    DataTypes get_variadic_argument_types_impl() const override {
365
        if constexpr (HasGetVariadicArgumentTypesImpl<Impl>) {
366
            return Impl::get_variadic_argument_types_impl();
367
8
        } else {
368
8
            return {};
369
8
        }
370
8
    }
_ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE32get_variadic_argument_types_implEv
Line
Count
Source
364
8
    DataTypes get_variadic_argument_types_impl() const override {
365
        if constexpr (HasGetVariadicArgumentTypesImpl<Impl>) {
366
            return Impl::get_variadic_argument_types_impl();
367
8
        } else {
368
8
            return {};
369
8
        }
370
8
    }
_ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE32get_variadic_argument_types_implEv
Line
Count
Source
364
8
    DataTypes get_variadic_argument_types_impl() const override {
365
        if constexpr (HasGetVariadicArgumentTypesImpl<Impl>) {
366
            return Impl::get_variadic_argument_types_impl();
367
8
        } else {
368
8
            return {};
369
8
        }
370
8
    }
371
372
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
373
12.6k
                        uint32_t result, size_t input_rows_count) const override {
374
12.6k
        DORIS_CHECK_GE(arguments.size(), 2);
375
376
12.6k
        ColumnPtr jsonb_data_column;
377
12.6k
        bool jsonb_data_const = false;
378
12.6k
        const NullMap* data_null_map = nullptr;
379
380
12.6k
        if (block.get_by_position(arguments[0]).type->get_primitive_type() !=
381
12.6k
            PrimitiveType::TYPE_JSONB) {
382
1
            return Status::InvalidArgument(
383
1
                    "jsonb_extract first argument should be json type, but got {}",
384
1
                    block.get_by_position(arguments[0]).type->get_name());
385
1
        }
386
387
        // prepare jsonb data column
388
12.6k
        std::tie(jsonb_data_column, jsonb_data_const) =
389
12.6k
                unpack_if_const(block.get_by_position(arguments[0]).column);
390
12.6k
        if (const auto* nullable_column =
391
12.6k
                    check_and_get_column<ColumnNullable>(jsonb_data_column.get())) {
392
10.8k
            jsonb_data_column = nullable_column->get_nested_column_ptr();
393
10.8k
            data_null_map = &nullable_column->get_null_map_data();
394
10.8k
        }
395
12.6k
        const auto& ldata = assert_cast<const ColumnString*>(jsonb_data_column.get())->get_chars();
396
12.6k
        const auto& loffsets =
397
12.6k
                assert_cast<const ColumnString*>(jsonb_data_column.get())->get_offsets();
398
399
        // prepare parse path column prepare
400
12.6k
        std::vector<const ColumnString*> jsonb_path_columns;
401
12.6k
        std::vector<bool> path_const(arguments.size() - 1);
402
12.6k
        std::vector<const NullMap*> path_null_maps(arguments.size() - 1, nullptr);
403
25.6k
        for (int i = 0; i < arguments.size() - 1; ++i) {
404
13.0k
            ColumnPtr path_column;
405
13.0k
            bool is_const = false;
406
13.0k
            std::tie(path_column, is_const) =
407
13.0k
                    unpack_if_const(block.get_by_position(arguments[i + 1]).column);
408
13.0k
            path_const[i] = is_const;
409
13.0k
            if (const auto* nullable_column =
410
13.0k
                        check_and_get_column<ColumnNullable>(path_column.get())) {
411
70
                path_column = nullable_column->get_nested_column_ptr();
412
70
                path_null_maps[i] = &nullable_column->get_null_map_data();
413
70
            }
414
13.0k
            jsonb_path_columns.push_back(assert_cast<const ColumnString*>(path_column.get()));
415
13.0k
        }
416
417
12.6k
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
418
12.6k
        auto res = Impl::ColumnType::create();
419
420
        // execute Impl
421
        if constexpr (std::is_same_v<typename Impl::ReturnType, DataTypeString> ||
422
11.3k
                      std::is_same_v<typename Impl::ReturnType, DataTypeJsonb>) {
423
11.3k
            auto& res_data = res->get_chars();
424
11.3k
            auto& res_offsets = res->get_offsets();
425
11.3k
            RETURN_IF_ERROR(Impl::vector_vector_v2(
426
11.3k
                    context, ldata, loffsets, data_null_map, jsonb_data_const, jsonb_path_columns,
427
11.3k
                    path_null_maps, path_const, res_data, res_offsets, null_map->get_data()));
428
11.3k
        } else {
429
            // not support other extract type for now (e.g. int, double, ...)
430
1.32k
            DORIS_CHECK_EQ(jsonb_path_columns.size(), 1);
431
1.32k
            const auto& rdata = jsonb_path_columns[0]->get_chars();
432
1.32k
            const auto& roffsets = jsonb_path_columns[0]->get_offsets();
433
434
1.32k
            auto create_all_null_result = [&]() {
435
2
                res = Impl::ColumnType::create();
436
2
                res->insert_default();
437
2
                auto nullable_column =
438
2
                        ColumnNullable::create(std::move(res), ColumnUInt8::create(1, 1));
439
2
                auto const_column =
440
2
                        ColumnConst::create(std::move(nullable_column), input_rows_count);
441
2
                block.get_by_position(result).column = std::move(const_column);
442
2
                return Status::OK();
443
2
            };
444
445
1.32k
            if (jsonb_data_const) {
446
2
                if (data_null_map && (*data_null_map)[0]) {
447
1
                    return create_all_null_result();
448
1
                }
449
450
1
                RETURN_IF_ERROR(Impl::scalar_vector(context, jsonb_data_column->get_data_at(0),
451
1
                                                    rdata, roffsets, path_null_maps[0],
452
1
                                                    res->get_data(), null_map->get_data()));
453
1.32k
            } else if (path_const[0]) {
454
1.32k
                if (path_null_maps[0] && (*path_null_maps[0])[0]) {
455
1
                    return create_all_null_result();
456
1
                }
457
1.32k
                RETURN_IF_ERROR(Impl::vector_scalar(context, ldata, loffsets, data_null_map,
458
1.32k
                                                    jsonb_path_columns[0]->get_data_at(0),
459
1.32k
                                                    res->get_data(), null_map->get_data()));
460
1.32k
            } else {
461
4
                RETURN_IF_ERROR(Impl::vector_vector(context, ldata, loffsets, data_null_map, rdata,
462
4
                                                    roffsets, path_null_maps[0], res->get_data(),
463
4
                                                    null_map->get_data()));
464
4
            }
465
1.32k
        }
466
467
12.6k
        block.get_by_position(result).column =
468
12.6k
                ColumnNullable::create(std::move(res), std::move(null_map));
469
12.6k
        return Status::OK();
470
12.6k
    }
_ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
373
1.32k
                        uint32_t result, size_t input_rows_count) const override {
374
1.32k
        DORIS_CHECK_GE(arguments.size(), 2);
375
376
1.32k
        ColumnPtr jsonb_data_column;
377
1.32k
        bool jsonb_data_const = false;
378
1.32k
        const NullMap* data_null_map = nullptr;
379
380
1.32k
        if (block.get_by_position(arguments[0]).type->get_primitive_type() !=
381
1.32k
            PrimitiveType::TYPE_JSONB) {
382
0
            return Status::InvalidArgument(
383
0
                    "jsonb_extract first argument should be json type, but got {}",
384
0
                    block.get_by_position(arguments[0]).type->get_name());
385
0
        }
386
387
        // prepare jsonb data column
388
1.32k
        std::tie(jsonb_data_column, jsonb_data_const) =
389
1.32k
                unpack_if_const(block.get_by_position(arguments[0]).column);
390
1.32k
        if (const auto* nullable_column =
391
1.32k
                    check_and_get_column<ColumnNullable>(jsonb_data_column.get())) {
392
1.14k
            jsonb_data_column = nullable_column->get_nested_column_ptr();
393
1.14k
            data_null_map = &nullable_column->get_null_map_data();
394
1.14k
        }
395
1.32k
        const auto& ldata = assert_cast<const ColumnString*>(jsonb_data_column.get())->get_chars();
396
1.32k
        const auto& loffsets =
397
1.32k
                assert_cast<const ColumnString*>(jsonb_data_column.get())->get_offsets();
398
399
        // prepare parse path column prepare
400
1.32k
        std::vector<const ColumnString*> jsonb_path_columns;
401
1.32k
        std::vector<bool> path_const(arguments.size() - 1);
402
1.32k
        std::vector<const NullMap*> path_null_maps(arguments.size() - 1, nullptr);
403
2.65k
        for (int i = 0; i < arguments.size() - 1; ++i) {
404
1.32k
            ColumnPtr path_column;
405
1.32k
            bool is_const = false;
406
1.32k
            std::tie(path_column, is_const) =
407
1.32k
                    unpack_if_const(block.get_by_position(arguments[i + 1]).column);
408
1.32k
            path_const[i] = is_const;
409
1.32k
            if (const auto* nullable_column =
410
1.32k
                        check_and_get_column<ColumnNullable>(path_column.get())) {
411
5
                path_column = nullable_column->get_nested_column_ptr();
412
5
                path_null_maps[i] = &nullable_column->get_null_map_data();
413
5
            }
414
1.32k
            jsonb_path_columns.push_back(assert_cast<const ColumnString*>(path_column.get()));
415
1.32k
        }
416
417
1.32k
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
418
1.32k
        auto res = Impl::ColumnType::create();
419
420
        // execute Impl
421
        if constexpr (std::is_same_v<typename Impl::ReturnType, DataTypeString> ||
422
1.32k
                      std::is_same_v<typename Impl::ReturnType, DataTypeJsonb>) {
423
1.32k
            auto& res_data = res->get_chars();
424
1.32k
            auto& res_offsets = res->get_offsets();
425
1.32k
            RETURN_IF_ERROR(Impl::vector_vector_v2(
426
1.32k
                    context, ldata, loffsets, data_null_map, jsonb_data_const, jsonb_path_columns,
427
1.32k
                    path_null_maps, path_const, res_data, res_offsets, null_map->get_data()));
428
        } else {
429
            // not support other extract type for now (e.g. int, double, ...)
430
            DORIS_CHECK_EQ(jsonb_path_columns.size(), 1);
431
            const auto& rdata = jsonb_path_columns[0]->get_chars();
432
            const auto& roffsets = jsonb_path_columns[0]->get_offsets();
433
434
            auto create_all_null_result = [&]() {
435
                res = Impl::ColumnType::create();
436
                res->insert_default();
437
                auto nullable_column =
438
                        ColumnNullable::create(std::move(res), ColumnUInt8::create(1, 1));
439
                auto const_column =
440
                        ColumnConst::create(std::move(nullable_column), input_rows_count);
441
                block.get_by_position(result).column = std::move(const_column);
442
                return Status::OK();
443
            };
444
445
            if (jsonb_data_const) {
446
                if (data_null_map && (*data_null_map)[0]) {
447
                    return create_all_null_result();
448
                }
449
450
                RETURN_IF_ERROR(Impl::scalar_vector(context, jsonb_data_column->get_data_at(0),
451
                                                    rdata, roffsets, path_null_maps[0],
452
                                                    res->get_data(), null_map->get_data()));
453
            } else if (path_const[0]) {
454
                if (path_null_maps[0] && (*path_null_maps[0])[0]) {
455
                    return create_all_null_result();
456
                }
457
                RETURN_IF_ERROR(Impl::vector_scalar(context, ldata, loffsets, data_null_map,
458
                                                    jsonb_path_columns[0]->get_data_at(0),
459
                                                    res->get_data(), null_map->get_data()));
460
            } else {
461
                RETURN_IF_ERROR(Impl::vector_vector(context, ldata, loffsets, data_null_map, rdata,
462
                                                    roffsets, path_null_maps[0], res->get_data(),
463
                                                    null_map->get_data()));
464
            }
465
        }
466
467
1.32k
        block.get_by_position(result).column =
468
1.32k
                ColumnNullable::create(std::move(res), std::move(null_map));
469
1.32k
        return Status::OK();
470
1.32k
    }
_ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
373
1.32k
                        uint32_t result, size_t input_rows_count) const override {
374
1.32k
        DORIS_CHECK_GE(arguments.size(), 2);
375
376
1.32k
        ColumnPtr jsonb_data_column;
377
1.32k
        bool jsonb_data_const = false;
378
1.32k
        const NullMap* data_null_map = nullptr;
379
380
1.32k
        if (block.get_by_position(arguments[0]).type->get_primitive_type() !=
381
1.32k
            PrimitiveType::TYPE_JSONB) {
382
0
            return Status::InvalidArgument(
383
0
                    "jsonb_extract first argument should be json type, but got {}",
384
0
                    block.get_by_position(arguments[0]).type->get_name());
385
0
        }
386
387
        // prepare jsonb data column
388
1.32k
        std::tie(jsonb_data_column, jsonb_data_const) =
389
1.32k
                unpack_if_const(block.get_by_position(arguments[0]).column);
390
1.32k
        if (const auto* nullable_column =
391
1.32k
                    check_and_get_column<ColumnNullable>(jsonb_data_column.get())) {
392
1.14k
            jsonb_data_column = nullable_column->get_nested_column_ptr();
393
1.14k
            data_null_map = &nullable_column->get_null_map_data();
394
1.14k
        }
395
1.32k
        const auto& ldata = assert_cast<const ColumnString*>(jsonb_data_column.get())->get_chars();
396
1.32k
        const auto& loffsets =
397
1.32k
                assert_cast<const ColumnString*>(jsonb_data_column.get())->get_offsets();
398
399
        // prepare parse path column prepare
400
1.32k
        std::vector<const ColumnString*> jsonb_path_columns;
401
1.32k
        std::vector<bool> path_const(arguments.size() - 1);
402
1.32k
        std::vector<const NullMap*> path_null_maps(arguments.size() - 1, nullptr);
403
2.65k
        for (int i = 0; i < arguments.size() - 1; ++i) {
404
1.32k
            ColumnPtr path_column;
405
1.32k
            bool is_const = false;
406
1.32k
            std::tie(path_column, is_const) =
407
1.32k
                    unpack_if_const(block.get_by_position(arguments[i + 1]).column);
408
1.32k
            path_const[i] = is_const;
409
1.32k
            if (const auto* nullable_column =
410
1.32k
                        check_and_get_column<ColumnNullable>(path_column.get())) {
411
4
                path_column = nullable_column->get_nested_column_ptr();
412
4
                path_null_maps[i] = &nullable_column->get_null_map_data();
413
4
            }
414
1.32k
            jsonb_path_columns.push_back(assert_cast<const ColumnString*>(path_column.get()));
415
1.32k
        }
416
417
1.32k
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
418
1.32k
        auto res = Impl::ColumnType::create();
419
420
        // execute Impl
421
        if constexpr (std::is_same_v<typename Impl::ReturnType, DataTypeString> ||
422
                      std::is_same_v<typename Impl::ReturnType, DataTypeJsonb>) {
423
            auto& res_data = res->get_chars();
424
            auto& res_offsets = res->get_offsets();
425
            RETURN_IF_ERROR(Impl::vector_vector_v2(
426
                    context, ldata, loffsets, data_null_map, jsonb_data_const, jsonb_path_columns,
427
                    path_null_maps, path_const, res_data, res_offsets, null_map->get_data()));
428
1.32k
        } else {
429
            // not support other extract type for now (e.g. int, double, ...)
430
1.32k
            DORIS_CHECK_EQ(jsonb_path_columns.size(), 1);
431
1.32k
            const auto& rdata = jsonb_path_columns[0]->get_chars();
432
1.32k
            const auto& roffsets = jsonb_path_columns[0]->get_offsets();
433
434
1.32k
            auto create_all_null_result = [&]() {
435
1.32k
                res = Impl::ColumnType::create();
436
1.32k
                res->insert_default();
437
1.32k
                auto nullable_column =
438
1.32k
                        ColumnNullable::create(std::move(res), ColumnUInt8::create(1, 1));
439
1.32k
                auto const_column =
440
1.32k
                        ColumnConst::create(std::move(nullable_column), input_rows_count);
441
1.32k
                block.get_by_position(result).column = std::move(const_column);
442
1.32k
                return Status::OK();
443
1.32k
            };
444
445
1.32k
            if (jsonb_data_const) {
446
2
                if (data_null_map && (*data_null_map)[0]) {
447
1
                    return create_all_null_result();
448
1
                }
449
450
1
                RETURN_IF_ERROR(Impl::scalar_vector(context, jsonb_data_column->get_data_at(0),
451
1
                                                    rdata, roffsets, path_null_maps[0],
452
1
                                                    res->get_data(), null_map->get_data()));
453
1.32k
            } else if (path_const[0]) {
454
1.32k
                if (path_null_maps[0] && (*path_null_maps[0])[0]) {
455
1
                    return create_all_null_result();
456
1
                }
457
1.32k
                RETURN_IF_ERROR(Impl::vector_scalar(context, ldata, loffsets, data_null_map,
458
1.32k
                                                    jsonb_path_columns[0]->get_data_at(0),
459
1.32k
                                                    res->get_data(), null_map->get_data()));
460
1.32k
            } else {
461
4
                RETURN_IF_ERROR(Impl::vector_vector(context, ldata, loffsets, data_null_map, rdata,
462
4
                                                    roffsets, path_null_maps[0], res->get_data(),
463
4
                                                    null_map->get_data()));
464
4
            }
465
1.32k
        }
466
467
1.32k
        block.get_by_position(result).column =
468
1.32k
                ColumnNullable::create(std::move(res), std::move(null_map));
469
1.32k
        return Status::OK();
470
1.32k
    }
_ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
373
10.0k
                        uint32_t result, size_t input_rows_count) const override {
374
10.0k
        DORIS_CHECK_GE(arguments.size(), 2);
375
376
10.0k
        ColumnPtr jsonb_data_column;
377
10.0k
        bool jsonb_data_const = false;
378
10.0k
        const NullMap* data_null_map = nullptr;
379
380
10.0k
        if (block.get_by_position(arguments[0]).type->get_primitive_type() !=
381
10.0k
            PrimitiveType::TYPE_JSONB) {
382
1
            return Status::InvalidArgument(
383
1
                    "jsonb_extract first argument should be json type, but got {}",
384
1
                    block.get_by_position(arguments[0]).type->get_name());
385
1
        }
386
387
        // prepare jsonb data column
388
10.0k
        std::tie(jsonb_data_column, jsonb_data_const) =
389
10.0k
                unpack_if_const(block.get_by_position(arguments[0]).column);
390
10.0k
        if (const auto* nullable_column =
391
10.0k
                    check_and_get_column<ColumnNullable>(jsonb_data_column.get())) {
392
8.53k
            jsonb_data_column = nullable_column->get_nested_column_ptr();
393
8.53k
            data_null_map = &nullable_column->get_null_map_data();
394
8.53k
        }
395
10.0k
        const auto& ldata = assert_cast<const ColumnString*>(jsonb_data_column.get())->get_chars();
396
10.0k
        const auto& loffsets =
397
10.0k
                assert_cast<const ColumnString*>(jsonb_data_column.get())->get_offsets();
398
399
        // prepare parse path column prepare
400
10.0k
        std::vector<const ColumnString*> jsonb_path_columns;
401
10.0k
        std::vector<bool> path_const(arguments.size() - 1);
402
10.0k
        std::vector<const NullMap*> path_null_maps(arguments.size() - 1, nullptr);
403
20.3k
        for (int i = 0; i < arguments.size() - 1; ++i) {
404
10.3k
            ColumnPtr path_column;
405
10.3k
            bool is_const = false;
406
10.3k
            std::tie(path_column, is_const) =
407
10.3k
                    unpack_if_const(block.get_by_position(arguments[i + 1]).column);
408
10.3k
            path_const[i] = is_const;
409
10.3k
            if (const auto* nullable_column =
410
10.3k
                        check_and_get_column<ColumnNullable>(path_column.get())) {
411
60
                path_column = nullable_column->get_nested_column_ptr();
412
60
                path_null_maps[i] = &nullable_column->get_null_map_data();
413
60
            }
414
10.3k
            jsonb_path_columns.push_back(assert_cast<const ColumnString*>(path_column.get()));
415
10.3k
        }
416
417
10.0k
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
418
10.0k
        auto res = Impl::ColumnType::create();
419
420
        // execute Impl
421
        if constexpr (std::is_same_v<typename Impl::ReturnType, DataTypeString> ||
422
10.0k
                      std::is_same_v<typename Impl::ReturnType, DataTypeJsonb>) {
423
10.0k
            auto& res_data = res->get_chars();
424
10.0k
            auto& res_offsets = res->get_offsets();
425
10.0k
            RETURN_IF_ERROR(Impl::vector_vector_v2(
426
10.0k
                    context, ldata, loffsets, data_null_map, jsonb_data_const, jsonb_path_columns,
427
10.0k
                    path_null_maps, path_const, res_data, res_offsets, null_map->get_data()));
428
        } else {
429
            // not support other extract type for now (e.g. int, double, ...)
430
            DORIS_CHECK_EQ(jsonb_path_columns.size(), 1);
431
            const auto& rdata = jsonb_path_columns[0]->get_chars();
432
            const auto& roffsets = jsonb_path_columns[0]->get_offsets();
433
434
            auto create_all_null_result = [&]() {
435
                res = Impl::ColumnType::create();
436
                res->insert_default();
437
                auto nullable_column =
438
                        ColumnNullable::create(std::move(res), ColumnUInt8::create(1, 1));
439
                auto const_column =
440
                        ColumnConst::create(std::move(nullable_column), input_rows_count);
441
                block.get_by_position(result).column = std::move(const_column);
442
                return Status::OK();
443
            };
444
445
            if (jsonb_data_const) {
446
                if (data_null_map && (*data_null_map)[0]) {
447
                    return create_all_null_result();
448
                }
449
450
                RETURN_IF_ERROR(Impl::scalar_vector(context, jsonb_data_column->get_data_at(0),
451
                                                    rdata, roffsets, path_null_maps[0],
452
                                                    res->get_data(), null_map->get_data()));
453
            } else if (path_const[0]) {
454
                if (path_null_maps[0] && (*path_null_maps[0])[0]) {
455
                    return create_all_null_result();
456
                }
457
                RETURN_IF_ERROR(Impl::vector_scalar(context, ldata, loffsets, data_null_map,
458
                                                    jsonb_path_columns[0]->get_data_at(0),
459
                                                    res->get_data(), null_map->get_data()));
460
            } else {
461
                RETURN_IF_ERROR(Impl::vector_vector(context, ldata, loffsets, data_null_map, rdata,
462
                                                    roffsets, path_null_maps[0], res->get_data(),
463
                                                    null_map->get_data()));
464
            }
465
        }
466
467
9.99k
        block.get_by_position(result).column =
468
10.0k
                ColumnNullable::create(std::move(res), std::move(null_map));
469
10.0k
        return Status::OK();
470
10.0k
    }
_ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
373
9
                        uint32_t result, size_t input_rows_count) const override {
374
9
        DORIS_CHECK_GE(arguments.size(), 2);
375
376
9
        ColumnPtr jsonb_data_column;
377
9
        bool jsonb_data_const = false;
378
9
        const NullMap* data_null_map = nullptr;
379
380
9
        if (block.get_by_position(arguments[0]).type->get_primitive_type() !=
381
9
            PrimitiveType::TYPE_JSONB) {
382
0
            return Status::InvalidArgument(
383
0
                    "jsonb_extract first argument should be json type, but got {}",
384
0
                    block.get_by_position(arguments[0]).type->get_name());
385
0
        }
386
387
        // prepare jsonb data column
388
9
        std::tie(jsonb_data_column, jsonb_data_const) =
389
9
                unpack_if_const(block.get_by_position(arguments[0]).column);
390
9
        if (const auto* nullable_column =
391
9
                    check_and_get_column<ColumnNullable>(jsonb_data_column.get())) {
392
9
            jsonb_data_column = nullable_column->get_nested_column_ptr();
393
9
            data_null_map = &nullable_column->get_null_map_data();
394
9
        }
395
9
        const auto& ldata = assert_cast<const ColumnString*>(jsonb_data_column.get())->get_chars();
396
9
        const auto& loffsets =
397
9
                assert_cast<const ColumnString*>(jsonb_data_column.get())->get_offsets();
398
399
        // prepare parse path column prepare
400
9
        std::vector<const ColumnString*> jsonb_path_columns;
401
9
        std::vector<bool> path_const(arguments.size() - 1);
402
9
        std::vector<const NullMap*> path_null_maps(arguments.size() - 1, nullptr);
403
22
        for (int i = 0; i < arguments.size() - 1; ++i) {
404
13
            ColumnPtr path_column;
405
13
            bool is_const = false;
406
13
            std::tie(path_column, is_const) =
407
13
                    unpack_if_const(block.get_by_position(arguments[i + 1]).column);
408
13
            path_const[i] = is_const;
409
13
            if (const auto* nullable_column =
410
13
                        check_and_get_column<ColumnNullable>(path_column.get())) {
411
1
                path_column = nullable_column->get_nested_column_ptr();
412
1
                path_null_maps[i] = &nullable_column->get_null_map_data();
413
1
            }
414
13
            jsonb_path_columns.push_back(assert_cast<const ColumnString*>(path_column.get()));
415
13
        }
416
417
9
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
418
9
        auto res = Impl::ColumnType::create();
419
420
        // execute Impl
421
        if constexpr (std::is_same_v<typename Impl::ReturnType, DataTypeString> ||
422
9
                      std::is_same_v<typename Impl::ReturnType, DataTypeJsonb>) {
423
9
            auto& res_data = res->get_chars();
424
9
            auto& res_offsets = res->get_offsets();
425
9
            RETURN_IF_ERROR(Impl::vector_vector_v2(
426
9
                    context, ldata, loffsets, data_null_map, jsonb_data_const, jsonb_path_columns,
427
9
                    path_null_maps, path_const, res_data, res_offsets, null_map->get_data()));
428
        } else {
429
            // not support other extract type for now (e.g. int, double, ...)
430
            DORIS_CHECK_EQ(jsonb_path_columns.size(), 1);
431
            const auto& rdata = jsonb_path_columns[0]->get_chars();
432
            const auto& roffsets = jsonb_path_columns[0]->get_offsets();
433
434
            auto create_all_null_result = [&]() {
435
                res = Impl::ColumnType::create();
436
                res->insert_default();
437
                auto nullable_column =
438
                        ColumnNullable::create(std::move(res), ColumnUInt8::create(1, 1));
439
                auto const_column =
440
                        ColumnConst::create(std::move(nullable_column), input_rows_count);
441
                block.get_by_position(result).column = std::move(const_column);
442
                return Status::OK();
443
            };
444
445
            if (jsonb_data_const) {
446
                if (data_null_map && (*data_null_map)[0]) {
447
                    return create_all_null_result();
448
                }
449
450
                RETURN_IF_ERROR(Impl::scalar_vector(context, jsonb_data_column->get_data_at(0),
451
                                                    rdata, roffsets, path_null_maps[0],
452
                                                    res->get_data(), null_map->get_data()));
453
            } else if (path_const[0]) {
454
                if (path_null_maps[0] && (*path_null_maps[0])[0]) {
455
                    return create_all_null_result();
456
                }
457
                RETURN_IF_ERROR(Impl::vector_scalar(context, ldata, loffsets, data_null_map,
458
                                                    jsonb_path_columns[0]->get_data_at(0),
459
                                                    res->get_data(), null_map->get_data()));
460
            } else {
461
                RETURN_IF_ERROR(Impl::vector_vector(context, ldata, loffsets, data_null_map, rdata,
462
                                                    roffsets, path_null_maps[0], res->get_data(),
463
                                                    null_map->get_data()));
464
            }
465
        }
466
467
9
        block.get_by_position(result).column =
468
9
                ColumnNullable::create(std::move(res), std::move(null_map));
469
9
        return Status::OK();
470
9
    }
471
};
472
473
class FunctionJsonbKeys : public IFunction {
474
public:
475
    static constexpr auto name = "json_keys";
476
    static constexpr auto alias = "jsonb_keys";
477
52
    static FunctionPtr create() { return std::make_shared<FunctionJsonbKeys>(); }
478
0
    String get_name() const override { return name; }
479
44
    bool is_variadic() const override { return true; }
480
0
    size_t get_number_of_arguments() const override { return 0; }
481
482
148
    bool use_default_implementation_for_nulls() const override { return false; }
483
484
43
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
485
43
        return make_nullable(
486
43
                std::make_shared<DataTypeArray>(make_nullable(std::make_shared<DataTypeString>())));
487
43
    }
488
489
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
490
105
                        uint32_t result, size_t input_rows_count) const override {
491
105
        DORIS_CHECK_GE(arguments.size(), 1);
492
105
        DORIS_CHECK(arguments.size() == 1 || arguments.size() == 2)
493
0
                << "json_keys should have 1 or 2 arguments, but got " << arguments.size();
494
495
105
        const NullMap* data_null_map = nullptr;
496
105
        const ColumnString* col_from_string = nullptr;
497
        // prepare jsonb data column
498
105
        auto&& [jsonb_data_column, json_data_const] =
499
105
                unpack_if_const(block.get_by_position(arguments[0]).column);
500
105
        if (const auto* nullable = check_and_get_column<ColumnNullable>(jsonb_data_column.get())) {
501
99
            col_from_string =
502
99
                    assert_cast<const ColumnString*>(nullable->get_nested_column_ptr().get());
503
99
            data_null_map = &nullable->get_null_map_data();
504
99
        } else {
505
6
            col_from_string = assert_cast<const ColumnString*>(jsonb_data_column.get());
506
6
        }
507
508
        // prepare parse path column prepare, maybe we do not have path column
509
105
        ColumnPtr jsonb_path_column = nullptr;
510
105
        const ColumnString* jsonb_path_col = nullptr;
511
105
        bool path_const = false;
512
105
        const NullMap* path_null_map = nullptr;
513
105
        if (arguments.size() == 2) {
514
            // we have should have a ColumnString for path
515
78
            std::tie(jsonb_path_column, path_const) =
516
78
                    unpack_if_const(block.get_by_position(arguments[1]).column);
517
78
            if (const auto* nullable =
518
78
                        check_and_get_column<ColumnNullable>(jsonb_path_column.get())) {
519
10
                jsonb_path_column = nullable->get_nested_column_ptr();
520
10
                path_null_map = &nullable->get_null_map_data();
521
10
            }
522
78
            jsonb_path_col = check_and_get_column<ColumnString>(jsonb_path_column.get());
523
78
        }
524
525
105
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
526
105
        NullMap& res_null_map = null_map->get_data();
527
528
105
        auto dst_arr = ColumnArray::create(
529
105
                ColumnNullable::create(ColumnString::create(), ColumnUInt8::create()),
530
105
                ColumnArray::ColumnOffsets::create());
531
105
        auto& dst_nested_column = assert_cast<ColumnNullable&>(dst_arr->get_data());
532
533
105
        Status st = std::visit(
534
105
                [&](auto data_const, auto has_path, auto path_const) {
535
105
                    return inner_loop_impl<data_const, has_path, path_const>(
536
105
                            input_rows_count, *dst_arr, dst_nested_column, res_null_map,
537
105
                            *col_from_string, data_null_map, jsonb_path_col, path_null_map);
538
105
                },
_ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESG_SG_EEDaSA_SB_SC_
Line
Count
Source
534
27
                [&](auto data_const, auto has_path, auto path_const) {
535
27
                    return inner_loop_impl<data_const, has_path, path_const>(
536
27
                            input_rows_count, *dst_arr, dst_nested_column, res_null_map,
537
27
                            *col_from_string, data_null_map, jsonb_path_col, path_null_map);
538
27
                },
Unexecuted instantiation: _ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESG_SF_IbLb1EEEEDaSA_SB_SC_
_ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESF_IbLb1EESG_EEDaSA_SB_SC_
Line
Count
Source
534
28
                [&](auto data_const, auto has_path, auto path_const) {
535
28
                    return inner_loop_impl<data_const, has_path, path_const>(
536
28
                            input_rows_count, *dst_arr, dst_nested_column, res_null_map,
537
28
                            *col_from_string, data_null_map, jsonb_path_col, path_null_map);
538
28
                },
_ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESF_IbLb1EESH_EEDaSA_SB_SC_
Line
Count
Source
534
48
                [&](auto data_const, auto has_path, auto path_const) {
535
48
                    return inner_loop_impl<data_const, has_path, path_const>(
536
48
                            input_rows_count, *dst_arr, dst_nested_column, res_null_map,
537
48
                            *col_from_string, data_null_map, jsonb_path_col, path_null_map);
538
48
                },
Unexecuted instantiation: _ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESF_IbLb0EESH_EEDaSA_SB_SC_
Unexecuted instantiation: _ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESF_IbLb0EESG_EEDaSA_SB_SC_
_ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESG_SF_IbLb0EEEEDaSA_SB_SC_
Line
Count
Source
534
2
                [&](auto data_const, auto has_path, auto path_const) {
535
2
                    return inner_loop_impl<data_const, has_path, path_const>(
536
2
                            input_rows_count, *dst_arr, dst_nested_column, res_null_map,
537
2
                            *col_from_string, data_null_map, jsonb_path_col, path_null_map);
538
2
                },
Unexecuted instantiation: _ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESG_SG_EEDaSA_SB_SC_
539
105
                make_bool_variant(json_data_const), make_bool_variant(jsonb_path_column),
540
105
                make_bool_variant(path_const));
541
105
        if (!st.ok()) {
542
12
            return st;
543
12
        }
544
93
        block.get_by_position(result).column =
545
93
                ColumnNullable::create(std::move(dst_arr), std::move(null_map));
546
93
        return st;
547
105
    }
548
549
private:
550
    template <bool JSONB_DATA_CONST, bool JSONB_PATH_PARAM, bool JSON_PATH_CONST>
551
    static ALWAYS_INLINE Status inner_loop_impl(size_t input_rows_count, ColumnArray& dst_arr,
552
                                                ColumnNullable& dst_nested_column,
553
                                                NullMap& res_null_map,
554
                                                const ColumnString& col_from_string,
555
                                                const NullMap* jsonb_data_nullmap,
556
                                                const ColumnString* jsonb_path_column,
557
105
                                                const NullMap* path_null_map) {
558
        // if path is const, we just need to parse it once
559
105
        JsonbPath const_path;
560
105
        if constexpr (JSONB_PATH_PARAM && JSON_PATH_CONST) {
561
48
            StringRef r_raw_ref = jsonb_path_column->get_data_at(0);
562
48
            if (!const_path.seek(r_raw_ref.data, r_raw_ref.size)) {
563
1
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
564
1
                                               r_raw_ref.to_string());
565
1
            }
566
567
47
            if (const_path.is_wildcard() || const_path.is_supper_wildcard()) {
568
2
                return Status::InvalidJsonPath(
569
2
                        "In this situation, path expressions may not contain the * and ** tokens "
570
2
                        "or an array range.");
571
2
            }
572
47
        }
573
574
385
        for (size_t i = 0; i < input_rows_count; ++i) {
575
272
            auto index = index_check_const(i, JSONB_DATA_CONST);
576
            // if jsonb data is null or path column is null , we should return null
577
272
            if (jsonb_data_nullmap && (*jsonb_data_nullmap)[index]) {
578
23
                res_null_map[i] = 1;
579
23
                dst_arr.insert_default();
580
23
                continue;
581
23
            }
582
249
            if constexpr (JSONB_PATH_PARAM && !JSON_PATH_CONST) {
583
73
                if (path_null_map && (*path_null_map)[i]) {
584
8
                    res_null_map[i] = 1;
585
8
                    dst_arr.insert_default();
586
8
                    continue;
587
8
                }
588
73
            }
589
590
65
            auto json_data = col_from_string.get_data_at(index);
591
249
            const JsonbDocument* doc = nullptr;
592
249
            auto st = JsonbDocument::checkAndCreateDocument(json_data.data, json_data.size, &doc);
593
249
            if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
594
0
                dst_arr.clear();
595
0
                return Status::InvalidArgument("jsonb data is invalid");
596
0
            }
597
249
            const JsonbValue* obj_val;
598
249
            JsonbFindResult find_result;
599
249
            if constexpr (JSONB_PATH_PARAM) {
600
195
                if constexpr (!JSON_PATH_CONST) {
601
73
                    auto data = jsonb_path_column->get_data_at(i);
602
73
                    JsonbPath path;
603
73
                    if (!path.seek(data.data, data.size)) {
604
5
                        return Status::InvalidArgument(
605
5
                                "Json path error: Invalid Json Path for value: {} at row: {}",
606
5
                                std::string_view(data.data, data.size), i);
607
5
                    }
608
609
68
                    if (path.is_wildcard() || path.is_supper_wildcard()) {
610
4
                        return Status::InvalidJsonPath(
611
4
                                "In this situation, path expressions may not contain the * and ** "
612
4
                                "tokens "
613
4
                                "or an array range. at row: {}",
614
4
                                i);
615
4
                    }
616
64
                    find_result = doc->getValue()->findValue(path);
617
122
                } else {
618
122
                    find_result = doc->getValue()->findValue(const_path);
619
122
                }
620
0
                obj_val = find_result.value;
621
195
            } else {
622
54
                obj_val = doc->getValue();
623
54
            }
624
625
249
            if (!obj_val || !obj_val->isObject()) {
626
                // if jsonb data is not object we should return null
627
182
                res_null_map[i] = 1;
628
182
                dst_arr.insert_default();
629
182
                continue;
630
182
            }
631
67
            const auto* obj = obj_val->unpack<ObjectVal>();
632
76
            for (const auto& it : *obj) {
633
76
                dst_nested_column.insert_data(it.getKeyStr(), it.klen());
634
76
            }
635
67
            dst_arr.get_offsets().push_back(dst_nested_column.size());
636
67
        } //for
637
113
        return Status::OK();
638
105
    }
_ZN5doris17FunctionJsonbKeys15inner_loop_implILb0ELb0ELb0EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_
Line
Count
Source
557
27
                                                const NullMap* path_null_map) {
558
        // if path is const, we just need to parse it once
559
27
        JsonbPath const_path;
560
        if constexpr (JSONB_PATH_PARAM && JSON_PATH_CONST) {
561
            StringRef r_raw_ref = jsonb_path_column->get_data_at(0);
562
            if (!const_path.seek(r_raw_ref.data, r_raw_ref.size)) {
563
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
564
                                               r_raw_ref.to_string());
565
            }
566
567
            if (const_path.is_wildcard() || const_path.is_supper_wildcard()) {
568
                return Status::InvalidJsonPath(
569
                        "In this situation, path expressions may not contain the * and ** tokens "
570
                        "or an array range.");
571
            }
572
        }
573
574
85
        for (size_t i = 0; i < input_rows_count; ++i) {
575
58
            auto index = index_check_const(i, JSONB_DATA_CONST);
576
            // if jsonb data is null or path column is null , we should return null
577
58
            if (jsonb_data_nullmap && (*jsonb_data_nullmap)[index]) {
578
4
                res_null_map[i] = 1;
579
4
                dst_arr.insert_default();
580
4
                continue;
581
4
            }
582
            if constexpr (JSONB_PATH_PARAM && !JSON_PATH_CONST) {
583
                if (path_null_map && (*path_null_map)[i]) {
584
                    res_null_map[i] = 1;
585
                    dst_arr.insert_default();
586
                    continue;
587
                }
588
            }
589
590
54
            auto json_data = col_from_string.get_data_at(index);
591
54
            const JsonbDocument* doc = nullptr;
592
54
            auto st = JsonbDocument::checkAndCreateDocument(json_data.data, json_data.size, &doc);
593
54
            if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
594
0
                dst_arr.clear();
595
0
                return Status::InvalidArgument("jsonb data is invalid");
596
0
            }
597
54
            const JsonbValue* obj_val;
598
54
            JsonbFindResult find_result;
599
            if constexpr (JSONB_PATH_PARAM) {
600
                if constexpr (!JSON_PATH_CONST) {
601
                    auto data = jsonb_path_column->get_data_at(i);
602
                    JsonbPath path;
603
                    if (!path.seek(data.data, data.size)) {
604
                        return Status::InvalidArgument(
605
                                "Json path error: Invalid Json Path for value: {} at row: {}",
606
                                std::string_view(data.data, data.size), i);
607
                    }
608
609
                    if (path.is_wildcard() || path.is_supper_wildcard()) {
610
                        return Status::InvalidJsonPath(
611
                                "In this situation, path expressions may not contain the * and ** "
612
                                "tokens "
613
                                "or an array range. at row: {}",
614
                                i);
615
                    }
616
                    find_result = doc->getValue()->findValue(path);
617
                } else {
618
                    find_result = doc->getValue()->findValue(const_path);
619
                }
620
                obj_val = find_result.value;
621
54
            } else {
622
54
                obj_val = doc->getValue();
623
54
            }
624
625
54
            if (!obj_val || !obj_val->isObject()) {
626
                // if jsonb data is not object we should return null
627
36
                res_null_map[i] = 1;
628
36
                dst_arr.insert_default();
629
36
                continue;
630
36
            }
631
18
            const auto* obj = obj_val->unpack<ObjectVal>();
632
36
            for (const auto& it : *obj) {
633
36
                dst_nested_column.insert_data(it.getKeyStr(), it.klen());
634
36
            }
635
18
            dst_arr.get_offsets().push_back(dst_nested_column.size());
636
18
        } //for
637
27
        return Status::OK();
638
27
    }
Unexecuted instantiation: _ZN5doris17FunctionJsonbKeys15inner_loop_implILb0ELb0ELb1EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_
_ZN5doris17FunctionJsonbKeys15inner_loop_implILb0ELb1ELb0EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_
Line
Count
Source
557
28
                                                const NullMap* path_null_map) {
558
        // if path is const, we just need to parse it once
559
28
        JsonbPath const_path;
560
        if constexpr (JSONB_PATH_PARAM && JSON_PATH_CONST) {
561
            StringRef r_raw_ref = jsonb_path_column->get_data_at(0);
562
            if (!const_path.seek(r_raw_ref.data, r_raw_ref.size)) {
563
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
564
                                               r_raw_ref.to_string());
565
            }
566
567
            if (const_path.is_wildcard() || const_path.is_supper_wildcard()) {
568
                return Status::InvalidJsonPath(
569
                        "In this situation, path expressions may not contain the * and ** tokens "
570
                        "or an array range.");
571
            }
572
        }
573
574
87
        for (size_t i = 0; i < input_rows_count; ++i) {
575
55
            auto index = index_check_const(i, JSONB_DATA_CONST);
576
            // if jsonb data is null or path column is null , we should return null
577
55
            if (jsonb_data_nullmap && (*jsonb_data_nullmap)[index]) {
578
6
                res_null_map[i] = 1;
579
6
                dst_arr.insert_default();
580
6
                continue;
581
6
            }
582
49
            if constexpr (JSONB_PATH_PARAM && !JSON_PATH_CONST) {
583
49
                if (path_null_map && (*path_null_map)[i]) {
584
4
                    res_null_map[i] = 1;
585
4
                    dst_arr.insert_default();
586
4
                    continue;
587
4
                }
588
49
            }
589
590
45
            auto json_data = col_from_string.get_data_at(index);
591
49
            const JsonbDocument* doc = nullptr;
592
49
            auto st = JsonbDocument::checkAndCreateDocument(json_data.data, json_data.size, &doc);
593
49
            if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
594
0
                dst_arr.clear();
595
0
                return Status::InvalidArgument("jsonb data is invalid");
596
0
            }
597
49
            const JsonbValue* obj_val;
598
49
            JsonbFindResult find_result;
599
49
            if constexpr (JSONB_PATH_PARAM) {
600
49
                if constexpr (!JSON_PATH_CONST) {
601
49
                    auto data = jsonb_path_column->get_data_at(i);
602
49
                    JsonbPath path;
603
49
                    if (!path.seek(data.data, data.size)) {
604
5
                        return Status::InvalidArgument(
605
5
                                "Json path error: Invalid Json Path for value: {} at row: {}",
606
5
                                std::string_view(data.data, data.size), i);
607
5
                    }
608
609
44
                    if (path.is_wildcard() || path.is_supper_wildcard()) {
610
4
                        return Status::InvalidJsonPath(
611
4
                                "In this situation, path expressions may not contain the * and ** "
612
4
                                "tokens "
613
4
                                "or an array range. at row: {}",
614
4
                                i);
615
4
                    }
616
40
                    find_result = doc->getValue()->findValue(path);
617
                } else {
618
                    find_result = doc->getValue()->findValue(const_path);
619
                }
620
0
                obj_val = find_result.value;
621
            } else {
622
                obj_val = doc->getValue();
623
            }
624
625
49
            if (!obj_val || !obj_val->isObject()) {
626
                // if jsonb data is not object we should return null
627
25
                res_null_map[i] = 1;
628
25
                dst_arr.insert_default();
629
25
                continue;
630
25
            }
631
24
            const auto* obj = obj_val->unpack<ObjectVal>();
632
24
            for (const auto& it : *obj) {
633
15
                dst_nested_column.insert_data(it.getKeyStr(), it.klen());
634
15
            }
635
24
            dst_arr.get_offsets().push_back(dst_nested_column.size());
636
24
        } //for
637
32
        return Status::OK();
638
28
    }
_ZN5doris17FunctionJsonbKeys15inner_loop_implILb0ELb1ELb1EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_
Line
Count
Source
557
48
                                                const NullMap* path_null_map) {
558
        // if path is const, we just need to parse it once
559
48
        JsonbPath const_path;
560
48
        if constexpr (JSONB_PATH_PARAM && JSON_PATH_CONST) {
561
48
            StringRef r_raw_ref = jsonb_path_column->get_data_at(0);
562
48
            if (!const_path.seek(r_raw_ref.data, r_raw_ref.size)) {
563
1
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
564
1
                                               r_raw_ref.to_string());
565
1
            }
566
567
47
            if (const_path.is_wildcard() || const_path.is_supper_wildcard()) {
568
2
                return Status::InvalidJsonPath(
569
2
                        "In this situation, path expressions may not contain the * and ** tokens "
570
2
                        "or an array range.");
571
2
            }
572
47
        }
573
574
183
        for (size_t i = 0; i < input_rows_count; ++i) {
575
135
            auto index = index_check_const(i, JSONB_DATA_CONST);
576
            // if jsonb data is null or path column is null , we should return null
577
135
            if (jsonb_data_nullmap && (*jsonb_data_nullmap)[index]) {
578
13
                res_null_map[i] = 1;
579
13
                dst_arr.insert_default();
580
13
                continue;
581
13
            }
582
            if constexpr (JSONB_PATH_PARAM && !JSON_PATH_CONST) {
583
                if (path_null_map && (*path_null_map)[i]) {
584
                    res_null_map[i] = 1;
585
                    dst_arr.insert_default();
586
                    continue;
587
                }
588
            }
589
590
122
            auto json_data = col_from_string.get_data_at(index);
591
122
            const JsonbDocument* doc = nullptr;
592
122
            auto st = JsonbDocument::checkAndCreateDocument(json_data.data, json_data.size, &doc);
593
122
            if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
594
0
                dst_arr.clear();
595
0
                return Status::InvalidArgument("jsonb data is invalid");
596
0
            }
597
122
            const JsonbValue* obj_val;
598
122
            JsonbFindResult find_result;
599
122
            if constexpr (JSONB_PATH_PARAM) {
600
                if constexpr (!JSON_PATH_CONST) {
601
                    auto data = jsonb_path_column->get_data_at(i);
602
                    JsonbPath path;
603
                    if (!path.seek(data.data, data.size)) {
604
                        return Status::InvalidArgument(
605
                                "Json path error: Invalid Json Path for value: {} at row: {}",
606
                                std::string_view(data.data, data.size), i);
607
                    }
608
609
                    if (path.is_wildcard() || path.is_supper_wildcard()) {
610
                        return Status::InvalidJsonPath(
611
                                "In this situation, path expressions may not contain the * and ** "
612
                                "tokens "
613
                                "or an array range. at row: {}",
614
                                i);
615
                    }
616
                    find_result = doc->getValue()->findValue(path);
617
122
                } else {
618
122
                    find_result = doc->getValue()->findValue(const_path);
619
122
                }
620
122
                obj_val = find_result.value;
621
            } else {
622
                obj_val = doc->getValue();
623
            }
624
625
122
            if (!obj_val || !obj_val->isObject()) {
626
                // if jsonb data is not object we should return null
627
113
                res_null_map[i] = 1;
628
113
                dst_arr.insert_default();
629
113
                continue;
630
113
            }
631
9
            const auto* obj = obj_val->unpack<ObjectVal>();
632
9
            for (const auto& it : *obj) {
633
9
                dst_nested_column.insert_data(it.getKeyStr(), it.klen());
634
9
            }
635
9
            dst_arr.get_offsets().push_back(dst_nested_column.size());
636
9
        } //for
637
48
        return Status::OK();
638
48
    }
Unexecuted instantiation: _ZN5doris17FunctionJsonbKeys15inner_loop_implILb1ELb0ELb0EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_
Unexecuted instantiation: _ZN5doris17FunctionJsonbKeys15inner_loop_implILb1ELb0ELb1EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_
_ZN5doris17FunctionJsonbKeys15inner_loop_implILb1ELb1ELb0EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_
Line
Count
Source
557
2
                                                const NullMap* path_null_map) {
558
        // if path is const, we just need to parse it once
559
2
        JsonbPath const_path;
560
        if constexpr (JSONB_PATH_PARAM && JSON_PATH_CONST) {
561
            StringRef r_raw_ref = jsonb_path_column->get_data_at(0);
562
            if (!const_path.seek(r_raw_ref.data, r_raw_ref.size)) {
563
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
564
                                               r_raw_ref.to_string());
565
            }
566
567
            if (const_path.is_wildcard() || const_path.is_supper_wildcard()) {
568
                return Status::InvalidJsonPath(
569
                        "In this situation, path expressions may not contain the * and ** tokens "
570
                        "or an array range.");
571
            }
572
        }
573
574
30
        for (size_t i = 0; i < input_rows_count; ++i) {
575
24
            auto index = index_check_const(i, JSONB_DATA_CONST);
576
            // if jsonb data is null or path column is null , we should return null
577
24
            if (jsonb_data_nullmap && (*jsonb_data_nullmap)[index]) {
578
0
                res_null_map[i] = 1;
579
0
                dst_arr.insert_default();
580
0
                continue;
581
0
            }
582
24
            if constexpr (JSONB_PATH_PARAM && !JSON_PATH_CONST) {
583
24
                if (path_null_map && (*path_null_map)[i]) {
584
4
                    res_null_map[i] = 1;
585
4
                    dst_arr.insert_default();
586
4
                    continue;
587
4
                }
588
24
            }
589
590
20
            auto json_data = col_from_string.get_data_at(index);
591
24
            const JsonbDocument* doc = nullptr;
592
24
            auto st = JsonbDocument::checkAndCreateDocument(json_data.data, json_data.size, &doc);
593
24
            if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
594
0
                dst_arr.clear();
595
0
                return Status::InvalidArgument("jsonb data is invalid");
596
0
            }
597
24
            const JsonbValue* obj_val;
598
24
            JsonbFindResult find_result;
599
24
            if constexpr (JSONB_PATH_PARAM) {
600
24
                if constexpr (!JSON_PATH_CONST) {
601
24
                    auto data = jsonb_path_column->get_data_at(i);
602
24
                    JsonbPath path;
603
24
                    if (!path.seek(data.data, data.size)) {
604
0
                        return Status::InvalidArgument(
605
0
                                "Json path error: Invalid Json Path for value: {} at row: {}",
606
0
                                std::string_view(data.data, data.size), i);
607
0
                    }
608
609
24
                    if (path.is_wildcard() || path.is_supper_wildcard()) {
610
0
                        return Status::InvalidJsonPath(
611
0
                                "In this situation, path expressions may not contain the * and ** "
612
0
                                "tokens "
613
0
                                "or an array range. at row: {}",
614
0
                                i);
615
0
                    }
616
24
                    find_result = doc->getValue()->findValue(path);
617
                } else {
618
                    find_result = doc->getValue()->findValue(const_path);
619
                }
620
0
                obj_val = find_result.value;
621
            } else {
622
                obj_val = doc->getValue();
623
            }
624
625
24
            if (!obj_val || !obj_val->isObject()) {
626
                // if jsonb data is not object we should return null
627
8
                res_null_map[i] = 1;
628
8
                dst_arr.insert_default();
629
8
                continue;
630
8
            }
631
16
            const auto* obj = obj_val->unpack<ObjectVal>();
632
16
            for (const auto& it : *obj) {
633
16
                dst_nested_column.insert_data(it.getKeyStr(), it.klen());
634
16
            }
635
16
            dst_arr.get_offsets().push_back(dst_nested_column.size());
636
16
        } //for
637
6
        return Status::OK();
638
2
    }
Unexecuted instantiation: _ZN5doris17FunctionJsonbKeys15inner_loop_implILb1ELb1ELb1EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_
639
};
640
641
class FunctionJsonbExtractPath : public IFunction {
642
public:
643
    static constexpr auto name = "json_exists_path";
644
    static constexpr auto alias = "jsonb_exists_path";
645
    using ColumnType = ColumnUInt8;
646
    using Container = typename ColumnType::Container;
647
183
    static FunctionPtr create() { return std::make_shared<FunctionJsonbExtractPath>(); }
648
1
    String get_name() const override { return name; }
649
174
    size_t get_number_of_arguments() const override { return 2; }
650
174
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
651
        // it only needs to indicate existence and does not need to return nullable values.
652
174
        const auto nullable = std::ranges::any_of(
653
196
                arguments, [](const DataTypePtr& type) { return type->is_nullable(); });
654
174
        if (nullable) {
655
153
            return make_nullable(std::make_shared<DataTypeUInt8>());
656
153
        } else {
657
21
            return std::make_shared<DataTypeUInt8>();
658
21
        }
659
174
    }
660
661
1.53k
    bool use_default_implementation_for_nulls() const override { return false; }
662
663
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
664
1.36k
                        uint32_t result, size_t input_rows_count) const override {
665
        // prepare jsonb data column
666
1.36k
        auto&& [jsonb_data_column, jsonb_data_const] =
667
1.36k
                unpack_if_const(block.get_by_position(arguments[0]).column);
668
669
1.36k
        const NullMap* data_null_map = nullptr;
670
1.36k
        const ColumnString* data_col = nullptr;
671
1.36k
        if (const auto* nullable = check_and_get_column<ColumnNullable>(jsonb_data_column.get())) {
672
1.17k
            data_col = assert_cast<const ColumnString*>(nullable->get_nested_column_ptr().get());
673
1.17k
            data_null_map = &nullable->get_null_map_data();
674
1.17k
        } else {
675
184
            data_col = assert_cast<const ColumnString*>(jsonb_data_column.get());
676
184
        }
677
678
1.36k
        const auto& ldata = data_col->get_chars();
679
1.36k
        const auto& loffsets = data_col->get_offsets();
680
681
        // prepare parse path column prepare
682
1.36k
        auto&& [path_column, path_const] =
683
1.36k
                unpack_if_const(block.get_by_position(arguments[1]).column);
684
1.36k
        const ColumnString* path_col = nullptr;
685
1.36k
        const NullMap* path_null_map = nullptr;
686
1.36k
        if (const auto* nullable = check_and_get_column<ColumnNullable>(path_column.get())) {
687
7
            path_col = assert_cast<const ColumnString*>(nullable->get_nested_column_ptr().get());
688
7
            path_null_map = &nullable->get_null_map_data();
689
1.35k
        } else {
690
1.35k
            path_col = assert_cast<const ColumnString*>(path_column.get());
691
1.35k
        }
692
693
1.36k
        DORIS_CHECK(!(jsonb_data_const && path_const))
694
0
                << "jsonb_data_const and path_const should not be both const";
695
696
1.36k
        auto create_all_null_result = [&]() {
697
3
            auto res = ColumnType::create();
698
3
            res->insert_default();
699
3
            auto nullable_column =
700
3
                    ColumnNullable::create(std::move(res), ColumnUInt8::create(1, 1));
701
3
            auto const_column = ColumnConst::create(std::move(nullable_column), input_rows_count);
702
3
            block.get_by_position(result).column = std::move(const_column);
703
3
            return Status::OK();
704
3
        };
705
706
1.36k
        ColumnUInt8::MutablePtr result_null_map_column;
707
1.36k
        NullMap* result_null_map = nullptr;
708
1.36k
        if (data_null_map || path_null_map) {
709
1.17k
            result_null_map_column = ColumnUInt8::create(input_rows_count, 0);
710
1.17k
            result_null_map = &result_null_map_column->get_data();
711
712
1.17k
            if (data_null_map) {
713
1.17k
                VectorizedUtils::update_null_map(*result_null_map, *data_null_map,
714
1.17k
                                                 jsonb_data_const);
715
1.17k
            }
716
717
1.17k
            if (path_null_map) {
718
7
                VectorizedUtils::update_null_map(*result_null_map, *path_null_map, path_const);
719
7
            }
720
721
1.17k
            if (!simd::contain_zero(result_null_map->data(), input_rows_count)) {
722
3
                return create_all_null_result();
723
3
            }
724
1.17k
        }
725
726
1.35k
        auto res = ColumnType::create();
727
728
1.35k
        bool is_invalid_json_path = false;
729
730
1.35k
        const auto& rdata = path_col->get_chars();
731
1.35k
        const auto& roffsets = path_col->get_offsets();
732
1.35k
        if (jsonb_data_const) {
733
2
            if (data_null_map && (*data_null_map)[0]) {
734
0
                return create_all_null_result();
735
0
            }
736
2
            scalar_vector(context, data_col->get_data_at(0), rdata, roffsets, res->get_data(),
737
2
                          result_null_map, is_invalid_json_path);
738
1.35k
        } else if (path_const) {
739
1.32k
            if (path_null_map && (*path_null_map)[0]) {
740
0
                return create_all_null_result();
741
0
            }
742
1.32k
            vector_scalar(context, ldata, loffsets, path_col->get_data_at(0), res->get_data(),
743
1.32k
                          result_null_map, is_invalid_json_path);
744
1.32k
        } else {
745
35
            vector_vector(context, ldata, loffsets, rdata, roffsets, res->get_data(),
746
35
                          result_null_map, is_invalid_json_path);
747
35
        }
748
1.35k
        if (is_invalid_json_path) {
749
7
            return Status::InvalidArgument(
750
7
                    "Json path error: Invalid Json Path for value: {}",
751
7
                    std::string_view(reinterpret_cast<const char*>(rdata.data()), rdata.size()));
752
7
        }
753
754
1.35k
        if (result_null_map) {
755
1.17k
            auto nullabel_col =
756
1.17k
                    ColumnNullable::create(std::move(res), std::move(result_null_map_column));
757
1.17k
            block.get_by_position(result).column = std::move(nullabel_col);
758
1.17k
        } else {
759
181
            block.get_by_position(result).column = std::move(res);
760
181
        }
761
1.35k
        return Status::OK();
762
1.35k
    }
763
764
private:
765
    static ALWAYS_INLINE void inner_loop_impl(size_t i, Container& res, const char* l_raw_str,
766
3.01k
                                              size_t l_str_size, JsonbPath& path) {
767
        // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
768
3.01k
        const JsonbDocument* doc = nullptr;
769
3.01k
        auto st = JsonbDocument::checkAndCreateDocument(l_raw_str, l_str_size, &doc);
770
3.01k
        if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
771
0
            return;
772
0
        }
773
774
        // value is NOT necessary to be deleted since JsonbValue will not allocate memory
775
3.01k
        auto result = doc->getValue()->findValue(path);
776
777
3.01k
        if (result.value) {
778
445
            res[i] = 1;
779
445
        }
780
3.01k
    }
781
    static void vector_vector(FunctionContext* context, const ColumnString::Chars& ldata,
782
                              const ColumnString::Offsets& loffsets,
783
                              const ColumnString::Chars& rdata,
784
                              const ColumnString::Offsets& roffsets, Container& res,
785
35
                              const NullMap* result_null_map, bool& is_invalid_json_path) {
786
35
        const size_t size = loffsets.size();
787
35
        res.resize_fill(size, 0);
788
789
80
        for (size_t i = 0; i < size; i++) {
790
50
            if (result_null_map && (*result_null_map)[i]) {
791
8
                continue;
792
8
            }
793
794
42
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
795
42
            int l_str_size = loffsets[i] - loffsets[i - 1];
796
797
42
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
798
42
            int r_str_size = roffsets[i] - roffsets[i - 1];
799
800
42
            JsonbPath path;
801
42
            if (!path.seek(r_raw_str, r_str_size)) {
802
5
                is_invalid_json_path = true;
803
5
                return;
804
5
            }
805
806
37
            inner_loop_impl(i, res, l_raw_str, l_str_size, path);
807
37
        }
808
35
    }
809
    static void scalar_vector(FunctionContext* context, const StringRef& ldata,
810
                              const ColumnString::Chars& rdata,
811
                              const ColumnString::Offsets& roffsets, Container& res,
812
2
                              const NullMap* result_null_map, bool& is_invalid_json_path) {
813
2
        const size_t size = roffsets.size();
814
2
        res.resize_fill(size, 0);
815
816
14
        for (size_t i = 0; i < size; i++) {
817
13
            if (result_null_map && (*result_null_map)[i]) {
818
4
                continue;
819
4
            }
820
9
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
821
9
            int r_str_size = roffsets[i] - roffsets[i - 1];
822
823
9
            JsonbPath path;
824
9
            if (!path.seek(r_raw_str, r_str_size)) {
825
1
                is_invalid_json_path = true;
826
1
                return;
827
1
            }
828
829
8
            inner_loop_impl(i, res, ldata.data, ldata.size, path);
830
8
        }
831
2
    }
832
    static void vector_scalar(FunctionContext* context, const ColumnString::Chars& ldata,
833
                              const ColumnString::Offsets& loffsets, const StringRef& rdata,
834
                              Container& res, const NullMap* result_null_map,
835
1.32k
                              bool& is_invalid_json_path) {
836
1.32k
        const size_t size = loffsets.size();
837
1.32k
        res.resize_fill(size, 0);
838
839
1.32k
        JsonbPath path;
840
1.32k
        if (!path.seek(rdata.data, rdata.size)) {
841
1
            is_invalid_json_path = true;
842
1
            return;
843
1
        }
844
845
4.51k
        for (size_t i = 0; i < size; i++) {
846
3.19k
            if (result_null_map && (*result_null_map)[i]) {
847
232
                continue;
848
232
            }
849
2.96k
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
850
2.96k
            int l_str_size = loffsets[i] - loffsets[i - 1];
851
852
2.96k
            inner_loop_impl(i, res, l_raw_str, l_str_size, path);
853
2.96k
        }
854
1.32k
    }
855
};
856
857
template <typename ValueType>
858
struct JsonbExtractStringImpl {
859
    using ReturnType = typename ValueType::ReturnType;
860
    using ColumnType = typename ValueType::ColumnType;
861
862
private:
863
    /// Looks up `path` in the JSONB document stored in [l_raw, l_raw + l_size) and appends the
    /// outcome for row `i` to the result string column.
    ///
    /// - Invalid document or no match: the row is pushed as NULL.
    /// - ValueType::only_get_type: the type name of the matched value is pushed.
    /// - Otherwise: the matched value is serialized through `writer` and pushed; with
    ///   ValueType::no_quotes, a string that begins and ends with '"' is written without
    ///   that outer pair of quote characters.
    ///
    /// `formater` is not used by this function. `writer` is appended to as-is; the visible
    /// caller resets it before each call.
    static ALWAYS_INLINE void inner_loop_impl(JsonbWriter* writer, size_t i,
                                              ColumnString::Chars& res_data,
                                              ColumnString::Offsets& res_offsets, NullMap& null_map,
                                              std::unique_ptr<JsonbToJson>& formater,
                                              const char* l_raw, size_t l_size, JsonbPath& path) {
        // The document is not deleted here: JsonbDocument does not allocate memory.
        const JsonbDocument* document = nullptr;
        auto parse_status = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &document);
        const bool document_usable =
                parse_status.ok() && document != nullptr && document->getValue() != nullptr;
        if (!document_usable) [[unlikely]] {
            StringOP::push_null_string(i, res_data, res_offsets, null_map);
            return;
        }

        // The located value is not deleted either: JsonbValue does not allocate memory.
        auto located = document->getValue()->findValue(path);
        if (UNLIKELY(!located.value)) {
            StringOP::push_null_string(i, res_data, res_offsets, null_map);
            return;
        }

        if constexpr (ValueType::only_get_type) {
            const std::string_view type_name(located.value->typeName());
            StringOP::push_value_string(type_name, i, res_data, res_offsets);
            return;
        } else {
            static_assert(std::is_same_v<DataTypeJsonb, ReturnType>);

            // Copies whatever the writer has produced so far into the result row.
            const auto push_writer_output = [&]() {
                StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(),
                                                             writer->getOutput()->getSize()),
                                            i, res_data, res_offsets);
            };

            if constexpr (ValueType::no_quotes) {
                if (located.value->isString()) {
                    const auto* string_val = located.value->unpack<JsonbStringVal>();
                    const auto* bytes = string_val->getBlob();
                    const auto byte_count = string_val->length();
                    const bool wrapped_in_quotes = byte_count > 1 && bytes[0] == '"' &&
                                                   bytes[byte_count - 1] == '"';
                    if (wrapped_in_quotes) {
                        // Re-emit the string without its first and last byte.
                        writer->writeStartString();
                        writer->writeString(bytes + 1, byte_count - 2);
                        writer->writeEndString();
                        push_writer_output();
                        return;
                    }
                }
            }

            writer->writeValueSimple(located.value);
            push_writer_output();
        }
    }
_ZN5doris22JsonbExtractStringImplINS_13JsonbTypeTypeEE15inner_loop_implEPNS_12JsonbWriterTINS_14JsonbOutStreamEEEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEESC_RSt10unique_ptrINS_11JsonbToJsonESt14default_deleteISG_EEPKcmRNS_9JsonbPathE
Line
Count
Source
867
2.98k
                                              const char* l_raw, size_t l_size, JsonbPath& path) {
868
        // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
869
2.98k
        const JsonbDocument* doc = nullptr;
870
2.98k
        auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc);
871
2.98k
        if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
872
0
            StringOP::push_null_string(i, res_data, res_offsets, null_map);
873
0
            return;
874
0
        }
875
876
        // value is NOT necessary to be deleted since JsonbValue will not allocate memory
877
2.98k
        auto find_result = doc->getValue()->findValue(path);
878
879
2.98k
        if (UNLIKELY(!find_result.value)) {
880
2.55k
            StringOP::push_null_string(i, res_data, res_offsets, null_map);
881
2.55k
            return;
882
2.55k
        }
883
884
429
        if constexpr (ValueType::only_get_type) {
885
429
            StringOP::push_value_string(std::string_view(find_result.value->typeName()), i,
886
429
                                        res_data, res_offsets);
887
429
            return;
888
        } else {
889
            static_assert(std::is_same_v<DataTypeJsonb, ReturnType>);
890
            if constexpr (ValueType::no_quotes) {
891
                if (find_result.value->isString()) {
892
                    const auto* str_value = find_result.value->unpack<JsonbStringVal>();
893
                    const auto* blob = str_value->getBlob();
894
                    if (str_value->length() > 1 && blob[0] == '"' &&
895
                        blob[str_value->length() - 1] == '"') {
896
                        writer->writeStartString();
897
                        writer->writeString(blob + 1, str_value->length() - 2);
898
                        writer->writeEndString();
899
                        StringOP::push_value_string(
900
                                std::string_view(writer->getOutput()->getBuffer(),
901
                                                 writer->getOutput()->getSize()),
902
                                i, res_data, res_offsets);
903
                        return;
904
                    }
905
                }
906
            }
907
            writer->writeValueSimple(find_result.value);
908
            StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(),
909
                                                         writer->getOutput()->getSize()),
910
                                        i, res_data, res_offsets);
911
        }
912
429
    }
_ZN5doris22JsonbExtractStringImplINS_13JsonbTypeJsonEE15inner_loop_implEPNS_12JsonbWriterTINS_14JsonbOutStreamEEEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEESC_RSt10unique_ptrINS_11JsonbToJsonESt14default_deleteISG_EEPKcmRNS_9JsonbPathE
Line
Count
Source
867
136k
                                              const char* l_raw, size_t l_size, JsonbPath& path) {
868
        // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
869
136k
        const JsonbDocument* doc = nullptr;
870
136k
        auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc);
871
136k
        if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
872
0
            StringOP::push_null_string(i, res_data, res_offsets, null_map);
873
0
            return;
874
0
        }
875
876
        // value is NOT necessary to be deleted since JsonbValue will not allocate memory
877
136k
        auto find_result = doc->getValue()->findValue(path);
878
879
136k
        if (UNLIKELY(!find_result.value)) {
880
18.3k
            StringOP::push_null_string(i, res_data, res_offsets, null_map);
881
18.3k
            return;
882
18.3k
        }
883
884
        if constexpr (ValueType::only_get_type) {
885
            StringOP::push_value_string(std::string_view(find_result.value->typeName()), i,
886
                                        res_data, res_offsets);
887
            return;
888
117k
        } else {
889
117k
            static_assert(std::is_same_v<DataTypeJsonb, ReturnType>);
890
            if constexpr (ValueType::no_quotes) {
891
                if (find_result.value->isString()) {
892
                    const auto* str_value = find_result.value->unpack<JsonbStringVal>();
893
                    const auto* blob = str_value->getBlob();
894
                    if (str_value->length() > 1 && blob[0] == '"' &&
895
                        blob[str_value->length() - 1] == '"') {
896
                        writer->writeStartString();
897
                        writer->writeString(blob + 1, str_value->length() - 2);
898
                        writer->writeEndString();
899
                        StringOP::push_value_string(
900
                                std::string_view(writer->getOutput()->getBuffer(),
901
                                                 writer->getOutput()->getSize()),
902
                                i, res_data, res_offsets);
903
                        return;
904
                    }
905
                }
906
            }
907
117k
            writer->writeValueSimple(find_result.value);
908
117k
            StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(),
909
117k
                                                         writer->getOutput()->getSize()),
910
117k
                                        i, res_data, res_offsets);
911
117k
        }
912
117k
    }
_ZN5doris22JsonbExtractStringImplINS_21JsonbTypeJsonNoQuotesEE15inner_loop_implEPNS_12JsonbWriterTINS_14JsonbOutStreamEEEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEESC_RSt10unique_ptrINS_11JsonbToJsonESt14default_deleteISG_EEPKcmRNS_9JsonbPathE
Line
Count
Source
867
5
                                              const char* l_raw, size_t l_size, JsonbPath& path) {
868
        // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
869
5
        const JsonbDocument* doc = nullptr;
870
5
        auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc);
871
5
        if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
872
0
            StringOP::push_null_string(i, res_data, res_offsets, null_map);
873
0
            return;
874
0
        }
875
876
        // value is NOT necessary to be deleted since JsonbValue will not allocate memory
877
5
        auto find_result = doc->getValue()->findValue(path);
878
879
5
        if (UNLIKELY(!find_result.value)) {
880
0
            StringOP::push_null_string(i, res_data, res_offsets, null_map);
881
0
            return;
882
0
        }
883
884
        if constexpr (ValueType::only_get_type) {
885
            StringOP::push_value_string(std::string_view(find_result.value->typeName()), i,
886
                                        res_data, res_offsets);
887
            return;
888
5
        } else {
889
5
            static_assert(std::is_same_v<DataTypeJsonb, ReturnType>);
890
5
            if constexpr (ValueType::no_quotes) {
891
5
                if (find_result.value->isString()) {
892
4
                    const auto* str_value = find_result.value->unpack<JsonbStringVal>();
893
4
                    const auto* blob = str_value->getBlob();
894
4
                    if (str_value->length() > 1 && blob[0] == '"' &&
895
4
                        blob[str_value->length() - 1] == '"') {
896
0
                        writer->writeStartString();
897
0
                        writer->writeString(blob + 1, str_value->length() - 2);
898
0
                        writer->writeEndString();
899
0
                        StringOP::push_value_string(
900
0
                                std::string_view(writer->getOutput()->getBuffer(),
901
0
                                                 writer->getOutput()->getSize()),
902
0
                                i, res_data, res_offsets);
903
0
                        return;
904
0
                    }
905
4
                }
906
5
            }
907
5
            writer->writeValueSimple(find_result.value);
908
5
            StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(),
909
5
                                                         writer->getOutput()->getSize()),
910
5
                                        i, res_data, res_offsets);
911
5
        }
912
5
    }
913
914
public:
915
    // for jsonb_extract_string
916
    static Status vector_vector_v2(
917
            FunctionContext* context, const ColumnString::Chars& ldata,
918
            const ColumnString::Offsets& loffsets, const NullMap* l_null_map,
919
            const bool& json_data_const,
920
            const std::vector<const ColumnString*>& rdata_columns, // here we can support more paths
921
            const std::vector<const NullMap*>& r_null_maps, const std::vector<bool>& path_const,
922
11.3k
            ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets, NullMap& null_map) {
923
11.3k
        const size_t input_rows_count = null_map.size();
924
11.3k
        res_offsets.resize(input_rows_count);
925
926
11.3k
        auto writer = std::make_unique<JsonbWriter>();
927
11.3k
        std::unique_ptr<JsonbToJson> formater;
928
929
        // reusable json path list, especially for const path
930
11.3k
        std::vector<JsonbPath> json_path_list;
931
11.3k
        json_path_list.resize(rdata_columns.size());
932
933
        // lambda function to parse json path for row i and path pi
934
11.7k
        auto parse_json_path = [&](size_t i, size_t pi) -> Status {
935
11.7k
            const auto index = index_check_const(i, path_const[pi]);
936
937
11.7k
            const ColumnString* path_col = rdata_columns[pi];
938
11.7k
            const ColumnString::Chars& rdata = path_col->get_chars();
939
11.7k
            const ColumnString::Offsets& roffsets = path_col->get_offsets();
940
11.7k
            size_t r_off = roffsets[index - 1];
941
11.7k
            size_t r_size = roffsets[index] - r_off;
942
11.7k
            const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]);
943
944
11.7k
            JsonbPath path;
945
11.7k
            if (!path.seek(r_raw, r_size)) {
946
7
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
947
7
                                               std::string_view(r_raw, r_size));
948
7
            }
949
950
11.7k
            json_path_list[pi] = std::move(path);
951
952
11.7k
            return Status::OK();
953
11.7k
        };
_ZZN5doris22JsonbExtractStringImplINS_13JsonbTypeTypeEE16vector_vector_v2EPNS_15FunctionContextERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS5_IjLm4096ES8_Lm16ELm15EEEPSA_RKbRKSt6vectorIPKNS_9ColumnStrIjEESaISM_EERKSI_ISF_SaISF_EERKSI_IbSaIbEERS9_RSC_SZ_ENKUlmmE_clEmm
Line
Count
Source
934
1.34k
        auto parse_json_path = [&](size_t i, size_t pi) -> Status {
935
1.34k
            const auto index = index_check_const(i, path_const[pi]);
936
937
1.34k
            const ColumnString* path_col = rdata_columns[pi];
938
1.34k
            const ColumnString::Chars& rdata = path_col->get_chars();
939
1.34k
            const ColumnString::Offsets& roffsets = path_col->get_offsets();
940
1.34k
            size_t r_off = roffsets[index - 1];
941
1.34k
            size_t r_size = roffsets[index] - r_off;
942
1.34k
            const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]);
943
944
1.34k
            JsonbPath path;
945
1.34k
            if (!path.seek(r_raw, r_size)) {
946
1
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
947
1
                                               std::string_view(r_raw, r_size));
948
1
            }
949
950
1.33k
            json_path_list[pi] = std::move(path);
951
952
1.33k
            return Status::OK();
953
1.34k
        };
_ZZN5doris22JsonbExtractStringImplINS_13JsonbTypeJsonEE16vector_vector_v2EPNS_15FunctionContextERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS5_IjLm4096ES8_Lm16ELm15EEEPSA_RKbRKSt6vectorIPKNS_9ColumnStrIjEESaISM_EERKSI_ISF_SaISF_EERKSI_IbSaIbEERS9_RSC_SZ_ENKUlmmE_clEmm
Line
Count
Source
934
10.3k
        auto parse_json_path = [&](size_t i, size_t pi) -> Status {
935
10.3k
            const auto index = index_check_const(i, path_const[pi]);
936
937
10.3k
            const ColumnString* path_col = rdata_columns[pi];
938
10.3k
            const ColumnString::Chars& rdata = path_col->get_chars();
939
10.3k
            const ColumnString::Offsets& roffsets = path_col->get_offsets();
940
10.3k
            size_t r_off = roffsets[index - 1];
941
10.3k
            size_t r_size = roffsets[index] - r_off;
942
10.3k
            const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]);
943
944
10.3k
            JsonbPath path;
945
10.3k
            if (!path.seek(r_raw, r_size)) {
946
6
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
947
6
                                               std::string_view(r_raw, r_size));
948
6
            }
949
950
10.3k
            json_path_list[pi] = std::move(path);
951
952
10.3k
            return Status::OK();
953
10.3k
        };
_ZZN5doris22JsonbExtractStringImplINS_21JsonbTypeJsonNoQuotesEE16vector_vector_v2EPNS_15FunctionContextERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS5_IjLm4096ES8_Lm16ELm15EEEPSA_RKbRKSt6vectorIPKNS_9ColumnStrIjEESaISM_EERKSI_ISF_SaISF_EERKSI_IbSaIbEERS9_RSC_SZ_ENKUlmmE_clEmm
Line
Count
Source
934
11
        auto parse_json_path = [&](size_t i, size_t pi) -> Status {
935
11
            const auto index = index_check_const(i, path_const[pi]);
936
937
11
            const ColumnString* path_col = rdata_columns[pi];
938
11
            const ColumnString::Chars& rdata = path_col->get_chars();
939
11
            const ColumnString::Offsets& roffsets = path_col->get_offsets();
940
11
            size_t r_off = roffsets[index - 1];
941
11
            size_t r_size = roffsets[index] - r_off;
942
11
            const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]);
943
944
11
            JsonbPath path;
945
11
            if (!path.seek(r_raw, r_size)) {
946
0
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
947
0
                                               std::string_view(r_raw, r_size));
948
0
            }
949
950
11
            json_path_list[pi] = std::move(path);
951
952
11
            return Status::OK();
953
11
        };
954
955
23.0k
        for (size_t pi = 0; pi < rdata_columns.size(); pi++) {
956
11.6k
            if (path_const[pi]) {
957
11.3k
                if (r_null_maps[pi] && (*r_null_maps[pi])[0]) {
958
41
                    continue;
959
41
                }
960
11.3k
                RETURN_IF_ERROR(parse_json_path(0, pi));
961
11.3k
            }
962
11.6k
        }
963
964
11.3k
        res_data.reserve(ldata.size());
965
152k
        for (size_t i = 0; i < input_rows_count; ++i) {
966
141k
            if (null_map[i]) {
967
0
                continue;
968
0
            }
969
970
141k
            const auto data_index = index_check_const(i, json_data_const);
971
141k
            if (l_null_map && (*l_null_map)[data_index]) {
972
1.91k
                StringOP::push_null_string(i, res_data, res_offsets, null_map);
973
1.91k
                continue;
974
1.91k
            }
975
976
139k
            size_t l_off = loffsets[data_index - 1];
977
139k
            size_t l_size = loffsets[data_index] - l_off;
978
139k
            const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]);
979
139k
            if (rdata_columns.size() == 1) { // just return origin value
980
138k
                const auto path_index = index_check_const(i, path_const[0]);
981
138k
                if (r_null_maps[0] && (*r_null_maps[0])[path_index]) {
982
33
                    StringOP::push_null_string(i, res_data, res_offsets, null_map);
983
33
                    continue;
984
33
                }
985
986
138k
                if (!path_const[0]) {
987
328
                    RETURN_IF_ERROR(parse_json_path(i, 0));
988
328
                }
989
990
138k
                writer->reset();
991
138k
                inner_loop_impl(writer.get(), i, res_data, res_offsets, null_map, formater, l_raw,
992
138k
                                l_size, json_path_list[0]);
993
138k
            } else { // will make array string to user
994
654
                writer->reset();
995
654
                bool has_value = false;
996
997
                // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
998
654
                const JsonbDocument* doc = nullptr;
999
654
                auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc);
1000
1001
1.78k
                for (size_t pi = 0; pi < rdata_columns.size(); ++pi) {
1002
1.23k
                    if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
1003
0
                        continue;
1004
0
                    }
1005
1006
1.23k
                    const auto path_index = index_check_const(i, path_const[pi]);
1007
1.23k
                    if (r_null_maps[pi] && (*r_null_maps[pi])[path_index]) {
1008
99
                        StringOP::push_null_string(i, res_data, res_offsets, null_map);
1009
99
                        break;
1010
99
                    }
1011
1012
1.13k
                    if (!path_const[pi]) {
1013
70
                        RETURN_IF_ERROR(parse_json_path(i, pi));
1014
70
                    }
1015
1016
1.13k
                    auto find_result = doc->getValue()->findValue(json_path_list[pi]);
1017
1018
1.13k
                    if (find_result.value) {
1019
282
                        if (!has_value) {
1020
153
                            has_value = true;
1021
153
                            writer->writeStartArray();
1022
153
                        }
1023
282
                        if (find_result.value->isArray() && find_result.is_wildcard) {
1024
                            // To avoid getting results of nested array like [[1, 2, 3], [4, 5, 6]],
1025
                            // if value is array, we should write all items in array, instead of write the array itself.
1026
                            // finaly we will get results like [1, 2, 3, 4, 5, 6]
1027
54
                            for (const auto& item : *find_result.value->unpack<ArrayVal>()) {
1028
54
                                writer->writeValue(&item);
1029
54
                            }
1030
259
                        } else {
1031
259
                            writer->writeValue(find_result.value);
1032
259
                        }
1033
282
                    }
1034
1.13k
                }
1035
654
                if (has_value) {
1036
153
                    writer->writeEndArray();
1037
153
                    StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(),
1038
153
                                                                 writer->getOutput()->getSize()),
1039
153
                                                i, res_data, res_offsets);
1040
501
                } else {
1041
501
                    StringOP::push_null_string(i, res_data, res_offsets, null_map);
1042
501
                }
1043
654
            }
1044
139k
        } //for
1045
11.3k
        return Status::OK();
1046
11.3k
    }
_ZN5doris22JsonbExtractStringImplINS_13JsonbTypeTypeEE16vector_vector_v2EPNS_15FunctionContextERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS5_IjLm4096ES8_Lm16ELm15EEEPSA_RKbRKSt6vectorIPKNS_9ColumnStrIjEESaISM_EERKSI_ISF_SaISF_EERKSI_IbSaIbEERS9_RSC_SZ_
Line
Count
Source
922
1.32k
            ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets, NullMap& null_map) {
923
1.32k
        const size_t input_rows_count = null_map.size();
924
1.32k
        res_offsets.resize(input_rows_count);
925
926
1.32k
        auto writer = std::make_unique<JsonbWriter>();
927
1.32k
        std::unique_ptr<JsonbToJson> formater;
928
929
        // reuseable json path list, espacially for const path
930
1.32k
        std::vector<JsonbPath> json_path_list;
931
1.32k
        json_path_list.resize(rdata_columns.size());
932
933
        // lambda function to parse json path for row i and path pi
934
1.32k
        auto parse_json_path = [&](size_t i, size_t pi) -> Status {
935
1.32k
            const auto index = index_check_const(i, path_const[pi]);
936
937
1.32k
            const ColumnString* path_col = rdata_columns[pi];
938
1.32k
            const ColumnString::Chars& rdata = path_col->get_chars();
939
1.32k
            const ColumnString::Offsets& roffsets = path_col->get_offsets();
940
1.32k
            size_t r_off = roffsets[index - 1];
941
1.32k
            size_t r_size = roffsets[index] - r_off;
942
1.32k
            const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]);
943
944
1.32k
            JsonbPath path;
945
1.32k
            if (!path.seek(r_raw, r_size)) {
946
1.32k
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
947
1.32k
                                               std::string_view(r_raw, r_size));
948
1.32k
            }
949
950
1.32k
            json_path_list[pi] = std::move(path);
951
952
1.32k
            return Status::OK();
953
1.32k
        };
954
955
2.65k
        for (size_t pi = 0; pi < rdata_columns.size(); pi++) {
956
1.32k
            if (path_const[pi]) {
957
1.32k
                if (r_null_maps[pi] && (*r_null_maps[pi])[0]) {
958
1
                    continue;
959
1
                }
960
1.32k
                RETURN_IF_ERROR(parse_json_path(0, pi));
961
1.32k
            }
962
1.32k
        }
963
964
1.32k
        res_data.reserve(ldata.size());
965
4.57k
        for (size_t i = 0; i < input_rows_count; ++i) {
966
3.24k
            if (null_map[i]) {
967
0
                continue;
968
0
            }
969
970
3.24k
            const auto data_index = index_check_const(i, json_data_const);
971
3.24k
            if (l_null_map && (*l_null_map)[data_index]) {
972
248
                StringOP::push_null_string(i, res_data, res_offsets, null_map);
973
248
                continue;
974
248
            }
975
976
3.00k
            size_t l_off = loffsets[data_index - 1];
977
3.00k
            size_t l_size = loffsets[data_index] - l_off;
978
3.00k
            const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]);
979
3.00k
            if (rdata_columns.size() == 1) { // just return origin value
980
3.00k
                const auto path_index = index_check_const(i, path_const[0]);
981
3.00k
                if (r_null_maps[0] && (*r_null_maps[0])[path_index]) {
982
16
                    StringOP::push_null_string(i, res_data, res_offsets, null_map);
983
16
                    continue;
984
16
                }
985
986
2.98k
                if (!path_const[0]) {
987
18
                    RETURN_IF_ERROR(parse_json_path(i, 0));
988
18
                }
989
990
2.98k
                writer->reset();
991
2.98k
                inner_loop_impl(writer.get(), i, res_data, res_offsets, null_map, formater, l_raw,
992
2.98k
                                l_size, json_path_list[0]);
993
2.98k
            } else { // will make array string to user
994
0
                writer->reset();
995
0
                bool has_value = false;
996
997
                // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
998
0
                const JsonbDocument* doc = nullptr;
999
0
                auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc);
1000
1001
0
                for (size_t pi = 0; pi < rdata_columns.size(); ++pi) {
1002
0
                    if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
1003
0
                        continue;
1004
0
                    }
1005
1006
0
                    const auto path_index = index_check_const(i, path_const[pi]);
1007
0
                    if (r_null_maps[pi] && (*r_null_maps[pi])[path_index]) {
1008
0
                        StringOP::push_null_string(i, res_data, res_offsets, null_map);
1009
0
                        break;
1010
0
                    }
1011
1012
0
                    if (!path_const[pi]) {
1013
0
                        RETURN_IF_ERROR(parse_json_path(i, pi));
1014
0
                    }
1015
1016
0
                    auto find_result = doc->getValue()->findValue(json_path_list[pi]);
1017
1018
0
                    if (find_result.value) {
1019
0
                        if (!has_value) {
1020
0
                            has_value = true;
1021
0
                            writer->writeStartArray();
1022
0
                        }
1023
0
                        if (find_result.value->isArray() && find_result.is_wildcard) {
1024
                            // To avoid getting results of nested array like [[1, 2, 3], [4, 5, 6]],
1025
                            // if value is array, we should write all items in array, instead of write the array itself.
1026
                            // finaly we will get results like [1, 2, 3, 4, 5, 6]
1027
0
                            for (const auto& item : *find_result.value->unpack<ArrayVal>()) {
1028
0
                                writer->writeValue(&item);
1029
0
                            }
1030
0
                        } else {
1031
0
                            writer->writeValue(find_result.value);
1032
0
                        }
1033
0
                    }
1034
0
                }
1035
0
                if (has_value) {
1036
0
                    writer->writeEndArray();
1037
0
                    StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(),
1038
0
                                                                 writer->getOutput()->getSize()),
1039
0
                                                i, res_data, res_offsets);
1040
0
                } else {
1041
0
                    StringOP::push_null_string(i, res_data, res_offsets, null_map);
1042
0
                }
1043
0
            }
1044
3.00k
        } //for
1045
1.32k
        return Status::OK();
1046
1.32k
    }
_ZN5doris22JsonbExtractStringImplINS_13JsonbTypeJsonEE16vector_vector_v2EPNS_15FunctionContextERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS5_IjLm4096ES8_Lm16ELm15EEEPSA_RKbRKSt6vectorIPKNS_9ColumnStrIjEESaISM_EERKSI_ISF_SaISF_EERKSI_IbSaIbEERS9_RSC_SZ_
Line
Count
Source
922
10.0k
            ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets, NullMap& null_map) {
923
10.0k
        const size_t input_rows_count = null_map.size();
924
10.0k
        res_offsets.resize(input_rows_count);
925
926
10.0k
        auto writer = std::make_unique<JsonbWriter>();
927
10.0k
        std::unique_ptr<JsonbToJson> formater;
928
929
        // reuseable json path list, espacially for const path
930
10.0k
        std::vector<JsonbPath> json_path_list;
931
10.0k
        json_path_list.resize(rdata_columns.size());
932
933
        // lambda function to parse json path for row i and path pi
934
10.0k
        auto parse_json_path = [&](size_t i, size_t pi) -> Status {
935
10.0k
            const auto index = index_check_const(i, path_const[pi]);
936
937
10.0k
            const ColumnString* path_col = rdata_columns[pi];
938
10.0k
            const ColumnString::Chars& rdata = path_col->get_chars();
939
10.0k
            const ColumnString::Offsets& roffsets = path_col->get_offsets();
940
10.0k
            size_t r_off = roffsets[index - 1];
941
10.0k
            size_t r_size = roffsets[index] - r_off;
942
10.0k
            const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]);
943
944
10.0k
            JsonbPath path;
945
10.0k
            if (!path.seek(r_raw, r_size)) {
946
10.0k
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
947
10.0k
                                               std::string_view(r_raw, r_size));
948
10.0k
            }
949
950
10.0k
            json_path_list[pi] = std::move(path);
951
952
10.0k
            return Status::OK();
953
10.0k
        };
954
955
20.3k
        for (size_t pi = 0; pi < rdata_columns.size(); pi++) {
956
10.3k
            if (path_const[pi]) {
957
10.0k
                if (r_null_maps[pi] && (*r_null_maps[pi])[0]) {
958
40
                    continue;
959
40
                }
960
10.0k
                RETURN_IF_ERROR(parse_json_path(0, pi));
961
10.0k
            }
962
10.3k
        }
963
964
10.0k
        res_data.reserve(ldata.size());
965
147k
        for (size_t i = 0; i < input_rows_count; ++i) {
966
137k
            if (null_map[i]) {
967
0
                continue;
968
0
            }
969
970
137k
            const auto data_index = index_check_const(i, json_data_const);
971
137k
            if (l_null_map && (*l_null_map)[data_index]) {
972
1.66k
                StringOP::push_null_string(i, res_data, res_offsets, null_map);
973
1.66k
                continue;
974
1.66k
            }
975
976
136k
            size_t l_off = loffsets[data_index - 1];
977
136k
            size_t l_size = loffsets[data_index] - l_off;
978
136k
            const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]);
979
136k
            if (rdata_columns.size() == 1) { // just return origin value
980
135k
                const auto path_index = index_check_const(i, path_const[0]);
981
135k
                if (r_null_maps[0] && (*r_null_maps[0])[path_index]) {
982
17
                    StringOP::push_null_string(i, res_data, res_offsets, null_map);
983
17
                    continue;
984
17
                }
985
986
135k
                if (!path_const[0]) {
987
305
                    RETURN_IF_ERROR(parse_json_path(i, 0));
988
305
                }
989
990
135k
                writer->reset();
991
135k
                inner_loop_impl(writer.get(), i, res_data, res_offsets, null_map, formater, l_raw,
992
135k
                                l_size, json_path_list[0]);
993
135k
            } else { // will make array string to user
994
651
                writer->reset();
995
651
                bool has_value = false;
996
997
                // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
998
651
                const JsonbDocument* doc = nullptr;
999
651
                auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc);
1000
1001
1.77k
                for (size_t pi = 0; pi < rdata_columns.size(); ++pi) {
1002
1.22k
                    if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
1003
0
                        continue;
1004
0
                    }
1005
1006
1.22k
                    const auto path_index = index_check_const(i, path_const[pi]);
1007
1.22k
                    if (r_null_maps[pi] && (*r_null_maps[pi])[path_index]) {
1008
98
                        StringOP::push_null_string(i, res_data, res_offsets, null_map);
1009
98
                        break;
1010
98
                    }
1011
1012
1.12k
                    if (!path_const[pi]) {
1013
64
                        RETURN_IF_ERROR(parse_json_path(i, pi));
1014
64
                    }
1015
1016
1.12k
                    auto find_result = doc->getValue()->findValue(json_path_list[pi]);
1017
1018
1.12k
                    if (find_result.value) {
1019
276
                        if (!has_value) {
1020
150
                            has_value = true;
1021
150
                            writer->writeStartArray();
1022
150
                        }
1023
276
                        if (find_result.value->isArray() && find_result.is_wildcard) {
1024
                            // To avoid getting results of nested array like [[1, 2, 3], [4, 5, 6]],
1025
                            // if value is array, we should write all items in array, instead of write the array itself.
1026
                            // finaly we will get results like [1, 2, 3, 4, 5, 6]
1027
54
                            for (const auto& item : *find_result.value->unpack<ArrayVal>()) {
1028
54
                                writer->writeValue(&item);
1029
54
                            }
1030
253
                        } else {
1031
253
                            writer->writeValue(find_result.value);
1032
253
                        }
1033
276
                    }
1034
1.12k
                }
1035
651
                if (has_value) {
1036
150
                    writer->writeEndArray();
1037
150
                    StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(),
1038
150
                                                                 writer->getOutput()->getSize()),
1039
150
                                                i, res_data, res_offsets);
1040
501
                } else {
1041
501
                    StringOP::push_null_string(i, res_data, res_offsets, null_map);
1042
501
                }
1043
651
            }
1044
136k
        } //for
1045
9.99k
        return Status::OK();
1046
10.0k
    }
_ZN5doris22JsonbExtractStringImplINS_21JsonbTypeJsonNoQuotesEE16vector_vector_v2EPNS_15FunctionContextERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS5_IjLm4096ES8_Lm16ELm15EEEPSA_RKbRKSt6vectorIPKNS_9ColumnStrIjEESaISM_EERKSI_ISF_SaISF_EERKSI_IbSaIbEERS9_RSC_SZ_
Line
Count
Source
922
9
            ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets, NullMap& null_map) {
923
9
        const size_t input_rows_count = null_map.size();
924
9
        res_offsets.resize(input_rows_count);
925
926
9
        auto writer = std::make_unique<JsonbWriter>();
927
9
        std::unique_ptr<JsonbToJson> formater;
928
929
        // reuseable json path list, espacially for const path
930
9
        std::vector<JsonbPath> json_path_list;
931
9
        json_path_list.resize(rdata_columns.size());
932
933
        // lambda function to parse json path for row i and path pi
934
9
        auto parse_json_path = [&](size_t i, size_t pi) -> Status {
935
9
            const auto index = index_check_const(i, path_const[pi]);
936
937
9
            const ColumnString* path_col = rdata_columns[pi];
938
9
            const ColumnString::Chars& rdata = path_col->get_chars();
939
9
            const ColumnString::Offsets& roffsets = path_col->get_offsets();
940
9
            size_t r_off = roffsets[index - 1];
941
9
            size_t r_size = roffsets[index] - r_off;
942
9
            const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]);
943
944
9
            JsonbPath path;
945
9
            if (!path.seek(r_raw, r_size)) {
946
9
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
947
9
                                               std::string_view(r_raw, r_size));
948
9
            }
949
950
9
            json_path_list[pi] = std::move(path);
951
952
9
            return Status::OK();
953
9
        };
954
955
22
        for (size_t pi = 0; pi < rdata_columns.size(); pi++) {
956
13
            if (path_const[pi]) {
957
0
                if (r_null_maps[pi] && (*r_null_maps[pi])[0]) {
958
0
                    continue;
959
0
                }
960
0
                RETURN_IF_ERROR(parse_json_path(0, pi));
961
0
            }
962
13
        }
963
964
9
        res_data.reserve(ldata.size());
965
18
        for (size_t i = 0; i < input_rows_count; ++i) {
966
9
            if (null_map[i]) {
967
0
                continue;
968
0
            }
969
970
9
            const auto data_index = index_check_const(i, json_data_const);
971
9
            if (l_null_map && (*l_null_map)[data_index]) {
972
1
                StringOP::push_null_string(i, res_data, res_offsets, null_map);
973
1
                continue;
974
1
            }
975
976
8
            size_t l_off = loffsets[data_index - 1];
977
8
            size_t l_size = loffsets[data_index] - l_off;
978
8
            const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]);
979
8
            if (rdata_columns.size() == 1) { // just return origin value
980
5
                const auto path_index = index_check_const(i, path_const[0]);
981
5
                if (r_null_maps[0] && (*r_null_maps[0])[path_index]) {
982
0
                    StringOP::push_null_string(i, res_data, res_offsets, null_map);
983
0
                    continue;
984
0
                }
985
986
5
                if (!path_const[0]) {
987
5
                    RETURN_IF_ERROR(parse_json_path(i, 0));
988
5
                }
989
990
5
                writer->reset();
991
5
                inner_loop_impl(writer.get(), i, res_data, res_offsets, null_map, formater, l_raw,
992
5
                                l_size, json_path_list[0]);
993
5
            } else { // will make array string to user
994
3
                writer->reset();
995
3
                bool has_value = false;
996
997
                // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
998
3
                const JsonbDocument* doc = nullptr;
999
3
                auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc);
1000
1001
9
                for (size_t pi = 0; pi < rdata_columns.size(); ++pi) {
1002
7
                    if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
1003
0
                        continue;
1004
0
                    }
1005
1006
7
                    const auto path_index = index_check_const(i, path_const[pi]);
1007
7
                    if (r_null_maps[pi] && (*r_null_maps[pi])[path_index]) {
1008
1
                        StringOP::push_null_string(i, res_data, res_offsets, null_map);
1009
1
                        break;
1010
1
                    }
1011
1012
6
                    if (!path_const[pi]) {
1013
6
                        RETURN_IF_ERROR(parse_json_path(i, pi));
1014
6
                    }
1015
1016
6
                    auto find_result = doc->getValue()->findValue(json_path_list[pi]);
1017
1018
6
                    if (find_result.value) {
1019
6
                        if (!has_value) {
1020
3
                            has_value = true;
1021
3
                            writer->writeStartArray();
1022
3
                        }
1023
6
                        if (find_result.value->isArray() && find_result.is_wildcard) {
1024
                            // To avoid getting results of nested array like [[1, 2, 3], [4, 5, 6]],
1025
                            // if value is array, we should write all items in array, instead of write the array itself.
1026
                            // finaly we will get results like [1, 2, 3, 4, 5, 6]
1027
0
                            for (const auto& item : *find_result.value->unpack<ArrayVal>()) {
1028
0
                                writer->writeValue(&item);
1029
0
                            }
1030
6
                        } else {
1031
6
                            writer->writeValue(find_result.value);
1032
6
                        }
1033
6
                    }
1034
6
                }
1035
3
                if (has_value) {
1036
3
                    writer->writeEndArray();
1037
3
                    StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(),
1038
3
                                                                 writer->getOutput()->getSize()),
1039
3
                                                i, res_data, res_offsets);
1040
3
                } else {
1041
0
                    StringOP::push_null_string(i, res_data, res_offsets, null_map);
1042
0
                }
1043
3
            }
1044
8
        } //for
1045
9
        return Status::OK();
1046
9
    }
1047
1048
    // Row-wise extraction where both the JSONB document and the JSON path vary per row.
    //
    // For each row the path text is parsed into a JsonbPath and passed, together with the
    // row's raw JSONB bytes, to inner_loop_impl (defined elsewhere in this struct).
    //
    // context      : not used by this function.
    // ldata/loffsets/l_null_map : bytes, end offsets and optional null map of the JSONB column.
    // rdata/roffsets/r_null_map : bytes, end offsets and optional null map of the path column.
    // res_data/res_offsets/null_map : output string column parts and output null map.
    //
    // Returns Status::OK(), or InvalidArgument (naming the path text and the row) as soon as
    // one path fails to parse.
    static Status vector_vector(FunctionContext* context, const ColumnString::Chars& ldata,
                                const ColumnString::Offsets& loffsets, const NullMap* l_null_map,
                                const ColumnString::Chars& rdata,
                                const ColumnString::Offsets& roffsets, const NullMap* r_null_map,
                                ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets,
                                NullMap& null_map) {
        const size_t input_rows_count = loffsets.size();
        res_offsets.resize(input_rows_count);

        // Passed through to inner_loop_impl on every row.
        std::unique_ptr<JsonbToJson> formater;

        // One writer for the whole batch; it is reset before each row.
        JsonbWriter writer;
        for (size_t i = 0; i < input_rows_count; ++i) {
            // A NULL document or a NULL path produces a NULL result for the row.
            if ((l_null_map && (*l_null_map)[i]) || (r_null_map && (*r_null_map)[i])) {
                StringOP::push_null_string(i, res_data, res_offsets, null_map);
                continue;
            }

            // NOTE(review): for i == 0 this reads offsets[i - 1], exactly as vector_vector_v2
            // does; presumably the offsets container yields 0 there - confirm.
            // Lengths are kept in size_t (as in vector_vector_v2) so that no narrowing to a
            // signed int happens.
            const size_t l_off = loffsets[i - 1];
            const size_t l_size = loffsets[i] - l_off;
            const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]);

            const size_t r_off = roffsets[i - 1];
            const size_t r_size = roffsets[i] - r_off;
            const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]);

            JsonbPath path;
            if (!path.seek(r_raw, r_size)) {
                return Status::InvalidArgument(
                        "Json path error: Invalid Json Path for value: {} at row: {}",
                        std::string_view(r_raw, r_size), i);
            }

            writer.reset();
            inner_loop_impl(&writer, i, res_data, res_offsets, null_map, formater, l_raw, l_size,
                            path);
        } //for
        return Status::OK();
    } //function
1090
1091
    // Row-wise extraction with a per-row JSONB document and a single constant JSON path.
    //
    // The path is parsed once, before the row loop; every non-NULL row is then passed to
    // inner_loop_impl (defined elsewhere in this struct) with that same parsed path.
    //
    // context      : not used by this function.
    // ldata/loffsets/l_null_map : bytes, end offsets and optional null map of the JSONB column.
    // rdata        : text of the constant JSON path.
    // res_data/res_offsets/null_map : output string column parts and output null map.
    //
    // Returns Status::OK(), or InvalidArgument (naming the path text) when the path does not
    // parse; in that case no row is processed.
    static Status vector_scalar(FunctionContext* context, const ColumnString::Chars& ldata,
                                const ColumnString::Offsets& loffsets, const NullMap* l_null_map,
                                const StringRef& rdata, ColumnString::Chars& res_data,
                                ColumnString::Offsets& res_offsets, NullMap& null_map) {
        const size_t input_rows_count = loffsets.size();
        res_offsets.resize(input_rows_count);

        // Passed through to inner_loop_impl on every row.
        std::unique_ptr<JsonbToJson> formater;

        JsonbPath path;
        if (!path.seek(rdata.data, rdata.size)) {
            return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
                                           std::string_view(rdata.data, rdata.size));
        }

        // One writer for the whole batch; it is reset before each row.
        JsonbWriter writer;
        for (size_t i = 0; i < input_rows_count; ++i) {
            if (l_null_map && (*l_null_map)[i]) {
                StringOP::push_null_string(i, res_data, res_offsets, null_map);
                continue;
            }

            // NOTE(review): for i == 0 this reads loffsets[i - 1], exactly as vector_vector_v2
            // does; presumably the offsets container yields 0 there - confirm.
            // The length is kept in size_t (as in vector_vector_v2) so that no narrowing to a
            // signed int happens.
            const size_t l_off = loffsets[i - 1];
            const size_t l_size = loffsets[i] - l_off;
            const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]);

            writer.reset();
            inner_loop_impl(&writer, i, res_data, res_offsets, null_map, formater, l_raw, l_size,
                            path);
        } //for
        return Status::OK();
    } //function
1122
1123
    // Row-wise extraction with a single constant JSONB document and a per-row JSON path.
    //
    // For each non-NULL row the path text is parsed into a JsonbPath and passed, together
    // with the constant document bytes, to inner_loop_impl (defined elsewhere in this struct).
    //
    // context      : not used by this function.
    // ldata        : bytes of the constant JSONB document.
    // rdata/roffsets/r_null_map : bytes, end offsets and optional null map of the path column.
    // res_data/res_offsets/null_map : output string column parts and output null map.
    //
    // Returns Status::OK(), or InvalidArgument (naming the path text and the row) as soon as
    // one path fails to parse.
    static Status scalar_vector(FunctionContext* context, const StringRef& ldata,
                                const ColumnString::Chars& rdata,
                                const ColumnString::Offsets& roffsets, const NullMap* r_null_map,
                                ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets,
                                NullMap& null_map) {
        const size_t input_rows_count = roffsets.size();
        res_offsets.resize(input_rows_count);

        // Passed through to inner_loop_impl on every row.
        std::unique_ptr<JsonbToJson> formater;

        // One writer for the whole batch; it is reset before each row.
        JsonbWriter writer;

        for (size_t i = 0; i < input_rows_count; ++i) {
            if (r_null_map && (*r_null_map)[i]) {
                StringOP::push_null_string(i, res_data, res_offsets, null_map);
                continue;
            }

            // NOTE(review): for i == 0 this reads roffsets[i - 1], exactly as the path lookup
            // in vector_vector_v2 does; presumably the offsets container yields 0 there -
            // confirm.
            // The length is kept in size_t (as in vector_vector_v2) so that no narrowing to a
            // signed int happens.
            const size_t r_off = roffsets[i - 1];
            const size_t r_size = roffsets[i] - r_off;
            const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]);

            JsonbPath path;
            if (!path.seek(r_raw, r_size)) {
                return Status::InvalidArgument(
                        "Json path error: Invalid Json Path for value: {} at row: {}",
                        std::string_view(r_raw, r_size), i);
            }

            writer.reset();
            inner_loop_impl(&writer, i, res_data, res_offsets, null_map, formater, ldata.data,
                            ldata.size, path);
        } //for
        return Status::OK();
    } //function
1157
};
1158
1159
struct JsonbExtractIsnull {
1160
    static constexpr auto name = "json_extract_isnull";
1161
    static constexpr auto alias = "jsonb_extract_isnull";
1162
1163
    using ReturnType = DataTypeUInt8;
1164
    using ColumnType = ColumnUInt8;
1165
    using Container = typename ColumnType::Container;
1166
1167
private:
1168
    static ALWAYS_INLINE void inner_loop_impl(size_t i, Container& res, NullMap& null_map,
1169
                                              const char* l_raw_str, size_t l_str_size,
1170
2.97k
                                              JsonbPath& path) {
1171
2.97k
        if (null_map[i]) {
1172
0
            res[i] = 0;
1173
0
            return;
1174
0
        }
1175
1176
        // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
1177
2.97k
        const JsonbDocument* doc = nullptr;
1178
2.97k
        auto st = JsonbDocument::checkAndCreateDocument(l_raw_str, l_str_size, &doc);
1179
2.97k
        if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
1180
0
            null_map[i] = 1;
1181
0
            res[i] = 0;
1182
0
            return;
1183
0
        }
1184
1185
        // value is NOT necessary to be deleted since JsonbValue will not allocate memory
1186
2.97k
        auto find_result = doc->getValue()->findValue(path);
1187
2.97k
        const auto* value = find_result.value;
1188
1189
2.97k
        if (UNLIKELY(!value)) {
1190
2.55k
            null_map[i] = 1;
1191
2.55k
            res[i] = 0;
1192
2.55k
            return;
1193
2.55k
        }
1194
1195
420
        res[i] = value->isNull();
1196
420
    }
1197
1198
public:
1199
    // for jsonb_extract_int/int64/double
1200
    static Status vector_vector(FunctionContext* context, const ColumnString::Chars& ldata,
1201
                                const ColumnString::Offsets& loffsets, const NullMap* l_null_map,
1202
                                const ColumnString::Chars& rdata,
1203
                                const ColumnString::Offsets& roffsets, const NullMap* r_null_map,
1204
4
                                Container& res, NullMap& null_map) {
1205
4
        size_t size = loffsets.size();
1206
4
        res.resize(size);
1207
1208
19
        for (size_t i = 0; i < loffsets.size(); i++) {
1209
15
            if ((l_null_map && (*l_null_map)[i]) || (r_null_map && (*r_null_map)[i])) {
1210
8
                res[i] = 0;
1211
8
                null_map[i] = 1;
1212
8
                continue;
1213
8
            }
1214
1215
7
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
1216
7
            int l_str_size = loffsets[i] - loffsets[i - 1];
1217
1218
7
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
1219
7
            int r_str_size = roffsets[i] - roffsets[i - 1];
1220
1221
7
            JsonbPath path;
1222
7
            if (!path.seek(r_raw_str, r_str_size)) {
1223
0
                return Status::InvalidArgument(
1224
0
                        "Json path error: Invalid Json Path for value: {} at row: {}",
1225
0
                        std::string_view(r_raw_str, r_str_size), i);
1226
0
            }
1227
1228
7
            inner_loop_impl(i, res, null_map, l_raw_str, l_str_size, path);
1229
7
        } //for
1230
4
        return Status::OK();
1231
4
    } //function
1232
1233
    static Status scalar_vector(FunctionContext* context, const StringRef& ldata,
1234
                                const ColumnString::Chars& rdata,
1235
                                const ColumnString::Offsets& roffsets, const NullMap* r_null_map,
1236
1
                                Container& res, NullMap& null_map) {
1237
1
        size_t size = roffsets.size();
1238
1
        res.resize(size);
1239
1240
13
        for (size_t i = 0; i < size; i++) {
1241
12
            if (r_null_map && (*r_null_map)[i]) {
1242
4
                res[i] = 0;
1243
4
                null_map[i] = 1;
1244
4
                continue;
1245
4
            }
1246
1247
8
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
1248
8
            int r_str_size = roffsets[i] - roffsets[i - 1];
1249
1250
8
            JsonbPath path;
1251
8
            if (!path.seek(r_raw_str, r_str_size)) {
1252
0
                return Status::InvalidArgument(
1253
0
                        "Json path error: Invalid Json Path for value: {} at row: {}",
1254
0
                        std::string_view(r_raw_str, r_str_size), i);
1255
0
            }
1256
1257
8
            inner_loop_impl(i, res, null_map, ldata.data, ldata.size, path);
1258
8
        } //for
1259
1
        return Status::OK();
1260
1
    } //function
1261
1262
    static Status vector_scalar(FunctionContext* context, const ColumnString::Chars& ldata,
1263
                                const ColumnString::Offsets& loffsets, const NullMap* l_null_map,
1264
1.32k
                                const StringRef& rdata, Container& res, NullMap& null_map) {
1265
1.32k
        size_t size = loffsets.size();
1266
1.32k
        res.resize(size);
1267
1268
1.32k
        JsonbPath path;
1269
1.32k
        if (!path.seek(rdata.data, rdata.size)) {
1270
0
            return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
1271
0
                                           std::string_view(rdata.data, rdata.size));
1272
0
        }
1273
1274
4.50k
        for (size_t i = 0; i < loffsets.size(); i++) {
1275
3.18k
            if (l_null_map && (*l_null_map)[i]) {
1276
228
                res[i] = 0;
1277
228
                null_map[i] = 1;
1278
228
                continue;
1279
228
            }
1280
1281
2.95k
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
1282
2.95k
            int l_str_size = loffsets[i] - loffsets[i - 1];
1283
1284
2.95k
            inner_loop_impl(i, res, null_map, l_raw_str, l_str_size, path);
1285
2.95k
        } //for
1286
1.32k
        return Status::OK();
1287
1.32k
    } //function
1288
};
1289
1290
struct JsonbTypeJson {
1291
    using T = std::string;
1292
    using ReturnType = DataTypeJsonb;
1293
    using ColumnType = ColumnString;
1294
    static const bool only_get_type = false;
1295
    static const bool no_quotes = false;
1296
};
1297
1298
struct JsonbTypeJsonNoQuotes {
1299
    using T = std::string;
1300
    using ReturnType = DataTypeJsonb;
1301
    using ColumnType = ColumnString;
1302
    static const bool only_get_type = false;
1303
    static const bool no_quotes = true;
1304
};
1305
1306
struct JsonbTypeType {
1307
    using T = std::string;
1308
    using ReturnType = DataTypeString;
1309
    using ColumnType = ColumnString;
1310
    static const bool only_get_type = true;
1311
    static const bool no_quotes = false;
1312
};
1313
1314
// Names the JsonbTypeJson flavour of JsonbExtractStringImpl: primary name plus alias.
struct JsonbExtractJsonb : public JsonbExtractStringImpl<JsonbTypeJson> {
    static constexpr const char* name = "jsonb_extract";
    static constexpr const char* alias = "json_extract";
};
1318
1319
// Names the JsonbTypeJsonNoQuotes flavour of JsonbExtractStringImpl: primary name plus alias.
struct JsonbExtractJsonbNoQuotes : public JsonbExtractStringImpl<JsonbTypeJsonNoQuotes> {
    static constexpr const char* name = "jsonb_extract_no_quotes";
    static constexpr const char* alias = "json_extract_no_quotes";
};
1323
1324
// Names the JsonbTypeType flavour of JsonbExtractStringImpl: primary name plus alias.
struct JsonbTypeImpl : public JsonbExtractStringImpl<JsonbTypeType> {
    static constexpr const char* name = "json_type";
    static constexpr const char* alias = "jsonb_type";
};
1328
1329
using FunctionJsonbExists = FunctionJsonbExtractPath;
1330
using FunctionJsonbType = FunctionJsonbExtract<JsonbTypeImpl>;
1331
1332
using FunctionJsonbExtractIsnull = FunctionJsonbExtract<JsonbExtractIsnull>;
1333
using FunctionJsonbExtractJsonb = FunctionJsonbExtract<JsonbExtractJsonb>;
1334
using FunctionJsonbExtractJsonbNoQuotes = FunctionJsonbExtract<JsonbExtractJsonbNoQuotes>;
1335
1336
template <typename Impl>
1337
class FunctionJsonbLength : public IFunction {
1338
public:
1339
    static constexpr auto name = "json_length";
1340
1
    String get_name() const override { return name; }
1341
794
    static FunctionPtr create() { return std::make_shared<FunctionJsonbLength<Impl>>(); }
1342
1343
785
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1344
785
        return make_nullable(std::make_shared<DataTypeInt32>());
1345
785
    }
1346
793
    DataTypes get_variadic_argument_types_impl() const override {
1347
793
        return Impl::get_variadic_argument_types();
1348
793
    }
1349
785
    size_t get_number_of_arguments() const override {
1350
785
        return get_variadic_argument_types_impl().size();
1351
785
    }
1352
1353
1.57k
    bool use_default_implementation_for_nulls() const override { return false; }
1354
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
1355
791
                        uint32_t result, size_t input_rows_count) const override {
1356
791
        return Impl::execute_impl(context, block, arguments, result, input_rows_count);
1357
791
    }
1358
};
1359
1360
struct JsonbLengthUtil {
1361
    static Status jsonb_length_execute(FunctionContext* context, Block& block,
1362
                                       const ColumnNumbers& arguments, uint32_t result,
1363
791
                                       size_t input_rows_count) {
1364
791
        DORIS_CHECK_GE(arguments.size(), 2);
1365
791
        ColumnPtr jsonb_data_column;
1366
791
        bool jsonb_data_const = false;
1367
        // prepare jsonb data column
1368
791
        std::tie(jsonb_data_column, jsonb_data_const) =
1369
791
                unpack_if_const(block.get_by_position(arguments[0]).column);
1370
791
        ColumnPtr path_column;
1371
791
        bool is_const = false;
1372
791
        std::tie(path_column, is_const) =
1373
791
                unpack_if_const(block.get_by_position(arguments[1]).column);
1374
1375
791
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
1376
791
        auto return_type = block.get_data_type(result);
1377
791
        MutableColumnPtr res = return_type->create_column();
1378
1379
791
        JsonbPath path;
1380
791
        if (is_const) {
1381
763
            if (path_column->is_null_at(0)) {
1382
2
                for (size_t i = 0; i < input_rows_count; ++i) {
1383
1
                    null_map->get_data()[i] = 1;
1384
1
                    res->insert_data(nullptr, 0);
1385
1
                }
1386
1387
1
                block.replace_by_position(
1388
1
                        result, ColumnNullable::create(std::move(res), std::move(null_map)));
1389
1
                return Status::OK();
1390
1
            }
1391
1392
762
            auto path_value = path_column->get_data_at(0);
1393
762
            if (!path.seek(path_value.data, path_value.size)) {
1394
0
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
1395
0
                                               std::string_view(path_value.data, path_value.size));
1396
0
            }
1397
762
        }
1398
1399
1.66k
        for (size_t i = 0; i < input_rows_count; ++i) {
1400
877
            if (jsonb_data_column->is_null_at(i) || path_column->is_null_at(i) ||
1401
877
                (jsonb_data_column->get_data_at(i).size == 0)) {
1402
18
                null_map->get_data()[i] = 1;
1403
18
                res->insert_data(nullptr, 0);
1404
18
                continue;
1405
18
            }
1406
859
            if (!is_const) {
1407
25
                auto path_value = path_column->get_data_at(i);
1408
25
                path.clean();
1409
25
                if (!path.seek(path_value.data, path_value.size)) {
1410
0
                    return Status::InvalidArgument(
1411
0
                            "Json path error: Invalid Json Path for value: {}",
1412
0
                            std::string_view(path_value.data, path_value.size));
1413
0
                }
1414
25
            }
1415
859
            auto jsonb_value = jsonb_data_column->get_data_at(i);
1416
            // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
1417
859
            const JsonbDocument* doc = nullptr;
1418
859
            RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(jsonb_value.data,
1419
859
                                                                  jsonb_value.size, &doc));
1420
859
            auto find_result = doc->getValue()->findValue(path);
1421
859
            const auto* value = find_result.value;
1422
859
            if (UNLIKELY(!value)) {
1423
74
                null_map->get_data()[i] = 1;
1424
74
                res->insert_data(nullptr, 0);
1425
74
                continue;
1426
74
            }
1427
785
            auto length = value->numElements();
1428
785
            res->insert_data(const_cast<const char*>((char*)&length), 0);
1429
785
        }
1430
790
        block.replace_by_position(result,
1431
790
                                  ColumnNullable::create(std::move(res), std::move(null_map)));
1432
790
        return Status::OK();
1433
790
    }
1434
};
1435
1436
struct JsonbLengthAndPathImpl {
1437
793
    static DataTypes get_variadic_argument_types() {
1438
793
        return {std::make_shared<DataTypeJsonb>(), std::make_shared<DataTypeString>()};
1439
793
    }
1440
1441
    static Status execute_impl(FunctionContext* context, Block& block,
1442
                               const ColumnNumbers& arguments, uint32_t result,
1443
791
                               size_t input_rows_count) {
1444
791
        return JsonbLengthUtil::jsonb_length_execute(context, block, arguments, result,
1445
791
                                                     input_rows_count);
1446
791
    }
1447
};
1448
1449
template <typename Impl>
1450
class FunctionJsonbContains : public IFunction {
1451
public:
1452
    static constexpr auto name = "json_contains";
1453
1
    String get_name() const override { return name; }
1454
65
    static FunctionPtr create() { return std::make_shared<FunctionJsonbContains<Impl>>(); }
1455
1456
56
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1457
56
        return make_nullable(std::make_shared<DataTypeUInt8>());
1458
56
    }
1459
64
    DataTypes get_variadic_argument_types_impl() const override {
1460
64
        return Impl::get_variadic_argument_types();
1461
64
    }
1462
56
    size_t get_number_of_arguments() const override {
1463
56
        return get_variadic_argument_types_impl().size();
1464
56
    }
1465
1466
187
    bool use_default_implementation_for_nulls() const override { return false; }
1467
1468
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
1469
131
                        uint32_t result, size_t input_rows_count) const override {
1470
131
        return Impl::execute_impl(context, block, arguments, result, input_rows_count);
1471
131
    }
1472
};
1473
1474
struct JsonbContainsUtil {
1475
    static Status jsonb_contains_execute(FunctionContext* context, Block& block,
1476
                                         const ColumnNumbers& arguments, uint32_t result,
1477
131
                                         size_t input_rows_count) {
1478
131
        DORIS_CHECK_GE(arguments.size(), 3);
1479
1480
131
        auto jsonb_data1_column = block.get_by_position(arguments[0]).column;
1481
131
        auto jsonb_data2_column = block.get_by_position(arguments[1]).column;
1482
1483
131
        ColumnPtr path_column;
1484
131
        bool is_const = false;
1485
131
        std::tie(path_column, is_const) =
1486
131
                unpack_if_const(block.get_by_position(arguments[2]).column);
1487
1488
131
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
1489
131
        auto return_type = block.get_data_type(result);
1490
131
        MutableColumnPtr res = return_type->create_column();
1491
1492
131
        JsonbPath path;
1493
131
        if (is_const) {
1494
84
            if (path_column->is_null_at(0)) {
1495
2
                for (size_t i = 0; i < input_rows_count; ++i) {
1496
1
                    null_map->get_data()[i] = 1;
1497
1
                    res->insert_data(nullptr, 0);
1498
1
                }
1499
1500
1
                block.replace_by_position(
1501
1
                        result, ColumnNullable::create(std::move(res), std::move(null_map)));
1502
1
                return Status::OK();
1503
1
            }
1504
1505
83
            auto path_value = path_column->get_data_at(0);
1506
83
            if (!path.seek(path_value.data, path_value.size)) {
1507
1
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
1508
1
                                               std::string_view(path_value.data, path_value.size));
1509
1
            }
1510
83
        }
1511
1512
384
        for (size_t i = 0; i < input_rows_count; ++i) {
1513
256
            if (jsonb_data1_column->is_null_at(i) || jsonb_data2_column->is_null_at(i) ||
1514
256
                path_column->is_null_at(i)) {
1515
26
                null_map->get_data()[i] = 1;
1516
26
                res->insert_data(nullptr, 0);
1517
26
                continue;
1518
26
            }
1519
1520
230
            if (!is_const) {
1521
50
                auto path_value = path_column->get_data_at(i);
1522
50
                path.clean();
1523
50
                if (!path.seek(path_value.data, path_value.size)) {
1524
1
                    return Status::InvalidArgument(
1525
1
                            "Json path error: Invalid Json Path for value: {}",
1526
1
                            std::string_view(path_value.data, path_value.size));
1527
1
                }
1528
50
            }
1529
1530
229
            auto jsonb_value1 = jsonb_data1_column->get_data_at(i);
1531
229
            auto jsonb_value2 = jsonb_data2_column->get_data_at(i);
1532
1533
229
            if (jsonb_value1.size == 0 || jsonb_value2.size == 0) {
1534
1
                null_map->get_data()[i] = 1;
1535
1
                res->insert_data(nullptr, 0);
1536
1
                continue;
1537
1
            }
1538
            // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
1539
228
            const JsonbDocument* doc1 = nullptr;
1540
228
            RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(jsonb_value1.data,
1541
228
                                                                  jsonb_value1.size, &doc1));
1542
228
            const JsonbDocument* doc2 = nullptr;
1543
228
            RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(jsonb_value2.data,
1544
228
                                                                  jsonb_value2.size, &doc2));
1545
1546
228
            auto find_result = doc1->getValue()->findValue(path);
1547
228
            const auto* value1 = find_result.value;
1548
228
            const JsonbValue* value2 = doc2->getValue();
1549
228
            if (!value1 || !value2) {
1550
45
                null_map->get_data()[i] = 1;
1551
45
                res->insert_data(nullptr, 0);
1552
45
                continue;
1553
45
            }
1554
183
            auto contains_value = value1->contains(value2);
1555
183
            res->insert_data(const_cast<const char*>((char*)&contains_value), 0);
1556
183
        }
1557
1558
128
        block.replace_by_position(result,
1559
128
                                  ColumnNullable::create(std::move(res), std::move(null_map)));
1560
128
        return Status::OK();
1561
129
    }
1562
};
1563
1564
template <bool ignore_null>
1565
class FunctionJsonbArray : public IFunction {
1566
public:
1567
    static constexpr auto name = "json_array";
1568
    static constexpr auto alias = "jsonb_array";
1569
1570
51
    static FunctionPtr create() { return std::make_shared<FunctionJsonbArray>(); }
_ZN5doris18FunctionJsonbArrayILb0EE6createEv
Line
Count
Source
1570
40
    static FunctionPtr create() { return std::make_shared<FunctionJsonbArray>(); }
_ZN5doris18FunctionJsonbArrayILb1EE6createEv
Line
Count
Source
1570
11
    static FunctionPtr create() { return std::make_shared<FunctionJsonbArray>(); }
1571
1572
0
    String get_name() const override { return name; }
Unexecuted instantiation: _ZNK5doris18FunctionJsonbArrayILb0EE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris18FunctionJsonbArrayILb1EE8get_nameB5cxx11Ev
1573
1574
0
    size_t get_number_of_arguments() const override { return 0; }
Unexecuted instantiation: _ZNK5doris18FunctionJsonbArrayILb0EE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris18FunctionJsonbArrayILb1EE23get_number_of_argumentsEv
1575
35
    bool is_variadic() const override { return true; }
_ZNK5doris18FunctionJsonbArrayILb0EE11is_variadicEv
Line
Count
Source
1575
32
    bool is_variadic() const override { return true; }
_ZNK5doris18FunctionJsonbArrayILb1EE11is_variadicEv
Line
Count
Source
1575
3
    bool is_variadic() const override { return true; }
1576
1577
64
    bool use_default_implementation_for_nulls() const override { return false; }
_ZNK5doris18FunctionJsonbArrayILb0EE36use_default_implementation_for_nullsEv
Line
Count
Source
1577
60
    bool use_default_implementation_for_nulls() const override { return false; }
_ZNK5doris18FunctionJsonbArrayILb1EE36use_default_implementation_for_nullsEv
Line
Count
Source
1577
4
    bool use_default_implementation_for_nulls() const override { return false; }
1578
1579
33
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1580
33
        return std::make_shared<DataTypeJsonb>();
1581
33
    }
_ZNK5doris18FunctionJsonbArrayILb0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS6_EE
Line
Count
Source
1579
31
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1580
31
        return std::make_shared<DataTypeJsonb>();
1581
31
    }
_ZNK5doris18FunctionJsonbArrayILb1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS6_EE
Line
Count
Source
1579
2
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1580
2
        return std::make_shared<DataTypeJsonb>();
1581
2
    }
1582
1583
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
1584
33
                        uint32_t result, size_t input_rows_count) const override {
1585
33
        auto return_data_type = std::make_shared<DataTypeJsonb>();
1586
33
        auto column = return_data_type->create_column();
1587
33
        column->reserve(input_rows_count);
1588
1589
33
        JsonbWriter writer;
1590
100
        for (size_t i = 0; i < input_rows_count; ++i) {
1591
63
            writer.writeStartArray();
1592
182
            for (auto argument : arguments) {
1593
182
                auto&& [arg_column, is_const] =
1594
182
                        unpack_if_const(block.get_by_position(argument).column);
1595
182
                if (const auto* nullable_column =
1596
182
                            check_and_get_column<ColumnNullable>(arg_column.get())) {
1597
83
                    const auto& null_map = nullable_column->get_null_map_data();
1598
83
                    const auto& nested_column = nullable_column->get_nested_column();
1599
83
                    const auto& jsonb_column =
1600
83
                            assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>(
1601
83
                                    nested_column);
1602
1603
83
                    auto index = index_check_const(i, is_const);
1604
83
                    if (null_map[index]) {
1605
30
                        if constexpr (ignore_null) {
1606
4
                            continue;
1607
26
                        } else {
1608
26
                            writer.writeNull();
1609
26
                        }
1610
53
                    } else {
1611
53
                        auto jsonb_binary = jsonb_column.get_data_at(index);
1612
53
                        const JsonbDocument* doc = nullptr;
1613
53
                        auto st = JsonbDocument::checkAndCreateDocument(jsonb_binary.data,
1614
53
                                                                        jsonb_binary.size, &doc);
1615
53
                        if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
1616
0
                            if constexpr (ignore_null) {
1617
0
                                continue;
1618
0
                            } else {
1619
0
                                writer.writeNull();
1620
0
                            }
1621
53
                        } else {
1622
53
                            writer.writeValue(doc->getValue());
1623
53
                        }
1624
53
                    }
1625
99
                } else {
1626
99
                    const auto& jsonb_column =
1627
99
                            assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>(
1628
99
                                    *arg_column);
1629
1630
99
                    auto index = index_check_const(i, is_const);
1631
99
                    auto jsonb_binary = jsonb_column.get_data_at(index);
1632
99
                    const JsonbDocument* doc = nullptr;
1633
99
                    auto st = JsonbDocument::checkAndCreateDocument(jsonb_binary.data,
1634
99
                                                                    jsonb_binary.size, &doc);
1635
99
                    if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
1636
0
                        if constexpr (ignore_null) {
1637
0
                            continue;
1638
0
                        } else {
1639
0
                            writer.writeNull();
1640
0
                        }
1641
99
                    } else {
1642
99
                        writer.writeValue(doc->getValue());
1643
99
                    }
1644
99
                }
1645
182
            }
1646
19
            writer.writeEndArray();
1647
19
            column->insert_data(writer.getOutput()->getBuffer(), writer.getOutput()->getSize());
1648
19
            writer.reset();
1649
19
        }
1650
1651
6
        block.get_by_position(result).column = std::move(column);
1652
6
        return Status::OK();
1653
33
    }
_ZNK5doris18FunctionJsonbArrayILb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
1584
31
                        uint32_t result, size_t input_rows_count) const override {
1585
31
        auto return_data_type = std::make_shared<DataTypeJsonb>();
1586
31
        auto column = return_data_type->create_column();
1587
31
        column->reserve(input_rows_count);
1588
1589
31
        JsonbWriter writer;
1590
79
        for (size_t i = 0; i < input_rows_count; ++i) {
1591
48
            writer.writeStartArray();
1592
152
            for (auto argument : arguments) {
1593
152
                auto&& [arg_column, is_const] =
1594
152
                        unpack_if_const(block.get_by_position(argument).column);
1595
152
                if (const auto* nullable_column =
1596
152
                            check_and_get_column<ColumnNullable>(arg_column.get())) {
1597
58
                    const auto& null_map = nullable_column->get_null_map_data();
1598
58
                    const auto& nested_column = nullable_column->get_nested_column();
1599
58
                    const auto& jsonb_column =
1600
58
                            assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>(
1601
58
                                    nested_column);
1602
1603
58
                    auto index = index_check_const(i, is_const);
1604
58
                    if (null_map[index]) {
1605
                        if constexpr (ignore_null) {
1606
                            continue;
1607
26
                        } else {
1608
26
                            writer.writeNull();
1609
26
                        }
1610
32
                    } else {
1611
32
                        auto jsonb_binary = jsonb_column.get_data_at(index);
1612
32
                        const JsonbDocument* doc = nullptr;
1613
32
                        auto st = JsonbDocument::checkAndCreateDocument(jsonb_binary.data,
1614
32
                                                                        jsonb_binary.size, &doc);
1615
32
                        if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
1616
                            if constexpr (ignore_null) {
1617
                                continue;
1618
0
                            } else {
1619
0
                                writer.writeNull();
1620
0
                            }
1621
32
                        } else {
1622
32
                            writer.writeValue(doc->getValue());
1623
32
                        }
1624
32
                    }
1625
94
                } else {
1626
94
                    const auto& jsonb_column =
1627
94
                            assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>(
1628
94
                                    *arg_column);
1629
1630
94
                    auto index = index_check_const(i, is_const);
1631
94
                    auto jsonb_binary = jsonb_column.get_data_at(index);
1632
94
                    const JsonbDocument* doc = nullptr;
1633
94
                    auto st = JsonbDocument::checkAndCreateDocument(jsonb_binary.data,
1634
94
                                                                    jsonb_binary.size, &doc);
1635
94
                    if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
1636
                        if constexpr (ignore_null) {
1637
                            continue;
1638
0
                        } else {
1639
0
                            writer.writeNull();
1640
0
                        }
1641
94
                    } else {
1642
94
                        writer.writeValue(doc->getValue());
1643
94
                    }
1644
94
                }
1645
152
            }
1646
48
            writer.writeEndArray();
1647
48
            column->insert_data(writer.getOutput()->getBuffer(), writer.getOutput()->getSize());
1648
48
            writer.reset();
1649
48
        }
1650
1651
31
        block.get_by_position(result).column = std::move(column);
1652
31
        return Status::OK();
1653
31
    }
_ZNK5doris18FunctionJsonbArrayILb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
1584
2
                        uint32_t result, size_t input_rows_count) const override {
1585
2
        auto return_data_type = std::make_shared<DataTypeJsonb>();
1586
2
        auto column = return_data_type->create_column();
1587
2
        column->reserve(input_rows_count);
1588
1589
2
        JsonbWriter writer;
1590
21
        for (size_t i = 0; i < input_rows_count; ++i) {
1591
15
            writer.writeStartArray();
1592
30
            for (auto argument : arguments) {
1593
30
                auto&& [arg_column, is_const] =
1594
30
                        unpack_if_const(block.get_by_position(argument).column);
1595
30
                if (const auto* nullable_column =
1596
30
                            check_and_get_column<ColumnNullable>(arg_column.get())) {
1597
25
                    const auto& null_map = nullable_column->get_null_map_data();
1598
25
                    const auto& nested_column = nullable_column->get_nested_column();
1599
25
                    const auto& jsonb_column =
1600
25
                            assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>(
1601
25
                                    nested_column);
1602
1603
25
                    auto index = index_check_const(i, is_const);
1604
25
                    if (null_map[index]) {
1605
4
                        if constexpr (ignore_null) {
1606
4
                            continue;
1607
                        } else {
1608
                            writer.writeNull();
1609
                        }
1610
21
                    } else {
1611
21
                        auto jsonb_binary = jsonb_column.get_data_at(index);
1612
21
                        const JsonbDocument* doc = nullptr;
1613
21
                        auto st = JsonbDocument::checkAndCreateDocument(jsonb_binary.data,
1614
21
                                                                        jsonb_binary.size, &doc);
1615
21
                        if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
1616
0
                            if constexpr (ignore_null) {
1617
0
                                continue;
1618
                            } else {
1619
                                writer.writeNull();
1620
                            }
1621
21
                        } else {
1622
21
                            writer.writeValue(doc->getValue());
1623
21
                        }
1624
21
                    }
1625
25
                } else {
1626
5
                    const auto& jsonb_column =
1627
5
                            assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>(
1628
5
                                    *arg_column);
1629
1630
5
                    auto index = index_check_const(i, is_const);
1631
5
                    auto jsonb_binary = jsonb_column.get_data_at(index);
1632
5
                    const JsonbDocument* doc = nullptr;
1633
5
                    auto st = JsonbDocument::checkAndCreateDocument(jsonb_binary.data,
1634
5
                                                                    jsonb_binary.size, &doc);
1635
5
                    if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
1636
0
                        if constexpr (ignore_null) {
1637
0
                            continue;
1638
                        } else {
1639
                            writer.writeNull();
1640
                        }
1641
5
                    } else {
1642
5
                        writer.writeValue(doc->getValue());
1643
5
                    }
1644
5
                }
1645
30
            }
1646
19
            writer.writeEndArray();
1647
19
            column->insert_data(writer.getOutput()->getBuffer(), writer.getOutput()->getSize());
1648
19
            writer.reset();
1649
19
        }
1650
1651
6
        block.get_by_position(result).column = std::move(column);
1652
6
        return Status::OK();
1653
2
    }
1654
};
1655
1656
class FunctionJsonbObject : public IFunction {
1657
public:
1658
    static constexpr auto name = "json_object";
1659
    static constexpr auto alias = "jsonb_object";
1660
1661
42
    static FunctionPtr create() { return std::make_shared<FunctionJsonbObject>(); }
1662
1663
0
    String get_name() const override { return name; }
1664
1665
0
    size_t get_number_of_arguments() const override { return 0; }
1666
34
    bool is_variadic() const override { return true; }
1667
1668
72
    bool use_default_implementation_for_nulls() const override { return false; }
1669
1670
33
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1671
33
        return std::make_shared<DataTypeJsonb>();
1672
33
    }
1673
1674
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
1675
43
                        uint32_t result, size_t input_rows_count) const override {
1676
43
        if (arguments.size() % 2 != 0) {
1677
0
            return Status::InvalidArgument(
1678
0
                    "JSON object must have an even number of arguments, but got: {}",
1679
0
                    arguments.size());
1680
0
        }
1681
1682
43
        auto return_data_type = std::make_shared<DataTypeJsonb>();
1683
1684
43
        auto write_key = [](JsonbWriter& writer, const ColumnString& key_col, const bool is_const,
1685
194
                            const NullMap* null_map, const size_t arg_index, const size_t row_idx) {
1686
194
            auto index = index_check_const(row_idx, is_const);
1687
194
            if (null_map && (*null_map)[index]) {
1688
1
                return Status::InvalidArgument(
1689
1
                        "JSON documents may not contain NULL member name(argument "
1690
1
                        "index:  "
1691
1
                        "{}, row index: {})",
1692
1
                        row_idx, arg_index);
1693
1
            }
1694
1695
193
            auto key_string = key_col.get_data_at(index);
1696
193
            if (key_string.size > 255) {
1697
0
                return Status::InvalidArgument(
1698
0
                        "JSON object keys(argument index: {}) must be less than 256 "
1699
0
                        "bytes, but got size: {}",
1700
0
                        arg_index, key_string.size);
1701
0
            }
1702
193
            writer.writeKey(key_string.data, static_cast<uint8_t>(key_string.size));
1703
193
            return Status::OK();
1704
193
        };
1705
1706
43
        auto write_value = [](JsonbWriter& writer, const ColumnString& value_col,
1707
43
                              const bool is_const, const NullMap* null_map, const size_t arg_index,
1708
193
                              const size_t row_idx) {
1709
193
            auto index = index_check_const(row_idx, is_const);
1710
193
            if (null_map && (*null_map)[index]) {
1711
46
                writer.writeNull();
1712
46
                return Status::OK();
1713
46
            }
1714
1715
147
            auto value_string = value_col.get_data_at(index);
1716
147
            const JsonbDocument* doc = nullptr;
1717
147
            RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(value_string.data,
1718
147
                                                                  value_string.size, &doc));
1719
147
            writer.writeValue(doc->getValue());
1720
147
            return Status::OK();
1721
147
        };
1722
1723
137
        for (size_t arg_idx = 0; arg_idx != arguments.size(); arg_idx += 2) {
1724
94
            auto key_argument = arguments[arg_idx];
1725
94
            auto value_argument = arguments[arg_idx + 1];
1726
1727
94
            auto& key_data_type = block.get_by_position(key_argument).type;
1728
94
            auto& value_data_type = block.get_by_position(value_argument).type;
1729
94
            if (!is_string_type(key_data_type->get_primitive_type())) {
1730
0
                return Status::InvalidArgument(
1731
0
                        "JSON object key(argument index: {}) must be String, but got type: "
1732
0
                        "{}(primitive type: {})",
1733
0
                        arg_idx, key_data_type->get_name(),
1734
0
                        static_cast<int>(key_data_type->get_primitive_type()));
1735
0
            }
1736
1737
94
            if (value_data_type->get_primitive_type() != PrimitiveType::TYPE_JSONB) {
1738
0
                return Status::InvalidArgument(
1739
0
                        "JSON object value(argument index: {}) must be JSON, but got type: {}",
1740
0
                        arg_idx, value_data_type->get_name());
1741
0
            }
1742
94
        }
1743
1744
43
        auto column = return_data_type->create_column();
1745
43
        column->reserve(input_rows_count);
1746
1747
43
        JsonbWriter writer;
1748
107
        for (size_t i = 0; i != input_rows_count; ++i) {
1749
65
            writer.writeStartObject();
1750
258
            for (size_t arg_idx = 0; arg_idx != arguments.size(); arg_idx += 2) {
1751
194
                auto key_argument = arguments[arg_idx];
1752
194
                auto value_argument = arguments[arg_idx + 1];
1753
194
                auto&& [key_column, key_const] =
1754
194
                        unpack_if_const(block.get_by_position(key_argument).column);
1755
194
                auto&& [value_column, value_const] =
1756
194
                        unpack_if_const(block.get_by_position(value_argument).column);
1757
1758
194
                if (const auto* nullable_column =
1759
194
                            check_and_get_column<ColumnNullable>(key_column.get())) {
1760
3
                    const auto& null_map = nullable_column->get_null_map_data();
1761
3
                    const auto& nested_column = nullable_column->get_nested_column();
1762
3
                    const auto& key_arg_column =
1763
3
                            assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>(
1764
3
                                    nested_column);
1765
1766
3
                    RETURN_IF_ERROR(
1767
3
                            write_key(writer, key_arg_column, key_const, &null_map, arg_idx, i));
1768
191
                } else {
1769
191
                    const auto& key_arg_column =
1770
191
                            assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>(
1771
191
                                    *key_column);
1772
191
                    RETURN_IF_ERROR(
1773
191
                            write_key(writer, key_arg_column, key_const, nullptr, arg_idx, i));
1774
191
                }
1775
1776
193
                if (const auto* nullable_column =
1777
193
                            check_and_get_column<ColumnNullable>(value_column.get())) {
1778
93
                    const auto& null_map = nullable_column->get_null_map_data();
1779
93
                    const auto& nested_column = nullable_column->get_nested_column();
1780
93
                    const auto& value_arg_column =
1781
93
                            assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>(
1782
93
                                    nested_column);
1783
1784
93
                    RETURN_IF_ERROR(write_value(writer, value_arg_column, value_const, &null_map,
1785
93
                                                arg_idx + 1, i));
1786
100
                } else {
1787
100
                    const auto& value_arg_column =
1788
100
                            assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>(
1789
100
                                    *value_column);
1790
100
                    RETURN_IF_ERROR(write_value(writer, value_arg_column, value_const, nullptr,
1791
100
                                                arg_idx + 1, i));
1792
100
                }
1793
193
            }
1794
1795
64
            writer.writeEndObject();
1796
64
            column->insert_data(writer.getOutput()->getBuffer(), writer.getOutput()->getSize());
1797
64
            writer.reset();
1798
64
        }
1799
1800
42
        block.get_by_position(result).column = std::move(column);
1801
42
        return Status::OK();
1802
43
    }
1803
};
1804
1805
enum class JsonbModifyType { Insert, Set, Replace };
1806
1807
template <JsonbModifyType modify_type>
1808
struct JsonbModifyName {
1809
    static constexpr auto name = "jsonb_modify";
1810
    static constexpr auto alias = "json_modify";
1811
};
1812
1813
template <>
1814
struct JsonbModifyName<JsonbModifyType::Insert> {
1815
    static constexpr auto name = "jsonb_insert";
1816
    static constexpr auto alias = "json_insert";
1817
};
1818
template <>
1819
struct JsonbModifyName<JsonbModifyType::Set> {
1820
    static constexpr auto name = "jsonb_set";
1821
    static constexpr auto alias = "json_set";
1822
};
1823
template <>
1824
struct JsonbModifyName<JsonbModifyType::Replace> {
1825
    static constexpr auto name = "jsonb_replace";
1826
    static constexpr auto alias = "json_replace";
1827
};
1828
1829
template <JsonbModifyType modify_type>
1830
class FunctionJsonbModify : public IFunction {
1831
public:
1832
    static constexpr auto name = JsonbModifyName<modify_type>::name;
1833
    static constexpr auto alias = JsonbModifyName<modify_type>::alias;
1834
1835
112
    static FunctionPtr create() { return std::make_shared<FunctionJsonbModify<modify_type>>(); }
_ZN5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE6createEv
Line
Count
Source
1835
38
    static FunctionPtr create() { return std::make_shared<FunctionJsonbModify<modify_type>>(); }
_ZN5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE6createEv
Line
Count
Source
1835
37
    static FunctionPtr create() { return std::make_shared<FunctionJsonbModify<modify_type>>(); }
_ZN5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE6createEv
Line
Count
Source
1835
37
    static FunctionPtr create() { return std::make_shared<FunctionJsonbModify<modify_type>>(); }
1836
1837
0
    String get_name() const override { return name; }
Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE8get_nameB5cxx11Ev
1838
1839
0
    size_t get_number_of_arguments() const override { return 0; }
Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE23get_number_of_argumentsEv
1840
88
    bool is_variadic() const override { return true; }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE11is_variadicEv
Line
Count
Source
1840
30
    bool is_variadic() const override { return true; }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE11is_variadicEv
Line
Count
Source
1840
29
    bool is_variadic() const override { return true; }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE11is_variadicEv
Line
Count
Source
1840
29
    bool is_variadic() const override { return true; }
1841
1842
170
    bool use_default_implementation_for_nulls() const override { return false; }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE36use_default_implementation_for_nullsEv
Line
Count
Source
1842
58
    bool use_default_implementation_for_nulls() const override { return false; }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE36use_default_implementation_for_nullsEv
Line
Count
Source
1842
56
    bool use_default_implementation_for_nulls() const override { return false; }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE36use_default_implementation_for_nullsEv
Line
Count
Source
1842
56
    bool use_default_implementation_for_nulls() const override { return false; }
1843
1844
85
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1845
85
        return make_nullable(std::make_shared<DataTypeJsonb>());
1846
85
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
1844
29
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1845
29
        return make_nullable(std::make_shared<DataTypeJsonb>());
1846
29
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
1844
28
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1845
28
        return make_nullable(std::make_shared<DataTypeJsonb>());
1846
28
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
1844
28
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1845
28
        return make_nullable(std::make_shared<DataTypeJsonb>());
1846
28
    }
1847
1848
    Status create_all_null_result(const DataTypePtr& return_data_type, Block& block,
1849
0
                                  uint32_t result, size_t input_rows_count) const {
1850
0
        auto result_column = return_data_type->create_column();
1851
0
        result_column->insert_default();
1852
0
        auto const_column = ColumnConst::create(std::move(result_column), input_rows_count);
1853
0
        block.get_by_position(result).column = std::move(const_column);
1854
0
        return Status::OK();
1855
0
    }
Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE22create_all_null_resultERKSt10shared_ptrIKNS_9IDataTypeEERNS_5BlockEjm
Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE22create_all_null_resultERKSt10shared_ptrIKNS_9IDataTypeEERNS_5BlockEjm
Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE22create_all_null_resultERKSt10shared_ptrIKNS_9IDataTypeEERNS_5BlockEjm
1856
1857
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
1858
85
                        uint32_t result, size_t input_rows_count) const override {
1859
85
        if (arguments.size() % 2 != 1 || arguments.size() < 3) {
1860
0
            return Status::InvalidArgument(
1861
0
                    "Function {} must have an odd number of arguments and more than 2 arguments, "
1862
0
                    "but got: {}",
1863
0
                    name, arguments.size());
1864
0
        }
1865
1866
85
        const size_t keys_count = (arguments.size() - 1) / 2;
1867
1868
85
        auto return_data_type = make_nullable(std::make_shared<DataTypeJsonb>());
1869
1870
85
        auto result_column = return_data_type->create_column();
1871
85
        auto& result_nullable_col = assert_cast<ColumnNullable&>(*result_column);
1872
85
        auto& null_map = result_nullable_col.get_null_map_data();
1873
85
        auto& res_string_column =
1874
85
                assert_cast<ColumnString&>(result_nullable_col.get_nested_column());
1875
85
        auto& res_chars = res_string_column.get_chars();
1876
85
        auto& res_offsets = res_string_column.get_offsets();
1877
1878
85
        null_map.resize_fill(input_rows_count, 0);
1879
85
        res_offsets.resize(input_rows_count);
1880
85
        auto&& [json_data_arg_column, json_data_const] =
1881
85
                unpack_if_const(block.get_by_position(arguments[0]).column);
1882
1883
85
        if (json_data_const) {
1884
11
            if (json_data_arg_column->is_null_at(0)) {
1885
0
                return create_all_null_result(return_data_type, block, result, input_rows_count);
1886
0
            }
1887
11
        }
1888
1889
85
        std::vector<const ColumnString*> json_path_columns(keys_count);
1890
85
        std::vector<bool> json_path_constant(keys_count);
1891
85
        std::vector<const NullMap*> json_path_null_maps(keys_count, nullptr);
1892
1893
85
        std::vector<const ColumnString*> json_value_columns(keys_count);
1894
85
        std::vector<bool> json_value_constant(keys_count);
1895
85
        std::vector<const NullMap*> json_value_null_maps(keys_count, nullptr);
1896
1897
85
        const NullMap* json_data_null_map = nullptr;
1898
85
        const ColumnString* json_data_column;
1899
85
        if (const auto* nullable_column =
1900
85
                    check_and_get_column<ColumnNullable>(json_data_arg_column.get())) {
1901
85
            json_data_null_map = &nullable_column->get_null_map_data();
1902
85
            const auto& nested_column = nullable_column->get_nested_column();
1903
85
            json_data_column = assert_cast<const ColumnString*>(&nested_column);
1904
85
        } else {
1905
0
            json_data_column = assert_cast<const ColumnString*>(json_data_arg_column.get());
1906
0
        }
1907
1908
195
        for (size_t i = 1; i < arguments.size(); i += 2) {
1909
110
            auto&& [path_column, path_const] =
1910
110
                    unpack_if_const(block.get_by_position(arguments[i]).column);
1911
110
            auto&& [value_column, value_const] =
1912
110
                    unpack_if_const(block.get_by_position(arguments[i + 1]).column);
1913
1914
110
            if (path_const) {
1915
27
                if (path_column->is_null_at(0)) {
1916
0
                    return create_all_null_result(return_data_type, block, result,
1917
0
                                                  input_rows_count);
1918
0
                }
1919
27
            }
1920
1921
110
            json_path_constant[i / 2] = path_const;
1922
110
            if (const auto* nullable_column =
1923
110
                        check_and_get_column<ColumnNullable>(path_column.get())) {
1924
8
                json_path_null_maps[i / 2] = &nullable_column->get_null_map_data();
1925
8
                const auto& nested_column = nullable_column->get_nested_column();
1926
8
                json_path_columns[i / 2] = assert_cast<const ColumnString*>(&nested_column);
1927
102
            } else {
1928
102
                json_path_columns[i / 2] = assert_cast<const ColumnString*>(path_column.get());
1929
102
            }
1930
1931
110
            json_value_constant[i / 2] = value_const;
1932
110
            if (const auto* nullable_column =
1933
110
                        check_and_get_column<ColumnNullable>(value_column.get())) {
1934
53
                json_value_null_maps[i / 2] = &nullable_column->get_null_map_data();
1935
53
                const auto& nested_column = nullable_column->get_nested_column();
1936
53
                json_value_columns[i / 2] = assert_cast<const ColumnString*>(&nested_column);
1937
57
            } else {
1938
57
                json_value_columns[i / 2] = assert_cast<const ColumnString*>(value_column.get());
1939
57
            }
1940
110
        }
1941
1942
85
        DorisVector<const JsonbDocument*> json_documents(input_rows_count);
1943
85
        if (json_data_const) {
1944
11
            auto json_data_string = json_data_column->get_data_at(0);
1945
11
            const JsonbDocument* doc = nullptr;
1946
11
            RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(json_data_string.data,
1947
11
                                                                  json_data_string.size, &doc));
1948
11
            if (!doc || !doc->getValue()) [[unlikely]] {
1949
0
                return create_all_null_result(return_data_type, block, result, input_rows_count);
1950
0
            }
1951
62
            for (size_t i = 0; i != input_rows_count; ++i) {
1952
51
                json_documents[i] = doc;
1953
51
            }
1954
74
        } else {
1955
152
            for (size_t i = 0; i != input_rows_count; ++i) {
1956
78
                if (json_data_null_map && (*json_data_null_map)[i]) {
1957
0
                    null_map[i] = 1;
1958
0
                    json_documents[i] = nullptr;
1959
0
                    continue;
1960
0
                }
1961
1962
78
                auto json_data_string = json_data_column->get_data_at(i);
1963
78
                const JsonbDocument* doc = nullptr;
1964
78
                RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(json_data_string.data,
1965
78
                                                                      json_data_string.size, &doc));
1966
78
                if (!doc || !doc->getValue()) [[unlikely]] {
1967
0
                    null_map[i] = 1;
1968
0
                    continue;
1969
0
                }
1970
78
                json_documents[i] = doc;
1971
78
            }
1972
74
        }
1973
1974
85
        DorisVector<DorisVector<JsonbPath>> json_paths(keys_count);
1975
85
        DorisVector<DorisVector<const JsonbValue*>> json_values(keys_count);
1976
1977
85
        RETURN_IF_ERROR(parse_paths_and_values(json_paths, json_values, arguments, input_rows_count,
1978
85
                                               json_path_columns, json_path_constant,
1979
85
                                               json_path_null_maps, json_value_columns,
1980
85
                                               json_value_constant, json_value_null_maps));
1981
1982
77
        JsonbWriter writer;
1983
77
        struct DocumentBuffer {
1984
77
            DorisUniqueBufferPtr<char> ptr;
1985
77
            size_t size = 0;
1986
77
            size_t capacity = 0;
1987
77
        };
1988
1989
77
        DocumentBuffer tmp_buffer;
1990
1991
218
        for (size_t row_idx = 0; row_idx != input_rows_count; ++row_idx) {
1992
341
            for (size_t i = 1; i < arguments.size(); i += 2) {
1993
200
                const size_t index = i / 2;
1994
200
                auto& json_path = json_paths[index];
1995
200
                auto& json_value = json_values[index];
1996
1997
200
                const auto path_index = index_check_const(row_idx, json_path_constant[index]);
1998
200
                const auto value_index = index_check_const(row_idx, json_value_constant[index]);
1999
2000
200
                if (null_map[row_idx]) {
2001
0
                    continue;
2002
0
                }
2003
2004
200
                if (json_documents[row_idx] == nullptr) {
2005
0
                    null_map[row_idx] = 1;
2006
0
                    continue;
2007
0
                }
2008
2009
200
                if (json_path_null_maps[index] && (*json_path_null_maps[index])[path_index]) {
2010
4
                    null_map[row_idx] = 1;
2011
4
                    continue;
2012
4
                }
2013
2014
196
                auto find_result =
2015
196
                        json_documents[row_idx]->getValue()->findValue(json_path[path_index]);
2016
2017
196
                if (find_result.is_wildcard) {
2018
0
                    return Status::InvalidArgument(
2019
0
                            " In this situation, path expressions may not contain the * and ** "
2020
0
                            "tokens or an array range, argument index: {}, row index: {}",
2021
0
                            i, row_idx);
2022
0
                }
2023
2024
196
                if constexpr (modify_type == JsonbModifyType::Insert) {
2025
62
                    if (find_result.value) {
2026
18
                        continue;
2027
18
                    }
2028
67
                } else if constexpr (modify_type == JsonbModifyType::Replace) {
2029
67
                    if (!find_result.value) {
2030
11
                        continue;
2031
11
                    }
2032
67
                }
2033
2034
100
                std::vector<const JsonbValue*> parents;
2035
2036
196
                bool replace = false;
2037
196
                parents.emplace_back(json_documents[row_idx]->getValue());
2038
196
                const auto legs_count = json_path[path_index].get_leg_vector_size();
2039
196
                if (find_result.value) {
2040
                    // find target path, replace it with the new value.
2041
100
                    replace = true;
2042
100
                    if (!build_parents_by_path(json_documents[row_idx]->getValue(),
2043
100
                                               json_path[path_index], parents)) {
2044
0
                        continue;
2045
0
                    }
2046
100
                } else {
2047
                    // does not find target path, insert the new value.
2048
96
                    JsonbPath new_path;
2049
96
                    DCHECK_GT(legs_count, 0);
2050
156
                    for (size_t j = 0; j + 1 < legs_count; ++j) {
2051
60
                        auto* current_leg = json_path[path_index].get_leg_from_leg_vector(j);
2052
60
                        std::unique_ptr<leg_info> leg = std::make_unique<leg_info>(
2053
60
                                current_leg->leg_ptr, current_leg->leg_len,
2054
60
                                current_leg->array_index, current_leg->type);
2055
60
                        new_path.add_leg_to_leg_vector(std::move(leg));
2056
60
                    }
2057
2058
96
                    if (!build_parents_by_path(json_documents[row_idx]->getValue(), new_path,
2059
96
                                               parents)) {
2060
12
                        continue;
2061
12
                    }
2062
96
                }
2063
2064
184
                leg_info* last_leg =
2065
184
                        legs_count > 0
2066
184
                                ? json_path[path_index].get_leg_from_leg_vector(legs_count - 1)
2067
184
                                : nullptr;
2068
184
                RETURN_IF_ERROR(write_json_value(json_documents[row_idx]->getValue(), parents, 0,
2069
184
                                                 json_value[value_index], replace, last_leg,
2070
184
                                                 writer));
2071
2072
184
                auto* writer_output = writer.getOutput();
2073
184
                if (writer_output->getSize() > tmp_buffer.capacity) {
2074
67
                    tmp_buffer.capacity =
2075
67
                            ((size_t(writer_output->getSize()) + 1024 - 1) / 1024) * 1024;
2076
67
                    tmp_buffer.ptr = make_unique_buffer<char>(tmp_buffer.capacity);
2077
67
                    DCHECK_LE(writer_output->getSize(), tmp_buffer.capacity);
2078
67
                }
2079
2080
184
                memcpy(tmp_buffer.ptr.get(), writer_output->getBuffer(), writer_output->getSize());
2081
184
                tmp_buffer.size = writer_output->getSize();
2082
2083
184
                writer.reset();
2084
2085
184
                RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(
2086
184
                        tmp_buffer.ptr.get(), tmp_buffer.size, &json_documents[row_idx]));
2087
184
            }
2088
2089
141
            if (!null_map[row_idx]) {
2090
108
                const auto* jsonb_document = json_documents[row_idx];
2091
108
                const auto size = jsonb_document->numPackedBytes();
2092
108
                res_chars.insert(reinterpret_cast<const char*>(jsonb_document),
2093
108
                                 reinterpret_cast<const char*>(jsonb_document) + size);
2094
108
            }
2095
2096
141
            res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size());
2097
2098
141
            if (!null_map[row_idx]) {
2099
108
                auto* ptr = res_chars.data() + res_offsets[row_idx - 1];
2100
108
                auto size = res_offsets[row_idx] - res_offsets[row_idx - 1];
2101
108
                const JsonbDocument* doc = nullptr;
2102
108
                THROW_IF_ERROR(JsonbDocument::checkAndCreateDocument(
2103
108
                        reinterpret_cast<const char*>(ptr), size, &doc));
2104
108
            }
2105
141
        }
2106
2107
106
        block.get_by_position(result).column = std::move(result_column);
2108
106
        return Status::OK();
2109
77
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
1858
29
                        uint32_t result, size_t input_rows_count) const override {
1859
29
        if (arguments.size() % 2 != 1 || arguments.size() < 3) {
1860
0
            return Status::InvalidArgument(
1861
0
                    "Function {} must have an odd number of arguments and more than 2 arguments, "
1862
0
                    "but got: {}",
1863
0
                    name, arguments.size());
1864
0
        }
1865
1866
29
        const size_t keys_count = (arguments.size() - 1) / 2;
1867
1868
29
        auto return_data_type = make_nullable(std::make_shared<DataTypeJsonb>());
1869
1870
29
        auto result_column = return_data_type->create_column();
1871
29
        auto& result_nullable_col = assert_cast<ColumnNullable&>(*result_column);
1872
29
        auto& null_map = result_nullable_col.get_null_map_data();
1873
29
        auto& res_string_column =
1874
29
                assert_cast<ColumnString&>(result_nullable_col.get_nested_column());
1875
29
        auto& res_chars = res_string_column.get_chars();
1876
29
        auto& res_offsets = res_string_column.get_offsets();
1877
1878
29
        null_map.resize_fill(input_rows_count, 0);
1879
29
        res_offsets.resize(input_rows_count);
1880
29
        auto&& [json_data_arg_column, json_data_const] =
1881
29
                unpack_if_const(block.get_by_position(arguments[0]).column);
1882
1883
29
        if (json_data_const) {
1884
5
            if (json_data_arg_column->is_null_at(0)) {
1885
0
                return create_all_null_result(return_data_type, block, result, input_rows_count);
1886
0
            }
1887
5
        }
1888
1889
29
        std::vector<const ColumnString*> json_path_columns(keys_count);
1890
29
        std::vector<bool> json_path_constant(keys_count);
1891
29
        std::vector<const NullMap*> json_path_null_maps(keys_count, nullptr);
1892
1893
29
        std::vector<const ColumnString*> json_value_columns(keys_count);
1894
29
        std::vector<bool> json_value_constant(keys_count);
1895
29
        std::vector<const NullMap*> json_value_null_maps(keys_count, nullptr);
1896
1897
29
        const NullMap* json_data_null_map = nullptr;
1898
29
        const ColumnString* json_data_column;
1899
29
        if (const auto* nullable_column =
1900
29
                    check_and_get_column<ColumnNullable>(json_data_arg_column.get())) {
1901
29
            json_data_null_map = &nullable_column->get_null_map_data();
1902
29
            const auto& nested_column = nullable_column->get_nested_column();
1903
29
            json_data_column = assert_cast<const ColumnString*>(&nested_column);
1904
29
        } else {
1905
0
            json_data_column = assert_cast<const ColumnString*>(json_data_arg_column.get());
1906
0
        }
1907
1908
65
        for (size_t i = 1; i < arguments.size(); i += 2) {
1909
36
            auto&& [path_column, path_const] =
1910
36
                    unpack_if_const(block.get_by_position(arguments[i]).column);
1911
36
            auto&& [value_column, value_const] =
1912
36
                    unpack_if_const(block.get_by_position(arguments[i + 1]).column);
1913
1914
36
            if (path_const) {
1915
7
                if (path_column->is_null_at(0)) {
1916
0
                    return create_all_null_result(return_data_type, block, result,
1917
0
                                                  input_rows_count);
1918
0
                }
1919
7
            }
1920
1921
36
            json_path_constant[i / 2] = path_const;
1922
36
            if (const auto* nullable_column =
1923
36
                        check_and_get_column<ColumnNullable>(path_column.get())) {
1924
5
                json_path_null_maps[i / 2] = &nullable_column->get_null_map_data();
1925
5
                const auto& nested_column = nullable_column->get_nested_column();
1926
5
                json_path_columns[i / 2] = assert_cast<const ColumnString*>(&nested_column);
1927
31
            } else {
1928
31
                json_path_columns[i / 2] = assert_cast<const ColumnString*>(path_column.get());
1929
31
            }
1930
1931
36
            json_value_constant[i / 2] = value_const;
1932
36
            if (const auto* nullable_column =
1933
36
                        check_and_get_column<ColumnNullable>(value_column.get())) {
1934
17
                json_value_null_maps[i / 2] = &nullable_column->get_null_map_data();
1935
17
                const auto& nested_column = nullable_column->get_nested_column();
1936
17
                json_value_columns[i / 2] = assert_cast<const ColumnString*>(&nested_column);
1937
19
            } else {
1938
19
                json_value_columns[i / 2] = assert_cast<const ColumnString*>(value_column.get());
1939
19
            }
1940
36
        }
1941
1942
29
        DorisVector<const JsonbDocument*> json_documents(input_rows_count);
1943
29
        if (json_data_const) {
1944
5
            auto json_data_string = json_data_column->get_data_at(0);
1945
5
            const JsonbDocument* doc = nullptr;
1946
5
            RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(json_data_string.data,
1947
5
                                                                  json_data_string.size, &doc));
1948
5
            if (!doc || !doc->getValue()) [[unlikely]] {
1949
0
                return create_all_null_result(return_data_type, block, result, input_rows_count);
1950
0
            }
1951
30
            for (size_t i = 0; i != input_rows_count; ++i) {
1952
25
                json_documents[i] = doc;
1953
25
            }
1954
24
        } else {
1955
50
            for (size_t i = 0; i != input_rows_count; ++i) {
1956
26
                if (json_data_null_map && (*json_data_null_map)[i]) {
1957
0
                    null_map[i] = 1;
1958
0
                    json_documents[i] = nullptr;
1959
0
                    continue;
1960
0
                }
1961
1962
26
                auto json_data_string = json_data_column->get_data_at(i);
1963
26
                const JsonbDocument* doc = nullptr;
1964
26
                RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(json_data_string.data,
1965
26
                                                                      json_data_string.size, &doc));
1966
26
                if (!doc || !doc->getValue()) [[unlikely]] {
1967
0
                    null_map[i] = 1;
1968
0
                    continue;
1969
0
                }
1970
26
                json_documents[i] = doc;
1971
26
            }
1972
24
        }
1973
1974
29
        DorisVector<DorisVector<JsonbPath>> json_paths(keys_count);
1975
29
        DorisVector<DorisVector<const JsonbValue*>> json_values(keys_count);
1976
1977
29
        RETURN_IF_ERROR(parse_paths_and_values(json_paths, json_values, arguments, input_rows_count,
1978
29
                                               json_path_columns, json_path_constant,
1979
29
                                               json_path_null_maps, json_value_columns,
1980
29
                                               json_value_constant, json_value_null_maps));
1981
1982
25
        JsonbWriter writer;
1983
25
        struct DocumentBuffer {
1984
25
            DorisUniqueBufferPtr<char> ptr;
1985
25
            size_t size = 0;
1986
25
            size_t capacity = 0;
1987
25
        };
1988
1989
25
        DocumentBuffer tmp_buffer;
1990
1991
81
        for (size_t row_idx = 0; row_idx != input_rows_count; ++row_idx) {
1992
120
            for (size_t i = 1; i < arguments.size(); i += 2) {
1993
64
                const size_t index = i / 2;
1994
64
                auto& json_path = json_paths[index];
1995
64
                auto& json_value = json_values[index];
1996
1997
64
                const auto path_index = index_check_const(row_idx, json_path_constant[index]);
1998
64
                const auto value_index = index_check_const(row_idx, json_value_constant[index]);
1999
2000
64
                if (null_map[row_idx]) {
2001
0
                    continue;
2002
0
                }
2003
2004
64
                if (json_documents[row_idx] == nullptr) {
2005
0
                    null_map[row_idx] = 1;
2006
0
                    continue;
2007
0
                }
2008
2009
64
                if (json_path_null_maps[index] && (*json_path_null_maps[index])[path_index]) {
2010
2
                    null_map[row_idx] = 1;
2011
2
                    continue;
2012
2
                }
2013
2014
62
                auto find_result =
2015
62
                        json_documents[row_idx]->getValue()->findValue(json_path[path_index]);
2016
2017
62
                if (find_result.is_wildcard) {
2018
0
                    return Status::InvalidArgument(
2019
0
                            " In this situation, path expressions may not contain the * and ** "
2020
0
                            "tokens or an array range, argument index: {}, row index: {}",
2021
0
                            i, row_idx);
2022
0
                }
2023
2024
62
                if constexpr (modify_type == JsonbModifyType::Insert) {
2025
62
                    if (find_result.value) {
2026
18
                        continue;
2027
18
                    }
2028
                } else if constexpr (modify_type == JsonbModifyType::Replace) {
2029
                    if (!find_result.value) {
2030
                        continue;
2031
                    }
2032
                }
2033
2034
44
                std::vector<const JsonbValue*> parents;
2035
2036
62
                bool replace = false;
2037
62
                parents.emplace_back(json_documents[row_idx]->getValue());
2038
62
                const auto legs_count = json_path[path_index].get_leg_vector_size();
2039
62
                if (find_result.value) {
2040
                    // find target path, replace it with the new value.
2041
0
                    replace = true;
2042
0
                    if (!build_parents_by_path(json_documents[row_idx]->getValue(),
2043
0
                                               json_path[path_index], parents)) {
2044
0
                        continue;
2045
0
                    }
2046
62
                } else {
2047
                    // does not find target path, insert the new value.
2048
62
                    JsonbPath new_path;
2049
62
                    DCHECK_GT(legs_count, 0);
2050
103
                    for (size_t j = 0; j + 1 < legs_count; ++j) {
2051
41
                        auto* current_leg = json_path[path_index].get_leg_from_leg_vector(j);
2052
41
                        std::unique_ptr<leg_info> leg = std::make_unique<leg_info>(
2053
41
                                current_leg->leg_ptr, current_leg->leg_len,
2054
41
                                current_leg->array_index, current_leg->type);
2055
41
                        new_path.add_leg_to_leg_vector(std::move(leg));
2056
41
                    }
2057
2058
62
                    if (!build_parents_by_path(json_documents[row_idx]->getValue(), new_path,
2059
62
                                               parents)) {
2060
1
                        continue;
2061
1
                    }
2062
62
                }
2063
2064
61
                leg_info* last_leg =
2065
61
                        legs_count > 0
2066
61
                                ? json_path[path_index].get_leg_from_leg_vector(legs_count - 1)
2067
61
                                : nullptr;
2068
61
                RETURN_IF_ERROR(write_json_value(json_documents[row_idx]->getValue(), parents, 0,
2069
61
                                                 json_value[value_index], replace, last_leg,
2070
61
                                                 writer));
2071
2072
61
                auto* writer_output = writer.getOutput();
2073
61
                if (writer_output->getSize() > tmp_buffer.capacity) {
2074
20
                    tmp_buffer.capacity =
2075
20
                            ((size_t(writer_output->getSize()) + 1024 - 1) / 1024) * 1024;
2076
20
                    tmp_buffer.ptr = make_unique_buffer<char>(tmp_buffer.capacity);
2077
20
                    DCHECK_LE(writer_output->getSize(), tmp_buffer.capacity);
2078
20
                }
2079
2080
61
                memcpy(tmp_buffer.ptr.get(), writer_output->getBuffer(), writer_output->getSize());
2081
61
                tmp_buffer.size = writer_output->getSize();
2082
2083
61
                writer.reset();
2084
2085
61
                RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(
2086
61
                        tmp_buffer.ptr.get(), tmp_buffer.size, &json_documents[row_idx]));
2087
61
            }
2088
2089
56
            if (!null_map[row_idx]) {
2090
36
                const auto* jsonb_document = json_documents[row_idx];
2091
36
                const auto size = jsonb_document->numPackedBytes();
2092
36
                res_chars.insert(reinterpret_cast<const char*>(jsonb_document),
2093
36
                                 reinterpret_cast<const char*>(jsonb_document) + size);
2094
36
            }
2095
2096
56
            res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size());
2097
2098
56
            if (!null_map[row_idx]) {
2099
36
                auto* ptr = res_chars.data() + res_offsets[row_idx - 1];
2100
36
                auto size = res_offsets[row_idx] - res_offsets[row_idx - 1];
2101
36
                const JsonbDocument* doc = nullptr;
2102
36
                THROW_IF_ERROR(JsonbDocument::checkAndCreateDocument(
2103
36
                        reinterpret_cast<const char*>(ptr), size, &doc));
2104
36
            }
2105
56
        }
2106
2107
43
        block.get_by_position(result).column = std::move(result_column);
2108
43
        return Status::OK();
2109
25
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
1858
28
                        uint32_t result, size_t input_rows_count) const override {
1859
28
        if (arguments.size() % 2 != 1 || arguments.size() < 3) {
1860
0
            return Status::InvalidArgument(
1861
0
                    "Function {} must have an odd number of arguments and more than 2 arguments, "
1862
0
                    "but got: {}",
1863
0
                    name, arguments.size());
1864
0
        }
1865
1866
28
        const size_t keys_count = (arguments.size() - 1) / 2;
1867
1868
28
        auto return_data_type = make_nullable(std::make_shared<DataTypeJsonb>());
1869
1870
28
        auto result_column = return_data_type->create_column();
1871
28
        auto& result_nullable_col = assert_cast<ColumnNullable&>(*result_column);
1872
28
        auto& null_map = result_nullable_col.get_null_map_data();
1873
28
        auto& res_string_column =
1874
28
                assert_cast<ColumnString&>(result_nullable_col.get_nested_column());
1875
28
        auto& res_chars = res_string_column.get_chars();
1876
28
        auto& res_offsets = res_string_column.get_offsets();
1877
1878
28
        null_map.resize_fill(input_rows_count, 0);
1879
28
        res_offsets.resize(input_rows_count);
1880
28
        auto&& [json_data_arg_column, json_data_const] =
1881
28
                unpack_if_const(block.get_by_position(arguments[0]).column);
1882
1883
28
        if (json_data_const) {
1884
3
            if (json_data_arg_column->is_null_at(0)) {
1885
0
                return create_all_null_result(return_data_type, block, result, input_rows_count);
1886
0
            }
1887
3
        }
1888
1889
28
        std::vector<const ColumnString*> json_path_columns(keys_count);
1890
28
        std::vector<bool> json_path_constant(keys_count);
1891
28
        std::vector<const NullMap*> json_path_null_maps(keys_count, nullptr);
1892
1893
28
        std::vector<const ColumnString*> json_value_columns(keys_count);
1894
28
        std::vector<bool> json_value_constant(keys_count);
1895
28
        std::vector<const NullMap*> json_value_null_maps(keys_count, nullptr);
1896
1897
28
        const NullMap* json_data_null_map = nullptr;
1898
28
        const ColumnString* json_data_column;
1899
28
        if (const auto* nullable_column =
1900
28
                    check_and_get_column<ColumnNullable>(json_data_arg_column.get())) {
1901
28
            json_data_null_map = &nullable_column->get_null_map_data();
1902
28
            const auto& nested_column = nullable_column->get_nested_column();
1903
28
            json_data_column = assert_cast<const ColumnString*>(&nested_column);
1904
28
        } else {
1905
0
            json_data_column = assert_cast<const ColumnString*>(json_data_arg_column.get());
1906
0
        }
1907
1908
64
        for (size_t i = 1; i < arguments.size(); i += 2) {
1909
36
            auto&& [path_column, path_const] =
1910
36
                    unpack_if_const(block.get_by_position(arguments[i]).column);
1911
36
            auto&& [value_column, value_const] =
1912
36
                    unpack_if_const(block.get_by_position(arguments[i + 1]).column);
1913
1914
36
            if (path_const) {
1915
9
                if (path_column->is_null_at(0)) {
1916
0
                    return create_all_null_result(return_data_type, block, result,
1917
0
                                                  input_rows_count);
1918
0
                }
1919
9
            }
1920
1921
36
            json_path_constant[i / 2] = path_const;
1922
36
            if (const auto* nullable_column =
1923
36
                        check_and_get_column<ColumnNullable>(path_column.get())) {
1924
2
                json_path_null_maps[i / 2] = &nullable_column->get_null_map_data();
1925
2
                const auto& nested_column = nullable_column->get_nested_column();
1926
2
                json_path_columns[i / 2] = assert_cast<const ColumnString*>(&nested_column);
1927
34
            } else {
1928
34
                json_path_columns[i / 2] = assert_cast<const ColumnString*>(path_column.get());
1929
34
            }
1930
1931
36
            json_value_constant[i / 2] = value_const;
1932
36
            if (const auto* nullable_column =
1933
36
                        check_and_get_column<ColumnNullable>(value_column.get())) {
1934
17
                json_value_null_maps[i / 2] = &nullable_column->get_null_map_data();
1935
17
                const auto& nested_column = nullable_column->get_nested_column();
1936
17
                json_value_columns[i / 2] = assert_cast<const ColumnString*>(&nested_column);
1937
19
            } else {
1938
19
                json_value_columns[i / 2] = assert_cast<const ColumnString*>(value_column.get());
1939
19
            }
1940
36
        }
1941
1942
28
        DorisVector<const JsonbDocument*> json_documents(input_rows_count);
1943
28
        if (json_data_const) {
1944
3
            auto json_data_string = json_data_column->get_data_at(0);
1945
3
            const JsonbDocument* doc = nullptr;
1946
3
            RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(json_data_string.data,
1947
3
                                                                  json_data_string.size, &doc));
1948
3
            if (!doc || !doc->getValue()) [[unlikely]] {
1949
0
                return create_all_null_result(return_data_type, block, result, input_rows_count);
1950
0
            }
1951
17
            for (size_t i = 0; i != input_rows_count; ++i) {
1952
14
                json_documents[i] = doc;
1953
14
            }
1954
25
        } else {
1955
52
            for (size_t i = 0; i != input_rows_count; ++i) {
1956
27
                if (json_data_null_map && (*json_data_null_map)[i]) {
1957
0
                    null_map[i] = 1;
1958
0
                    json_documents[i] = nullptr;
1959
0
                    continue;
1960
0
                }
1961
1962
27
                auto json_data_string = json_data_column->get_data_at(i);
1963
27
                const JsonbDocument* doc = nullptr;
1964
27
                RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(json_data_string.data,
1965
27
                                                                      json_data_string.size, &doc));
1966
27
                if (!doc || !doc->getValue()) [[unlikely]] {
1967
0
                    null_map[i] = 1;
1968
0
                    continue;
1969
0
                }
1970
27
                json_documents[i] = doc;
1971
27
            }
1972
25
        }
1973
1974
28
        DorisVector<DorisVector<JsonbPath>> json_paths(keys_count);
1975
28
        DorisVector<DorisVector<const JsonbValue*>> json_values(keys_count);
1976
1977
28
        RETURN_IF_ERROR(parse_paths_and_values(json_paths, json_values, arguments, input_rows_count,
1978
28
                                               json_path_columns, json_path_constant,
1979
28
                                               json_path_null_maps, json_value_columns,
1980
28
                                               json_value_constant, json_value_null_maps));
1981
1982
26
        JsonbWriter writer;
1983
26
        struct DocumentBuffer {
1984
26
            DorisUniqueBufferPtr<char> ptr;
1985
26
            size_t size = 0;
1986
26
            size_t capacity = 0;
1987
26
        };
1988
1989
26
        DocumentBuffer tmp_buffer;
1990
1991
65
        for (size_t row_idx = 0; row_idx != input_rows_count; ++row_idx) {
1992
107
            for (size_t i = 1; i < arguments.size(); i += 2) {
1993
68
                const size_t index = i / 2;
1994
68
                auto& json_path = json_paths[index];
1995
68
                auto& json_value = json_values[index];
1996
1997
68
                const auto path_index = index_check_const(row_idx, json_path_constant[index]);
1998
68
                const auto value_index = index_check_const(row_idx, json_value_constant[index]);
1999
2000
68
                if (null_map[row_idx]) {
2001
0
                    continue;
2002
0
                }
2003
2004
68
                if (json_documents[row_idx] == nullptr) {
2005
0
                    null_map[row_idx] = 1;
2006
0
                    continue;
2007
0
                }
2008
2009
68
                if (json_path_null_maps[index] && (*json_path_null_maps[index])[path_index]) {
2010
1
                    null_map[row_idx] = 1;
2011
1
                    continue;
2012
1
                }
2013
2014
67
                auto find_result =
2015
67
                        json_documents[row_idx]->getValue()->findValue(json_path[path_index]);
2016
2017
67
                if (find_result.is_wildcard) {
2018
0
                    return Status::InvalidArgument(
2019
0
                            " In this situation, path expressions may not contain the * and ** "
2020
0
                            "tokens or an array range, argument index: {}, row index: {}",
2021
0
                            i, row_idx);
2022
0
                }
2023
2024
                if constexpr (modify_type == JsonbModifyType::Insert) {
2025
                    if (find_result.value) {
2026
                        continue;
2027
                    }
2028
67
                } else if constexpr (modify_type == JsonbModifyType::Replace) {
2029
67
                    if (!find_result.value) {
2030
67
                        continue;
2031
67
                    }
2032
67
                }
2033
2034
67
                std::vector<const JsonbValue*> parents;
2035
2036
67
                bool replace = false;
2037
67
                parents.emplace_back(json_documents[row_idx]->getValue());
2038
67
                const auto legs_count = json_path[path_index].get_leg_vector_size();
2039
67
                if (find_result.value) {
2040
                    // find target path, replace it with the new value.
2041
44
                    replace = true;
2042
44
                    if (!build_parents_by_path(json_documents[row_idx]->getValue(),
2043
44
                                               json_path[path_index], parents)) {
2044
0
                        continue;
2045
0
                    }
2046
44
                } else {
2047
                    // does not find target path, insert the new value.
2048
23
                    JsonbPath new_path;
2049
23
                    DCHECK_GT(legs_count, 0);
2050
42
                    for (size_t j = 0; j + 1 < legs_count; ++j) {
2051
19
                        auto* current_leg = json_path[path_index].get_leg_from_leg_vector(j);
2052
19
                        std::unique_ptr<leg_info> leg = std::make_unique<leg_info>(
2053
19
                                current_leg->leg_ptr, current_leg->leg_len,
2054
19
                                current_leg->array_index, current_leg->type);
2055
19
                        new_path.add_leg_to_leg_vector(std::move(leg));
2056
19
                    }
2057
2058
23
                    if (!build_parents_by_path(json_documents[row_idx]->getValue(), new_path,
2059
23
                                               parents)) {
2060
11
                        continue;
2061
11
                    }
2062
23
                }
2063
2064
56
                leg_info* last_leg =
2065
56
                        legs_count > 0
2066
56
                                ? json_path[path_index].get_leg_from_leg_vector(legs_count - 1)
2067
56
                                : nullptr;
2068
56
                RETURN_IF_ERROR(write_json_value(json_documents[row_idx]->getValue(), parents, 0,
2069
56
                                                 json_value[value_index], replace, last_leg,
2070
56
                                                 writer));
2071
2072
56
                auto* writer_output = writer.getOutput();
2073
56
                if (writer_output->getSize() > tmp_buffer.capacity) {
2074
24
                    tmp_buffer.capacity =
2075
24
                            ((size_t(writer_output->getSize()) + 1024 - 1) / 1024) * 1024;
2076
24
                    tmp_buffer.ptr = make_unique_buffer<char>(tmp_buffer.capacity);
2077
24
                    DCHECK_LE(writer_output->getSize(), tmp_buffer.capacity);
2078
24
                }
2079
2080
56
                memcpy(tmp_buffer.ptr.get(), writer_output->getBuffer(), writer_output->getSize());
2081
56
                tmp_buffer.size = writer_output->getSize();
2082
2083
56
                writer.reset();
2084
2085
56
                RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(
2086
56
                        tmp_buffer.ptr.get(), tmp_buffer.size, &json_documents[row_idx]));
2087
56
            }
2088
2089
39
            if (!null_map[row_idx]) {
2090
38
                const auto* jsonb_document = json_documents[row_idx];
2091
38
                const auto size = jsonb_document->numPackedBytes();
2092
38
                res_chars.insert(reinterpret_cast<const char*>(jsonb_document),
2093
38
                                 reinterpret_cast<const char*>(jsonb_document) + size);
2094
38
            }
2095
2096
39
            res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size());
2097
2098
39
            if (!null_map[row_idx]) {
2099
38
                auto* ptr = res_chars.data() + res_offsets[row_idx - 1];
2100
38
                auto size = res_offsets[row_idx] - res_offsets[row_idx - 1];
2101
38
                const JsonbDocument* doc = nullptr;
2102
38
                THROW_IF_ERROR(JsonbDocument::checkAndCreateDocument(
2103
38
                        reinterpret_cast<const char*>(ptr), size, &doc));
2104
38
            }
2105
39
        }
2106
2107
26
        block.get_by_position(result).column = std::move(result_column);
2108
26
        return Status::OK();
2109
26
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
1858
28
                        uint32_t result, size_t input_rows_count) const override {
1859
28
        if (arguments.size() % 2 != 1 || arguments.size() < 3) {
1860
0
            return Status::InvalidArgument(
1861
0
                    "Function {} must have an odd number of arguments and more than 2 arguments, "
1862
0
                    "but got: {}",
1863
0
                    name, arguments.size());
1864
0
        }
1865
1866
28
        const size_t keys_count = (arguments.size() - 1) / 2;
1867
1868
28
        auto return_data_type = make_nullable(std::make_shared<DataTypeJsonb>());
1869
1870
28
        auto result_column = return_data_type->create_column();
1871
28
        auto& result_nullable_col = assert_cast<ColumnNullable&>(*result_column);
1872
28
        auto& null_map = result_nullable_col.get_null_map_data();
1873
28
        auto& res_string_column =
1874
28
                assert_cast<ColumnString&>(result_nullable_col.get_nested_column());
1875
28
        auto& res_chars = res_string_column.get_chars();
1876
28
        auto& res_offsets = res_string_column.get_offsets();
1877
1878
28
        null_map.resize_fill(input_rows_count, 0);
1879
28
        res_offsets.resize(input_rows_count);
1880
28
        auto&& [json_data_arg_column, json_data_const] =
1881
28
                unpack_if_const(block.get_by_position(arguments[0]).column);
1882
1883
28
        if (json_data_const) {
1884
3
            if (json_data_arg_column->is_null_at(0)) {
1885
0
                return create_all_null_result(return_data_type, block, result, input_rows_count);
1886
0
            }
1887
3
        }
1888
1889
28
        std::vector<const ColumnString*> json_path_columns(keys_count);
1890
28
        std::vector<bool> json_path_constant(keys_count);
1891
28
        std::vector<const NullMap*> json_path_null_maps(keys_count, nullptr);
1892
1893
28
        std::vector<const ColumnString*> json_value_columns(keys_count);
1894
28
        std::vector<bool> json_value_constant(keys_count);
1895
28
        std::vector<const NullMap*> json_value_null_maps(keys_count, nullptr);
1896
1897
28
        const NullMap* json_data_null_map = nullptr;
1898
28
        const ColumnString* json_data_column;
1899
28
        if (const auto* nullable_column =
1900
28
                    check_and_get_column<ColumnNullable>(json_data_arg_column.get())) {
1901
28
            json_data_null_map = &nullable_column->get_null_map_data();
1902
28
            const auto& nested_column = nullable_column->get_nested_column();
1903
28
            json_data_column = assert_cast<const ColumnString*>(&nested_column);
1904
28
        } else {
1905
0
            json_data_column = assert_cast<const ColumnString*>(json_data_arg_column.get());
1906
0
        }
1907
1908
66
        for (size_t i = 1; i < arguments.size(); i += 2) {
1909
38
            auto&& [path_column, path_const] =
1910
38
                    unpack_if_const(block.get_by_position(arguments[i]).column);
1911
38
            auto&& [value_column, value_const] =
1912
38
                    unpack_if_const(block.get_by_position(arguments[i + 1]).column);
1913
1914
38
            if (path_const) {
1915
11
                if (path_column->is_null_at(0)) {
1916
0
                    return create_all_null_result(return_data_type, block, result,
1917
0
                                                  input_rows_count);
1918
0
                }
1919
11
            }
1920
1921
38
            json_path_constant[i / 2] = path_const;
1922
38
            if (const auto* nullable_column =
1923
38
                        check_and_get_column<ColumnNullable>(path_column.get())) {
1924
1
                json_path_null_maps[i / 2] = &nullable_column->get_null_map_data();
1925
1
                const auto& nested_column = nullable_column->get_nested_column();
1926
1
                json_path_columns[i / 2] = assert_cast<const ColumnString*>(&nested_column);
1927
37
            } else {
1928
37
                json_path_columns[i / 2] = assert_cast<const ColumnString*>(path_column.get());
1929
37
            }
1930
1931
38
            json_value_constant[i / 2] = value_const;
1932
38
            if (const auto* nullable_column =
1933
38
                        check_and_get_column<ColumnNullable>(value_column.get())) {
1934
19
                json_value_null_maps[i / 2] = &nullable_column->get_null_map_data();
1935
19
                const auto& nested_column = nullable_column->get_nested_column();
1936
19
                json_value_columns[i / 2] = assert_cast<const ColumnString*>(&nested_column);
1937
19
            } else {
1938
19
                json_value_columns[i / 2] = assert_cast<const ColumnString*>(value_column.get());
1939
19
            }
1940
38
        }
1941
1942
28
        DorisVector<const JsonbDocument*> json_documents(input_rows_count);
1943
28
        if (json_data_const) {
1944
3
            auto json_data_string = json_data_column->get_data_at(0);
1945
3
            const JsonbDocument* doc = nullptr;
1946
3
            RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(json_data_string.data,
1947
3
                                                                  json_data_string.size, &doc));
1948
3
            if (!doc || !doc->getValue()) [[unlikely]] {
1949
0
                return create_all_null_result(return_data_type, block, result, input_rows_count);
1950
0
            }
1951
15
            for (size_t i = 0; i != input_rows_count; ++i) {
1952
12
                json_documents[i] = doc;
1953
12
            }
1954
25
        } else {
1955
50
            for (size_t i = 0; i != input_rows_count; ++i) {
1956
25
                if (json_data_null_map && (*json_data_null_map)[i]) {
1957
0
                    null_map[i] = 1;
1958
0
                    json_documents[i] = nullptr;
1959
0
                    continue;
1960
0
                }
1961
1962
25
                auto json_data_string = json_data_column->get_data_at(i);
1963
25
                const JsonbDocument* doc = nullptr;
1964
25
                RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(json_data_string.data,
1965
25
                                                                      json_data_string.size, &doc));
1966
25
                if (!doc || !doc->getValue()) [[unlikely]] {
1967
0
                    null_map[i] = 1;
1968
0
                    continue;
1969
0
                }
1970
25
                json_documents[i] = doc;
1971
25
            }
1972
25
        }
1973
1974
28
        DorisVector<DorisVector<JsonbPath>> json_paths(keys_count);
1975
28
        DorisVector<DorisVector<const JsonbValue*>> json_values(keys_count);
1976
1977
28
        RETURN_IF_ERROR(parse_paths_and_values(json_paths, json_values, arguments, input_rows_count,
1978
28
                                               json_path_columns, json_path_constant,
1979
28
                                               json_path_null_maps, json_value_columns,
1980
28
                                               json_value_constant, json_value_null_maps));
1981
1982
26
        JsonbWriter writer;
1983
26
        struct DocumentBuffer {
1984
26
            DorisUniqueBufferPtr<char> ptr;
1985
26
            size_t size = 0;
1986
26
            size_t capacity = 0;
1987
26
        };
1988
1989
26
        DocumentBuffer tmp_buffer;
1990
1991
72
        for (size_t row_idx = 0; row_idx != input_rows_count; ++row_idx) {
1992
114
            for (size_t i = 1; i < arguments.size(); i += 2) {
1993
68
                const size_t index = i / 2;
1994
68
                auto& json_path = json_paths[index];
1995
68
                auto& json_value = json_values[index];
1996
1997
68
                const auto path_index = index_check_const(row_idx, json_path_constant[index]);
1998
68
                const auto value_index = index_check_const(row_idx, json_value_constant[index]);
1999
2000
68
                if (null_map[row_idx]) {
2001
0
                    continue;
2002
0
                }
2003
2004
68
                if (json_documents[row_idx] == nullptr) {
2005
0
                    null_map[row_idx] = 1;
2006
0
                    continue;
2007
0
                }
2008
2009
68
                if (json_path_null_maps[index] && (*json_path_null_maps[index])[path_index]) {
2010
1
                    null_map[row_idx] = 1;
2011
1
                    continue;
2012
1
                }
2013
2014
67
                auto find_result =
2015
67
                        json_documents[row_idx]->getValue()->findValue(json_path[path_index]);
2016
2017
67
                if (find_result.is_wildcard) {
2018
0
                    return Status::InvalidArgument(
2019
0
                            " In this situation, path expressions may not contain the * and ** "
2020
0
                            "tokens or an array range, argument index: {}, row index: {}",
2021
0
                            i, row_idx);
2022
0
                }
2023
2024
                if constexpr (modify_type == JsonbModifyType::Insert) {
2025
                    if (find_result.value) {
2026
                        continue;
2027
                    }
2028
67
                } else if constexpr (modify_type == JsonbModifyType::Replace) {
2029
67
                    if (!find_result.value) {
2030
11
                        continue;
2031
11
                    }
2032
67
                }
2033
2034
56
                std::vector<const JsonbValue*> parents;
2035
2036
67
                bool replace = false;
2037
67
                parents.emplace_back(json_documents[row_idx]->getValue());
2038
67
                const auto legs_count = json_path[path_index].get_leg_vector_size();
2039
67
                if (find_result.value) {
2040
                    // find target path, replace it with the new value.
2041
56
                    replace = true;
2042
56
                    if (!build_parents_by_path(json_documents[row_idx]->getValue(),
2043
56
                                               json_path[path_index], parents)) {
2044
0
                        continue;
2045
0
                    }
2046
56
                } else {
2047
                    // does not find target path, insert the new value.
2048
11
                    JsonbPath new_path;
2049
11
                    DCHECK_GT(legs_count, 0);
2050
11
                    for (size_t j = 0; j + 1 < legs_count; ++j) {
2051
0
                        auto* current_leg = json_path[path_index].get_leg_from_leg_vector(j);
2052
0
                        std::unique_ptr<leg_info> leg = std::make_unique<leg_info>(
2053
0
                                current_leg->leg_ptr, current_leg->leg_len,
2054
0
                                current_leg->array_index, current_leg->type);
2055
0
                        new_path.add_leg_to_leg_vector(std::move(leg));
2056
0
                    }
2057
2058
11
                    if (!build_parents_by_path(json_documents[row_idx]->getValue(), new_path,
2059
11
                                               parents)) {
2060
0
                        continue;
2061
0
                    }
2062
11
                }
2063
2064
67
                leg_info* last_leg =
2065
67
                        legs_count > 0
2066
67
                                ? json_path[path_index].get_leg_from_leg_vector(legs_count - 1)
2067
67
                                : nullptr;
2068
67
                RETURN_IF_ERROR(write_json_value(json_documents[row_idx]->getValue(), parents, 0,
2069
67
                                                 json_value[value_index], replace, last_leg,
2070
67
                                                 writer));
2071
2072
67
                auto* writer_output = writer.getOutput();
2073
67
                if (writer_output->getSize() > tmp_buffer.capacity) {
2074
23
                    tmp_buffer.capacity =
2075
23
                            ((size_t(writer_output->getSize()) + 1024 - 1) / 1024) * 1024;
2076
23
                    tmp_buffer.ptr = make_unique_buffer<char>(tmp_buffer.capacity);
2077
23
                    DCHECK_LE(writer_output->getSize(), tmp_buffer.capacity);
2078
23
                }
2079
2080
67
                memcpy(tmp_buffer.ptr.get(), writer_output->getBuffer(), writer_output->getSize());
2081
67
                tmp_buffer.size = writer_output->getSize();
2082
2083
67
                writer.reset();
2084
2085
67
                RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(
2086
67
                        tmp_buffer.ptr.get(), tmp_buffer.size, &json_documents[row_idx]));
2087
67
            }
2088
2089
46
            if (!null_map[row_idx]) {
2090
34
                const auto* jsonb_document = json_documents[row_idx];
2091
34
                const auto size = jsonb_document->numPackedBytes();
2092
34
                res_chars.insert(reinterpret_cast<const char*>(jsonb_document),
2093
34
                                 reinterpret_cast<const char*>(jsonb_document) + size);
2094
34
            }
2095
2096
46
            res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size());
2097
2098
46
            if (!null_map[row_idx]) {
2099
34
                auto* ptr = res_chars.data() + res_offsets[row_idx - 1];
2100
34
                auto size = res_offsets[row_idx] - res_offsets[row_idx - 1];
2101
34
                const JsonbDocument* doc = nullptr;
2102
34
                THROW_IF_ERROR(JsonbDocument::checkAndCreateDocument(
2103
34
                        reinterpret_cast<const char*>(ptr), size, &doc));
2104
34
            }
2105
46
        }
2106
2107
37
        block.get_by_position(result).column = std::move(result_column);
2108
37
        return Status::OK();
2109
26
    }
2110
2111
    bool build_parents_by_path(const JsonbValue* root, const JsonbPath& path,
2112
352
                               std::vector<const JsonbValue*>& parents) const {
2113
352
        const size_t index = parents.size() - 1;
2114
352
        if (index == path.get_leg_vector_size()) {
2115
149
            return true;
2116
149
        }
2117
2118
203
        JsonbPath current;
2119
203
        auto* current_leg = path.get_leg_from_leg_vector(index);
2120
203
        std::unique_ptr<leg_info> leg =
2121
203
                std::make_unique<leg_info>(current_leg->leg_ptr, current_leg->leg_len,
2122
203
                                           current_leg->array_index, current_leg->type);
2123
203
        current.add_leg_to_leg_vector(std::move(leg));
2124
2125
203
        auto find_result = root->findValue(current);
2126
203
        if (!find_result.value) {
2127
12
            std::string path_string;
2128
12
            current.to_string(&path_string);
2129
12
            return false;
2130
191
        } else if (find_result.value == root) {
2131
6
            return true;
2132
185
        } else {
2133
185
            parents.emplace_back(find_result.value);
2134
185
        }
2135
2136
185
        return build_parents_by_path(find_result.value, path, parents);
2137
203
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE21build_parents_by_pathEPKNS_10JsonbValueERKNS_9JsonbPathERSt6vectorIS5_SaIS5_EE
Line
Count
Source
2112
84
                               std::vector<const JsonbValue*>& parents) const {
2113
84
        const size_t index = parents.size() - 1;
2114
84
        if (index == path.get_leg_vector_size()) {
2115
43
            return true;
2116
43
        }
2117
2118
41
        JsonbPath current;
2119
41
        auto* current_leg = path.get_leg_from_leg_vector(index);
2120
41
        std::unique_ptr<leg_info> leg =
2121
41
                std::make_unique<leg_info>(current_leg->leg_ptr, current_leg->leg_len,
2122
41
                                           current_leg->array_index, current_leg->type);
2123
41
        current.add_leg_to_leg_vector(std::move(leg));
2124
2125
41
        auto find_result = root->findValue(current);
2126
41
        if (!find_result.value) {
2127
1
            std::string path_string;
2128
1
            current.to_string(&path_string);
2129
1
            return false;
2130
40
        } else if (find_result.value == root) {
2131
0
            return true;
2132
40
        } else {
2133
40
            parents.emplace_back(find_result.value);
2134
40
        }
2135
2136
40
        return build_parents_by_path(find_result.value, path, parents);
2137
41
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE21build_parents_by_pathEPKNS_10JsonbValueERKNS_9JsonbPathERSt6vectorIS5_SaIS5_EE
Line
Count
Source
2112
137
                               std::vector<const JsonbValue*>& parents) const {
2113
137
        const size_t index = parents.size() - 1;
2114
137
        if (index == path.get_leg_vector_size()) {
2115
53
            return true;
2116
53
        }
2117
2118
84
        JsonbPath current;
2119
84
        auto* current_leg = path.get_leg_from_leg_vector(index);
2120
84
        std::unique_ptr<leg_info> leg =
2121
84
                std::make_unique<leg_info>(current_leg->leg_ptr, current_leg->leg_len,
2122
84
                                           current_leg->array_index, current_leg->type);
2123
84
        current.add_leg_to_leg_vector(std::move(leg));
2124
2125
84
        auto find_result = root->findValue(current);
2126
84
        if (!find_result.value) {
2127
11
            std::string path_string;
2128
11
            current.to_string(&path_string);
2129
11
            return false;
2130
73
        } else if (find_result.value == root) {
2131
3
            return true;
2132
70
        } else {
2133
70
            parents.emplace_back(find_result.value);
2134
70
        }
2135
2136
70
        return build_parents_by_path(find_result.value, path, parents);
2137
84
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE21build_parents_by_pathEPKNS_10JsonbValueERKNS_9JsonbPathERSt6vectorIS5_SaIS5_EE
Line
Count
Source
2112
131
                               std::vector<const JsonbValue*>& parents) const {
2113
131
        const size_t index = parents.size() - 1;
2114
131
        if (index == path.get_leg_vector_size()) {
2115
53
            return true;
2116
53
        }
2117
2118
78
        JsonbPath current;
2119
78
        auto* current_leg = path.get_leg_from_leg_vector(index);
2120
78
        std::unique_ptr<leg_info> leg =
2121
78
                std::make_unique<leg_info>(current_leg->leg_ptr, current_leg->leg_len,
2122
78
                                           current_leg->array_index, current_leg->type);
2123
78
        current.add_leg_to_leg_vector(std::move(leg));
2124
2125
78
        auto find_result = root->findValue(current);
2126
78
        if (!find_result.value) {
2127
0
            std::string path_string;
2128
0
            current.to_string(&path_string);
2129
0
            return false;
2130
78
        } else if (find_result.value == root) {
2131
3
            return true;
2132
75
        } else {
2133
75
            parents.emplace_back(find_result.value);
2134
75
        }
2135
2136
75
        return build_parents_by_path(find_result.value, path, parents);
2137
78
    }
2138
2139
    Status write_json_value(const JsonbValue* root, const std::vector<const JsonbValue*>& parents,
2140
                            const size_t parent_index, const JsonbValue* value, const bool replace,
2141
340
                            const leg_info* last_leg, JsonbWriter& writer) const {
2142
340
        if (parent_index >= parents.size()) {
2143
0
            return Status::InvalidArgument(
2144
0
                    "JsonbModify: parent_index {} is out of bounds for parents size {}",
2145
0
                    parent_index, parents.size());
2146
0
        }
2147
2148
340
        if (parents[parent_index] != root) {
2149
0
            return Status::InvalidArgument(
2150
0
                    "JsonbModify: parent value does not match root value, parent_index: {}, "
2151
0
                    "parents size: {}",
2152
0
                    parent_index, parents.size());
2153
0
        }
2154
2155
340
        if (parent_index == parents.size() - 1 && replace) {
2156
            // We are at the last parent, write the value directly
2157
100
            if (value == nullptr) {
2158
24
                writer.writeNull();
2159
76
            } else {
2160
76
                writer.writeValue(value);
2161
76
            }
2162
100
            return Status::OK();
2163
100
        }
2164
2165
240
        bool value_written = false;
2166
240
        bool is_last_parent = (parent_index == parents.size() - 1);
2167
240
        const auto* next_parent = is_last_parent ? nullptr : parents[parent_index + 1];
2168
240
        if (root->isArray()) {
2169
23
            writer.writeStartArray();
2170
23
            const auto* array_val = root->unpack<ArrayVal>();
2171
67
            for (int i = 0; i != array_val->numElem(); ++i) {
2172
44
                auto* it = array_val->get(i);
2173
2174
44
                if (is_last_parent && last_leg->array_index == i) {
2175
0
                    value_written = true;
2176
0
                    writer.writeValue(value);
2177
44
                } else if (it == next_parent) {
2178
13
                    value_written = true;
2179
13
                    RETURN_IF_ERROR(write_json_value(it, parents, parent_index + 1, value, replace,
2180
13
                                                     last_leg, writer));
2181
31
                } else {
2182
31
                    writer.writeValue(it);
2183
31
                }
2184
44
            }
2185
23
            if (is_last_parent && !value_written) {
2186
10
                value_written = true;
2187
10
                writer.writeValue(value);
2188
10
            }
2189
2190
23
            writer.writeEndArray();
2191
2192
217
        } else {
2193
            /**
2194
                Because even for a non-array object, `$[0]` can still point to that object:
2195
                ```
2196
                select json_extract('{"key": "value"}', '$[0]');
2197
                +------------------------------------------+
2198
                | json_extract('{"key": "value"}', '$[0]') |
2199
                +------------------------------------------+
2200
                | {"key": "value"}                         |
2201
                +------------------------------------------+
2202
                ```
2203
                So when inserting an element into `$[1]`, even if '$' does not represent an array,
2204
                it should be converted to an array before insertion:
2205
                ```
2206
                select json_insert('123','$[1]', null);
2207
                +---------------------------------+
2208
                | json_insert('123','$[1]', null) |
2209
                +---------------------------------+
2210
                | [123, null]                     |
2211
                +---------------------------------+
2212
                ```
2213
             */
2214
217
            if (is_last_parent && last_leg && last_leg->type == ARRAY_CODE) {
2215
8
                writer.writeStartArray();
2216
8
                writer.writeValue(root);
2217
8
                writer.writeValue(value);
2218
8
                writer.writeEndArray();
2219
8
                return Status::OK();
2220
209
            } else if (root->isObject()) {
2221
209
                writer.writeStartObject();
2222
209
                const auto* object_val = root->unpack<ObjectVal>();
2223
403
                for (const auto& it : *object_val) {
2224
403
                    writer.writeKey(it.getKeyStr(), it.klen());
2225
403
                    if (it.value() == next_parent) {
2226
172
                        value_written = true;
2227
172
                        RETURN_IF_ERROR(write_json_value(it.value(), parents, parent_index + 1,
2228
172
                                                         value, replace, last_leg, writer));
2229
231
                    } else {
2230
231
                        writer.writeValue(it.value());
2231
231
                    }
2232
403
                }
2233
2234
209
                if (is_last_parent && !value_written) {
2235
37
                    value_written = true;
2236
37
                    writer.writeStartObject();
2237
37
                    writer.writeKey(last_leg->leg_ptr, static_cast<uint8_t>(last_leg->leg_len));
2238
37
                    writer.writeValue(value);
2239
37
                    writer.writeEndObject();
2240
37
                }
2241
209
                writer.writeEndObject();
2242
2243
209
            } else {
2244
0
                return Status::InvalidArgument("Cannot insert value into this type");
2245
0
            }
2246
217
        }
2247
2248
232
        if (!value_written) {
2249
0
            return Status::InvalidArgument(
2250
0
                    "JsonbModify: value not written, parent_index: {}, parents size: {}",
2251
0
                    parent_index, parents.size());
2252
0
        }
2253
2254
232
        return Status::OK();
2255
232
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE16write_json_valueEPKNS_10JsonbValueERKSt6vectorIS5_SaIS5_EEmS5_bPKNS_8leg_infoERNS_12JsonbWriterTINS_14JsonbOutStreamEEE
Line
Count
Source
2141
83
                            const leg_info* last_leg, JsonbWriter& writer) const {
2142
83
        if (parent_index >= parents.size()) {
2143
0
            return Status::InvalidArgument(
2144
0
                    "JsonbModify: parent_index {} is out of bounds for parents size {}",
2145
0
                    parent_index, parents.size());
2146
0
        }
2147
2148
83
        if (parents[parent_index] != root) {
2149
0
            return Status::InvalidArgument(
2150
0
                    "JsonbModify: parent value does not match root value, parent_index: {}, "
2151
0
                    "parents size: {}",
2152
0
                    parent_index, parents.size());
2153
0
        }
2154
2155
83
        if (parent_index == parents.size() - 1 && replace) {
2156
            // We are at the last parent, write the value directly
2157
0
            if (value == nullptr) {
2158
0
                writer.writeNull();
2159
0
            } else {
2160
0
                writer.writeValue(value);
2161
0
            }
2162
0
            return Status::OK();
2163
0
        }
2164
2165
83
        bool value_written = false;
2166
83
        bool is_last_parent = (parent_index == parents.size() - 1);
2167
83
        const auto* next_parent = is_last_parent ? nullptr : parents[parent_index + 1];
2168
83
        if (root->isArray()) {
2169
5
            writer.writeStartArray();
2170
5
            const auto* array_val = root->unpack<ArrayVal>();
2171
14
            for (int i = 0; i != array_val->numElem(); ++i) {
2172
9
                auto* it = array_val->get(i);
2173
2174
9
                if (is_last_parent && last_leg->array_index == i) {
2175
0
                    value_written = true;
2176
0
                    writer.writeValue(value);
2177
9
                } else if (it == next_parent) {
2178
0
                    value_written = true;
2179
0
                    RETURN_IF_ERROR(write_json_value(it, parents, parent_index + 1, value, replace,
2180
0
                                                     last_leg, writer));
2181
9
                } else {
2182
9
                    writer.writeValue(it);
2183
9
                }
2184
9
            }
2185
5
            if (is_last_parent && !value_written) {
2186
5
                value_written = true;
2187
5
                writer.writeValue(value);
2188
5
            }
2189
2190
5
            writer.writeEndArray();
2191
2192
78
        } else {
2193
            /**
2194
                Because even for a non-array object, `$[0]` can still point to that object:
2195
                ```
2196
                select json_extract('{"key": "value"}', '$[0]');
2197
                +------------------------------------------+
2198
                | json_extract('{"key": "value"}', '$[0]') |
2199
                +------------------------------------------+
2200
                | {"key": "value"}                         |
2201
                +------------------------------------------+
2202
                ```
2203
                So when inserting an element into `$[1]`, even if '$' does not represent an array,
2204
                it should be converted to an array before insertion:
2205
                ```
2206
                select json_insert('123','$[1]', null);
2207
                +---------------------------------+
2208
                | json_insert('123','$[1]', null) |
2209
                +---------------------------------+
2210
                | [123, null]                     |
2211
                +---------------------------------+
2212
                ```
2213
             */
2214
78
            if (is_last_parent && last_leg && last_leg->type == ARRAY_CODE) {
2215
4
                writer.writeStartArray();
2216
4
                writer.writeValue(root);
2217
4
                writer.writeValue(value);
2218
4
                writer.writeEndArray();
2219
4
                return Status::OK();
2220
74
            } else if (root->isObject()) {
2221
74
                writer.writeStartObject();
2222
74
                const auto* object_val = root->unpack<ObjectVal>();
2223
74
                for (const auto& it : *object_val) {
2224
70
                    writer.writeKey(it.getKeyStr(), it.klen());
2225
70
                    if (it.value() == next_parent) {
2226
40
                        value_written = true;
2227
40
                        RETURN_IF_ERROR(write_json_value(it.value(), parents, parent_index + 1,
2228
40
                                                         value, replace, last_leg, writer));
2229
40
                    } else {
2230
30
                        writer.writeValue(it.value());
2231
30
                    }
2232
70
                }
2233
2234
74
                if (is_last_parent && !value_written) {
2235
34
                    value_written = true;
2236
34
                    writer.writeStartObject();
2237
34
                    writer.writeKey(last_leg->leg_ptr, static_cast<uint8_t>(last_leg->leg_len));
2238
34
                    writer.writeValue(value);
2239
34
                    writer.writeEndObject();
2240
34
                }
2241
74
                writer.writeEndObject();
2242
2243
74
            } else {
2244
0
                return Status::InvalidArgument("Cannot insert value into this type");
2245
0
            }
2246
78
        }
2247
2248
79
        if (!value_written) {
2249
0
            return Status::InvalidArgument(
2250
0
                    "JsonbModify: value not written, parent_index: {}, parents size: {}",
2251
0
                    parent_index, parents.size());
2252
0
        }
2253
2254
79
        return Status::OK();
2255
79
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE16write_json_valueEPKNS_10JsonbValueERKSt6vectorIS5_SaIS5_EEmS5_bPKNS_8leg_infoERNS_12JsonbWriterTINS_14JsonbOutStreamEEE
Line
Count
Source
2141
126
                            const leg_info* last_leg, JsonbWriter& writer) const {
2142
126
        if (parent_index >= parents.size()) {
2143
0
            return Status::InvalidArgument(
2144
0
                    "JsonbModify: parent_index {} is out of bounds for parents size {}",
2145
0
                    parent_index, parents.size());
2146
0
        }
2147
2148
126
        if (parents[parent_index] != root) {
2149
0
            return Status::InvalidArgument(
2150
0
                    "JsonbModify: parent value does not match root value, parent_index: {}, "
2151
0
                    "parents size: {}",
2152
0
                    parent_index, parents.size());
2153
0
        }
2154
2155
126
        if (parent_index == parents.size() - 1 && replace) {
2156
            // We are at the last parent, write the value directly
2157
44
            if (value == nullptr) {
2158
10
                writer.writeNull();
2159
34
            } else {
2160
34
                writer.writeValue(value);
2161
34
            }
2162
44
            return Status::OK();
2163
44
        }
2164
2165
82
        bool value_written = false;
2166
82
        bool is_last_parent = (parent_index == parents.size() - 1);
2167
82
        const auto* next_parent = is_last_parent ? nullptr : parents[parent_index + 1];
2168
82
        if (root->isArray()) {
2169
10
            writer.writeStartArray();
2170
10
            const auto* array_val = root->unpack<ArrayVal>();
2171
29
            for (int i = 0; i != array_val->numElem(); ++i) {
2172
19
                auto* it = array_val->get(i);
2173
2174
19
                if (is_last_parent && last_leg->array_index == i) {
2175
0
                    value_written = true;
2176
0
                    writer.writeValue(value);
2177
19
                } else if (it == next_parent) {
2178
5
                    value_written = true;
2179
5
                    RETURN_IF_ERROR(write_json_value(it, parents, parent_index + 1, value, replace,
2180
5
                                                     last_leg, writer));
2181
14
                } else {
2182
14
                    writer.writeValue(it);
2183
14
                }
2184
19
            }
2185
10
            if (is_last_parent && !value_written) {
2186
5
                value_written = true;
2187
5
                writer.writeValue(value);
2188
5
            }
2189
2190
10
            writer.writeEndArray();
2191
2192
72
        } else {
2193
            /**
2194
                Because even for a non-array object, `$[0]` can still point to that object:
2195
                ```
2196
                select json_extract('{"key": "value"}', '$[0]');
2197
                +------------------------------------------+
2198
                | json_extract('{"key": "value"}', '$[0]') |
2199
                +------------------------------------------+
2200
                | {"key": "value"}                         |
2201
                +------------------------------------------+
2202
                ```
2203
                So when inserting an element into `$[1]`, even if '$' does not represent an array,
2204
                it should be converted to an array before insertion:
2205
                ```
2206
                select json_insert('123','$[1]', null);
2207
                +---------------------------------+
2208
                | json_insert('123','$[1]', null) |
2209
                +---------------------------------+
2210
                | [123, null]                     |
2211
                +---------------------------------+
2212
                ```
2213
             */
2214
72
            if (is_last_parent && last_leg && last_leg->type == ARRAY_CODE) {
2215
4
                writer.writeStartArray();
2216
4
                writer.writeValue(root);
2217
4
                writer.writeValue(value);
2218
4
                writer.writeEndArray();
2219
4
                return Status::OK();
2220
68
            } else if (root->isObject()) {
2221
68
                writer.writeStartObject();
2222
68
                const auto* object_val = root->unpack<ObjectVal>();
2223
158
                for (const auto& it : *object_val) {
2224
158
                    writer.writeKey(it.getKeyStr(), it.klen());
2225
158
                    if (it.value() == next_parent) {
2226
65
                        value_written = true;
2227
65
                        RETURN_IF_ERROR(write_json_value(it.value(), parents, parent_index + 1,
2228
65
                                                         value, replace, last_leg, writer));
2229
93
                    } else {
2230
93
                        writer.writeValue(it.value());
2231
93
                    }
2232
158
                }
2233
2234
68
                if (is_last_parent && !value_written) {
2235
3
                    value_written = true;
2236
3
                    writer.writeStartObject();
2237
3
                    writer.writeKey(last_leg->leg_ptr, static_cast<uint8_t>(last_leg->leg_len));
2238
3
                    writer.writeValue(value);
2239
3
                    writer.writeEndObject();
2240
3
                }
2241
68
                writer.writeEndObject();
2242
2243
68
            } else {
2244
0
                return Status::InvalidArgument("Cannot insert value into this type");
2245
0
            }
2246
72
        }
2247
2248
78
        if (!value_written) {
2249
0
            return Status::InvalidArgument(
2250
0
                    "JsonbModify: value not written, parent_index: {}, parents size: {}",
2251
0
                    parent_index, parents.size());
2252
0
        }
2253
2254
78
        return Status::OK();
2255
78
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE16write_json_valueEPKNS_10JsonbValueERKSt6vectorIS5_SaIS5_EEmS5_bPKNS_8leg_infoERNS_12JsonbWriterTINS_14JsonbOutStreamEEE
Line
Count
Source
2141
131
                            const leg_info* last_leg, JsonbWriter& writer) const {
2142
131
        if (parent_index >= parents.size()) {
2143
0
            return Status::InvalidArgument(
2144
0
                    "JsonbModify: parent_index {} is out of bounds for parents size {}",
2145
0
                    parent_index, parents.size());
2146
0
        }
2147
2148
131
        if (parents[parent_index] != root) {
2149
0
            return Status::InvalidArgument(
2150
0
                    "JsonbModify: parent value does not match root value, parent_index: {}, "
2151
0
                    "parents size: {}",
2152
0
                    parent_index, parents.size());
2153
0
        }
2154
2155
131
        if (parent_index == parents.size() - 1 && replace) {
2156
            // We are at the last parent, write the value directly
2157
56
            if (value == nullptr) {
2158
14
                writer.writeNull();
2159
42
            } else {
2160
42
                writer.writeValue(value);
2161
42
            }
2162
56
            return Status::OK();
2163
56
        }
2164
2165
75
        bool value_written = false;
2166
75
        bool is_last_parent = (parent_index == parents.size() - 1);
2167
75
        const auto* next_parent = is_last_parent ? nullptr : parents[parent_index + 1];
2168
75
        if (root->isArray()) {
2169
8
            writer.writeStartArray();
2170
8
            const auto* array_val = root->unpack<ArrayVal>();
2171
24
            for (int i = 0; i != array_val->numElem(); ++i) {
2172
16
                auto* it = array_val->get(i);
2173
2174
16
                if (is_last_parent && last_leg->array_index == i) {
2175
0
                    value_written = true;
2176
0
                    writer.writeValue(value);
2177
16
                } else if (it == next_parent) {
2178
8
                    value_written = true;
2179
8
                    RETURN_IF_ERROR(write_json_value(it, parents, parent_index + 1, value, replace,
2180
8
                                                     last_leg, writer));
2181
8
                } else {
2182
8
                    writer.writeValue(it);
2183
8
                }
2184
16
            }
2185
8
            if (is_last_parent && !value_written) {
2186
0
                value_written = true;
2187
0
                writer.writeValue(value);
2188
0
            }
2189
2190
8
            writer.writeEndArray();
2191
2192
67
        } else {
2193
            /**
2194
                Because even for a non-array object, `$[0]` can still point to that object:
2195
                ```
2196
                select json_extract('{"key": "value"}', '$[0]');
2197
                +------------------------------------------+
2198
                | json_extract('{"key": "value"}', '$[0]') |
2199
                +------------------------------------------+
2200
                | {"key": "value"}                         |
2201
                +------------------------------------------+
2202
                ```
2203
                So when inserting an element into `$[1]`, even if '$' does not represent an array,
2204
                it should be converted to an array before insertion:
2205
                ```
2206
                select json_insert('123','$[1]', null);
2207
                +---------------------------------+
2208
                | json_insert('123','$[1]', null) |
2209
                +---------------------------------+
2210
                | [123, null]                     |
2211
                +---------------------------------+
2212
                ```
2213
             */
2214
67
            if (is_last_parent && last_leg && last_leg->type == ARRAY_CODE) {
2215
0
                writer.writeStartArray();
2216
0
                writer.writeValue(root);
2217
0
                writer.writeValue(value);
2218
0
                writer.writeEndArray();
2219
0
                return Status::OK();
2220
67
            } else if (root->isObject()) {
2221
67
                writer.writeStartObject();
2222
67
                const auto* object_val = root->unpack<ObjectVal>();
2223
175
                for (const auto& it : *object_val) {
2224
175
                    writer.writeKey(it.getKeyStr(), it.klen());
2225
175
                    if (it.value() == next_parent) {
2226
67
                        value_written = true;
2227
67
                        RETURN_IF_ERROR(write_json_value(it.value(), parents, parent_index + 1,
2228
67
                                                         value, replace, last_leg, writer));
2229
108
                    } else {
2230
108
                        writer.writeValue(it.value());
2231
108
                    }
2232
175
                }
2233
2234
67
                if (is_last_parent && !value_written) {
2235
0
                    value_written = true;
2236
0
                    writer.writeStartObject();
2237
0
                    writer.writeKey(last_leg->leg_ptr, static_cast<uint8_t>(last_leg->leg_len));
2238
0
                    writer.writeValue(value);
2239
0
                    writer.writeEndObject();
2240
0
                }
2241
67
                writer.writeEndObject();
2242
2243
67
            } else {
2244
0
                return Status::InvalidArgument("Cannot insert value into this type");
2245
0
            }
2246
67
        }
2247
2248
75
        if (!value_written) {
2249
0
            return Status::InvalidArgument(
2250
0
                    "JsonbModify: value not written, parent_index: {}, parents size: {}",
2251
0
                    parent_index, parents.size());
2252
0
        }
2253
2254
75
        return Status::OK();
2255
75
    }
2256
2257
    Status parse_paths_and_values(DorisVector<DorisVector<JsonbPath>>& json_paths,
2258
                                  DorisVector<DorisVector<const JsonbValue*>>& json_values,
2259
                                  const ColumnNumbers& arguments, const size_t input_rows_count,
2260
                                  const std::vector<const ColumnString*>& json_path_columns,
2261
                                  const std::vector<bool>& json_path_constant,
2262
                                  const std::vector<const NullMap*>& json_path_null_maps,
2263
                                  const std::vector<const ColumnString*>& json_value_columns,
2264
                                  const std::vector<bool>& json_value_constant,
2265
85
                                  const std::vector<const NullMap*>& json_value_null_maps) const {
2266
187
        for (size_t i = 1; i < arguments.size(); i += 2) {
2267
110
            const size_t index = i / 2;
2268
110
            const auto* json_path_column = json_path_columns[index];
2269
110
            const auto* value_column = json_value_columns[index];
2270
2271
110
            json_paths[index].resize(json_path_constant[index] ? 1 : input_rows_count);
2272
110
            json_values[index].resize(json_value_constant[index] ? 1 : input_rows_count, nullptr);
2273
2274
225
            for (size_t row_idx = 0; row_idx != json_paths[index].size(); ++row_idx) {
2275
123
                if (json_path_null_maps[index] && (*json_path_null_maps[index])[row_idx]) {
2276
6
                    continue;
2277
6
                }
2278
2279
117
                auto path_string = json_path_column->get_data_at(row_idx);
2280
117
                if (!json_paths[index][row_idx].seek(path_string.data, path_string.size)) {
2281
3
                    return Status::InvalidArgument(
2282
3
                            "Json path error: Invalid Json Path for value: {}, "
2283
3
                            "argument "
2284
3
                            "index: {}, row index: {}",
2285
3
                            std::string_view(path_string.data, path_string.size), i, row_idx);
2286
3
                }
2287
2288
114
                if (json_paths[index][row_idx].is_wildcard()) {
2289
5
                    return Status::InvalidArgument(
2290
5
                            "In this situation, path expressions may not contain the * and ** "
2291
5
                            "tokens, argument index: {}, row index: {}",
2292
5
                            i, row_idx);
2293
5
                }
2294
114
            }
2295
2296
302
            for (size_t row_idx = 0; row_idx != json_values[index].size(); ++row_idx) {
2297
200
                if (json_value_null_maps[index] && (*json_value_null_maps[index])[row_idx]) {
2298
48
                    continue;
2299
48
                }
2300
2301
152
                auto value_string = value_column->get_data_at(row_idx);
2302
152
                const JsonbDocument* doc = nullptr;
2303
152
                RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(value_string.data,
2304
152
                                                                      value_string.size, &doc));
2305
152
                if (doc) {
2306
152
                    json_values[index][row_idx] = doc->getValue();
2307
152
                }
2308
152
            }
2309
102
        }
2310
2311
77
        return Status::OK();
2312
85
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE22parse_paths_and_valuesERSt6vectorIS3_INS_9JsonbPathENS_18CustomStdAllocatorIS4_NS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEEEEENS5_ISA_S8_EEERS3_IS3_IPKNS_10JsonbValueENS5_ISG_S8_EEENS5_ISI_S8_EEERKS3_IjSaIjEEmRKS3_IPKNS_9ColumnStrIjEESaIST_EERKS3_IbSaIbEERKS3_IPKNS_8PODArrayIhLm4096ES8_Lm16ELm15EEESaIS15_EESX_S11_S19_
Line
Count
Source
2265
29
                                  const std::vector<const NullMap*>& json_value_null_maps) const {
2266
61
        for (size_t i = 1; i < arguments.size(); i += 2) {
2267
36
            const size_t index = i / 2;
2268
36
            const auto* json_path_column = json_path_columns[index];
2269
36
            const auto* value_column = json_value_columns[index];
2270
2271
36
            json_paths[index].resize(json_path_constant[index] ? 1 : input_rows_count);
2272
36
            json_values[index].resize(json_value_constant[index] ? 1 : input_rows_count, nullptr);
2273
2274
79
            for (size_t row_idx = 0; row_idx != json_paths[index].size(); ++row_idx) {
2275
47
                if (json_path_null_maps[index] && (*json_path_null_maps[index])[row_idx]) {
2276
4
                    continue;
2277
4
                }
2278
2279
43
                auto path_string = json_path_column->get_data_at(row_idx);
2280
43
                if (!json_paths[index][row_idx].seek(path_string.data, path_string.size)) {
2281
1
                    return Status::InvalidArgument(
2282
1
                            "Json path error: Invalid Json Path for value: {}, "
2283
1
                            "argument "
2284
1
                            "index: {}, row index: {}",
2285
1
                            std::string_view(path_string.data, path_string.size), i, row_idx);
2286
1
                }
2287
2288
42
                if (json_paths[index][row_idx].is_wildcard()) {
2289
3
                    return Status::InvalidArgument(
2290
3
                            "In this situation, path expressions may not contain the * and ** "
2291
3
                            "tokens, argument index: {}, row index: {}",
2292
3
                            i, row_idx);
2293
3
                }
2294
42
            }
2295
2296
96
            for (size_t row_idx = 0; row_idx != json_values[index].size(); ++row_idx) {
2297
64
                if (json_value_null_maps[index] && (*json_value_null_maps[index])[row_idx]) {
2298
14
                    continue;
2299
14
                }
2300
2301
50
                auto value_string = value_column->get_data_at(row_idx);
2302
50
                const JsonbDocument* doc = nullptr;
2303
50
                RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(value_string.data,
2304
50
                                                                      value_string.size, &doc));
2305
50
                if (doc) {
2306
50
                    json_values[index][row_idx] = doc->getValue();
2307
50
                }
2308
50
            }
2309
32
        }
2310
2311
25
        return Status::OK();
2312
29
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE22parse_paths_and_valuesERSt6vectorIS3_INS_9JsonbPathENS_18CustomStdAllocatorIS4_NS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEEEEENS5_ISA_S8_EEERS3_IS3_IPKNS_10JsonbValueENS5_ISG_S8_EEENS5_ISI_S8_EEERKS3_IjSaIjEEmRKS3_IPKNS_9ColumnStrIjEESaIST_EERKS3_IbSaIbEERKS3_IPKNS_8PODArrayIhLm4096ES8_Lm16ELm15EEESaIS15_EESX_S11_S19_
Line
Count
Source
2265
28
                                  const std::vector<const NullMap*>& json_value_null_maps) const {
2266
62
        for (size_t i = 1; i < arguments.size(); i += 2) {
2267
36
            const size_t index = i / 2;
2268
36
            const auto* json_path_column = json_path_columns[index];
2269
36
            const auto* value_column = json_value_columns[index];
2270
2271
36
            json_paths[index].resize(json_path_constant[index] ? 1 : input_rows_count);
2272
36
            json_values[index].resize(json_value_constant[index] ? 1 : input_rows_count, nullptr);
2273
2274
72
            for (size_t row_idx = 0; row_idx != json_paths[index].size(); ++row_idx) {
2275
38
                if (json_path_null_maps[index] && (*json_path_null_maps[index])[row_idx]) {
2276
1
                    continue;
2277
1
                }
2278
2279
37
                auto path_string = json_path_column->get_data_at(row_idx);
2280
37
                if (!json_paths[index][row_idx].seek(path_string.data, path_string.size)) {
2281
1
                    return Status::InvalidArgument(
2282
1
                            "Json path error: Invalid Json Path for value: {}, "
2283
1
                            "argument "
2284
1
                            "index: {}, row index: {}",
2285
1
                            std::string_view(path_string.data, path_string.size), i, row_idx);
2286
1
                }
2287
2288
36
                if (json_paths[index][row_idx].is_wildcard()) {
2289
1
                    return Status::InvalidArgument(
2290
1
                            "In this situation, path expressions may not contain the * and ** "
2291
1
                            "tokens, argument index: {}, row index: {}",
2292
1
                            i, row_idx);
2293
1
                }
2294
36
            }
2295
2296
102
            for (size_t row_idx = 0; row_idx != json_values[index].size(); ++row_idx) {
2297
68
                if (json_value_null_maps[index] && (*json_value_null_maps[index])[row_idx]) {
2298
16
                    continue;
2299
16
                }
2300
2301
52
                auto value_string = value_column->get_data_at(row_idx);
2302
52
                const JsonbDocument* doc = nullptr;
2303
52
                RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(value_string.data,
2304
52
                                                                      value_string.size, &doc));
2305
52
                if (doc) {
2306
52
                    json_values[index][row_idx] = doc->getValue();
2307
52
                }
2308
52
            }
2309
34
        }
2310
2311
26
        return Status::OK();
2312
28
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE22parse_paths_and_valuesERSt6vectorIS3_INS_9JsonbPathENS_18CustomStdAllocatorIS4_NS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEEEEENS5_ISA_S8_EEERS3_IS3_IPKNS_10JsonbValueENS5_ISG_S8_EEENS5_ISI_S8_EEERKS3_IjSaIjEEmRKS3_IPKNS_9ColumnStrIjEESaIST_EERKS3_IbSaIbEERKS3_IPKNS_8PODArrayIhLm4096ES8_Lm16ELm15EEESaIS15_EESX_S11_S19_
Line
Count
Source
2265
28
                                  const std::vector<const NullMap*>& json_value_null_maps) const {
2266
64
        for (size_t i = 1; i < arguments.size(); i += 2) {
2267
38
            const size_t index = i / 2;
2268
38
            const auto* json_path_column = json_path_columns[index];
2269
38
            const auto* value_column = json_value_columns[index];
2270
2271
38
            json_paths[index].resize(json_path_constant[index] ? 1 : input_rows_count);
2272
38
            json_values[index].resize(json_value_constant[index] ? 1 : input_rows_count, nullptr);
2273
2274
74
            for (size_t row_idx = 0; row_idx != json_paths[index].size(); ++row_idx) {
2275
38
                if (json_path_null_maps[index] && (*json_path_null_maps[index])[row_idx]) {
2276
1
                    continue;
2277
1
                }
2278
2279
37
                auto path_string = json_path_column->get_data_at(row_idx);
2280
37
                if (!json_paths[index][row_idx].seek(path_string.data, path_string.size)) {
2281
1
                    return Status::InvalidArgument(
2282
1
                            "Json path error: Invalid Json Path for value: {}, "
2283
1
                            "argument "
2284
1
                            "index: {}, row index: {}",
2285
1
                            std::string_view(path_string.data, path_string.size), i, row_idx);
2286
1
                }
2287
2288
36
                if (json_paths[index][row_idx].is_wildcard()) {
2289
1
                    return Status::InvalidArgument(
2290
1
                            "In this situation, path expressions may not contain the * and ** "
2291
1
                            "tokens, argument index: {}, row index: {}",
2292
1
                            i, row_idx);
2293
1
                }
2294
36
            }
2295
2296
104
            for (size_t row_idx = 0; row_idx != json_values[index].size(); ++row_idx) {
2297
68
                if (json_value_null_maps[index] && (*json_value_null_maps[index])[row_idx]) {
2298
18
                    continue;
2299
18
                }
2300
2301
50
                auto value_string = value_column->get_data_at(row_idx);
2302
50
                const JsonbDocument* doc = nullptr;
2303
50
                RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(value_string.data,
2304
50
                                                                      value_string.size, &doc));
2305
50
                if (doc) {
2306
50
                    json_values[index][row_idx] = doc->getValue();
2307
50
                }
2308
50
            }
2309
36
        }
2310
2311
26
        return Status::OK();
2312
28
    }
2313
};
2314
2315
struct JsonbContainsAndPathImpl {
2316
64
    static DataTypes get_variadic_argument_types() {
2317
64
        return {std::make_shared<DataTypeJsonb>(), std::make_shared<DataTypeJsonb>(),
2318
64
                std::make_shared<DataTypeString>()};
2319
64
    }
2320
2321
    static Status execute_impl(FunctionContext* context, Block& block,
2322
                               const ColumnNumbers& arguments, uint32_t result,
2323
131
                               size_t input_rows_count) {
2324
131
        return JsonbContainsUtil::jsonb_contains_execute(context, block, arguments, result,
2325
131
                                                         input_rows_count);
2326
131
    }
2327
};
2328
2329
class FunctionJsonSearch : public IFunction {
2330
private:
2331
    using OneFun = std::function<Status(size_t, bool*)>;
2332
70
    static Status always_one(size_t i, bool* res) {
2333
70
        *res = true;
2334
70
        return Status::OK();
2335
70
    }
2336
48
    static Status always_all(size_t i, bool* res) {
2337
48
        *res = false;
2338
48
        return Status::OK();
2339
48
    }
2340
2341
    using CheckNullFun = std::function<bool(size_t)>;
2342
304
    static bool always_not_null(size_t) { return false; }
2343
2344
    using GetJsonStringRefFun = std::function<StringRef(size_t)>;
2345
2346
346
    Status matched(const std::string_view& str, LikeState* state, unsigned char* res) const {
2347
346
        StringRef pattern; // not used
2348
346
        StringRef value_val(str.data(), str.size());
2349
346
        return (state->scalar_function)(&state->search_state, value_val, pattern, res);
2350
346
    }
2351
2352
    /**
2353
     * Recursive search for matching string, if found, the result will be added to a vector
2354
     * @param element json element
2355
     * @param one_match
2356
     * @param search_str
2357
     * @param cur_path
2358
     * @param matches The path that has already been matched
2359
     * @return true if matched else false
2360
     */
2361
    bool find_matches(const JsonbValue* element, const bool& one_match, LikeState* state,
2362
753
                      JsonbPath* cur_path, std::unordered_set<std::string>* matches) const {
2363
753
        if (element->isString()) {
2364
346
            const auto* json_string = element->unpack<JsonbStringVal>();
2365
346
            const std::string_view element_str(json_string->getBlob(), json_string->length());
2366
346
            unsigned char res;
2367
346
            RETURN_IF_ERROR(matched(element_str, state, &res));
2368
346
            if (res) {
2369
223
                std::string str;
2370
223
                auto valid = cur_path->to_string(&str);
2371
223
                if (!valid) {
2372
0
                    return false;
2373
0
                }
2374
223
                return matches->insert(str).second;
2375
223
            } else {
2376
123
                return false;
2377
123
            }
2378
407
        } else if (element->isObject()) {
2379
206
            const auto* object = element->unpack<ObjectVal>();
2380
206
            bool find = false;
2381
212
            for (const auto& item : *object) {
2382
212
                Slice key(item.getKeyStr(), item.klen());
2383
212
                const auto* child_element = item.value();
2384
                // construct an object member path leg.
2385
212
                auto leg = std::make_unique<leg_info>(key.data, key.size, 0, MEMBER_CODE);
2386
212
                cur_path->add_leg_to_leg_vector(std::move(leg));
2387
212
                find |= find_matches(child_element, one_match, state, cur_path, matches);
2388
212
                cur_path->pop_leg_from_leg_vector();
2389
212
                if (one_match && find) {
2390
8
                    return true;
2391
8
                }
2392
212
            }
2393
198
            return find;
2394
206
        } else if (element->isArray()) {
2395
201
            const auto* array = element->unpack<ArrayVal>();
2396
201
            bool find = false;
2397
530
            for (int i = 0; i < array->numElem(); ++i) {
2398
399
                auto leg = std::make_unique<leg_info>(nullptr, 0, i, ARRAY_CODE);
2399
399
                cur_path->add_leg_to_leg_vector(std::move(leg));
2400
399
                const auto* child_element = array->get(i);
2401
                // construct an array cell path leg.
2402
399
                find |= find_matches(child_element, one_match, state, cur_path, matches);
2403
399
                cur_path->pop_leg_from_leg_vector();
2404
399
                if (one_match && find) {
2405
70
                    return true;
2406
70
                }
2407
399
            }
2408
131
            return find;
2409
201
        } else {
2410
0
            return false;
2411
0
        }
2412
753
    }
2413
2414
    void make_result_str(JsonbWriter& writer, std::unordered_set<std::string>& matches,
2415
128
                         ColumnString* result_col) const {
2416
128
        if (matches.size() == 1) {
2417
95
            for (const auto& str_ref : matches) {
2418
95
                writer.writeStartString();
2419
95
                writer.writeString(str_ref);
2420
95
                writer.writeEndString();
2421
95
            }
2422
95
        } else {
2423
33
            writer.writeStartArray();
2424
128
            for (const auto& str_ref : matches) {
2425
128
                writer.writeStartString();
2426
128
                writer.writeString(str_ref);
2427
128
                writer.writeEndString();
2428
128
            }
2429
33
            writer.writeEndArray();
2430
33
        }
2431
2432
128
        result_col->insert_data(writer.getOutput()->getBuffer(),
2433
128
                                (size_t)writer.getOutput()->getSize());
2434
128
    }
2435
2436
    template <bool search_is_const>
2437
    Status execute_vector(Block& block, size_t input_rows_count, CheckNullFun json_null_check,
2438
                          GetJsonStringRefFun col_json_string, CheckNullFun one_null_check,
2439
                          OneFun one_check, CheckNullFun search_null_check,
2440
                          const ColumnString* col_search_string, FunctionContext* context,
2441
54
                          size_t result) const {
2442
54
        auto result_col = ColumnString::create();
2443
54
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
2444
2445
54
        std::shared_ptr<LikeState> state_ptr;
2446
54
        LikeState* state = nullptr;
2447
54
        if (search_is_const) {
2448
8
            state = reinterpret_cast<LikeState*>(
2449
8
                    context->get_function_state(FunctionContext::THREAD_LOCAL));
2450
8
        }
2451
2452
54
        bool is_one = false;
2453
2454
54
        JsonbWriter writer;
2455
226
        for (size_t i = 0; i < input_rows_count; ++i) {
2456
            // an error occurs if the json_doc argument is not a valid json document.
2457
174
            if (json_null_check(i)) {
2458
14
                null_map->get_data()[i] = 1;
2459
14
                result_col->insert_data("", 0);
2460
14
                continue;
2461
14
            }
2462
160
            const auto& json_doc_str = col_json_string(i);
2463
160
            const JsonbDocument* json_doc = nullptr;
2464
160
            auto st = JsonbDocument::checkAndCreateDocument(json_doc_str.data, json_doc_str.size,
2465
160
                                                            &json_doc);
2466
160
            if (!st.ok()) {
2467
0
                return Status::InvalidArgument(
2468
0
                        "the json_doc argument at row {} is not a valid json document: {}", i,
2469
0
                        st.to_string());
2470
0
            }
2471
2472
160
            if (!one_null_check(i)) {
2473
158
                RETURN_IF_ERROR(one_check(i, &is_one));
2474
158
            }
2475
2476
158
            if (one_null_check(i) || search_null_check(i)) {
2477
16
                null_map->get_data()[i] = 1;
2478
16
                result_col->insert_data("", 0);
2479
16
                continue;
2480
16
            }
2481
2482
            // an error occurs if any path argument is not a valid path expression.
2483
142
            std::string root_path_str = "$";
2484
142
            JsonbPath root_path;
2485
142
            root_path.seek(root_path_str.c_str(), root_path_str.size());
2486
142
            std::vector<JsonbPath*> paths;
2487
142
            paths.push_back(&root_path);
2488
2489
142
            if (!search_is_const) {
2490
110
                state_ptr = std::make_shared<LikeState>();
2491
110
                state_ptr->is_like_pattern = true;
2492
110
                const auto& search_str = col_search_string->get_data_at(i);
2493
110
                RETURN_IF_ERROR(FunctionLike::construct_like_const_state(context, search_str,
2494
110
                                                                         state_ptr, false));
2495
110
                state = state_ptr.get();
2496
110
            }
2497
2498
            // maintain a hashset to deduplicate matches.
2499
142
            std::unordered_set<std::string> matches;
2500
142
            for (const auto& item : paths) {
2501
142
                auto* cur_path = item;
2502
142
                auto find = find_matches(json_doc->getValue(), is_one, state, cur_path, &matches);
2503
142
                if (is_one && find) {
2504
75
                    break;
2505
75
                }
2506
142
            }
2507
142
            if (matches.empty()) {
2508
                // returns NULL if the search_str is not found in the document.
2509
14
                null_map->get_data()[i] = 1;
2510
14
                result_col->insert_data("", 0);
2511
14
                continue;
2512
14
            }
2513
2514
128
            writer.reset();
2515
128
            make_result_str(writer, matches, result_col.get());
2516
128
        }
2517
52
        auto result_col_nullable =
2518
52
                ColumnNullable::create(std::move(result_col), std::move(null_map));
2519
52
        block.replace_by_position(result, std::move(result_col_nullable));
2520
52
        return Status::OK();
2521
54
    }
_ZNK5doris18FunctionJsonSearch14execute_vectorILb1EEENS_6StatusERNS_5BlockEmSt8functionIFbmEES5_IFNS_9StringRefEmEES7_S5_IFS2_mPbEES7_PKNS_9ColumnStrIjEEPNS_15FunctionContextEm
Line
Count
Source
2441
8
                          size_t result) const {
2442
8
        auto result_col = ColumnString::create();
2443
8
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
2444
2445
8
        std::shared_ptr<LikeState> state_ptr;
2446
8
        LikeState* state = nullptr;
2447
8
        if (search_is_const) {
2448
8
            state = reinterpret_cast<LikeState*>(
2449
8
                    context->get_function_state(FunctionContext::THREAD_LOCAL));
2450
8
        }
2451
2452
8
        bool is_one = false;
2453
2454
8
        JsonbWriter writer;
2455
44
        for (size_t i = 0; i < input_rows_count; ++i) {
2456
            // an error occurs if the json_doc argument is not a valid json document.
2457
36
            if (json_null_check(i)) {
2458
4
                null_map->get_data()[i] = 1;
2459
4
                result_col->insert_data("", 0);
2460
4
                continue;
2461
4
            }
2462
32
            const auto& json_doc_str = col_json_string(i);
2463
32
            const JsonbDocument* json_doc = nullptr;
2464
32
            auto st = JsonbDocument::checkAndCreateDocument(json_doc_str.data, json_doc_str.size,
2465
32
                                                            &json_doc);
2466
32
            if (!st.ok()) {
2467
0
                return Status::InvalidArgument(
2468
0
                        "the json_doc argument at row {} is not a valid json document: {}", i,
2469
0
                        st.to_string());
2470
0
            }
2471
2472
32
            if (!one_null_check(i)) {
2473
32
                RETURN_IF_ERROR(one_check(i, &is_one));
2474
32
            }
2475
2476
32
            if (one_null_check(i) || search_null_check(i)) {
2477
0
                null_map->get_data()[i] = 1;
2478
0
                result_col->insert_data("", 0);
2479
0
                continue;
2480
0
            }
2481
2482
            // an error occurs if any path argument is not a valid path expression.
2483
32
            std::string root_path_str = "$";
2484
32
            JsonbPath root_path;
2485
32
            root_path.seek(root_path_str.c_str(), root_path_str.size());
2486
32
            std::vector<JsonbPath*> paths;
2487
32
            paths.push_back(&root_path);
2488
2489
32
            if (!search_is_const) {
2490
0
                state_ptr = std::make_shared<LikeState>();
2491
0
                state_ptr->is_like_pattern = true;
2492
0
                const auto& search_str = col_search_string->get_data_at(i);
2493
0
                RETURN_IF_ERROR(FunctionLike::construct_like_const_state(context, search_str,
2494
0
                                                                         state_ptr, false));
2495
0
                state = state_ptr.get();
2496
0
            }
2497
2498
            // maintain a hashset to deduplicate matches.
2499
32
            std::unordered_set<std::string> matches;
2500
32
            for (const auto& item : paths) {
2501
32
                auto* cur_path = item;
2502
32
                auto find = find_matches(json_doc->getValue(), is_one, state, cur_path, &matches);
2503
32
                if (is_one && find) {
2504
16
                    break;
2505
16
                }
2506
32
            }
2507
32
            if (matches.empty()) {
2508
                // returns NULL if the search_str is not found in the document.
2509
0
                null_map->get_data()[i] = 1;
2510
0
                result_col->insert_data("", 0);
2511
0
                continue;
2512
0
            }
2513
2514
32
            writer.reset();
2515
32
            make_result_str(writer, matches, result_col.get());
2516
32
        }
2517
8
        auto result_col_nullable =
2518
8
                ColumnNullable::create(std::move(result_col), std::move(null_map));
2519
8
        block.replace_by_position(result, std::move(result_col_nullable));
2520
8
        return Status::OK();
2521
8
    }
_ZNK5doris18FunctionJsonSearch14execute_vectorILb0EEENS_6StatusERNS_5BlockEmSt8functionIFbmEES5_IFNS_9StringRefEmEES7_S5_IFS2_mPbEES7_PKNS_9ColumnStrIjEEPNS_15FunctionContextEm
Line
Count
Source
2441
46
                          size_t result) const {
2442
46
        auto result_col = ColumnString::create();
2443
46
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
2444
2445
46
        std::shared_ptr<LikeState> state_ptr;
2446
46
        LikeState* state = nullptr;
2447
46
        if (search_is_const) {
2448
0
            state = reinterpret_cast<LikeState*>(
2449
0
                    context->get_function_state(FunctionContext::THREAD_LOCAL));
2450
0
        }
2451
2452
46
        bool is_one = false;
2453
2454
46
        JsonbWriter writer;
2455
182
        for (size_t i = 0; i < input_rows_count; ++i) {
2456
            // an error occurs if the json_doc argument is not a valid json document.
2457
138
            if (json_null_check(i)) {
2458
10
                null_map->get_data()[i] = 1;
2459
10
                result_col->insert_data("", 0);
2460
10
                continue;
2461
10
            }
2462
128
            const auto& json_doc_str = col_json_string(i);
2463
128
            const JsonbDocument* json_doc = nullptr;
2464
128
            auto st = JsonbDocument::checkAndCreateDocument(json_doc_str.data, json_doc_str.size,
2465
128
                                                            &json_doc);
2466
128
            if (!st.ok()) {
2467
0
                return Status::InvalidArgument(
2468
0
                        "the json_doc argument at row {} is not a valid json document: {}", i,
2469
0
                        st.to_string());
2470
0
            }
2471
2472
128
            if (!one_null_check(i)) {
2473
126
                RETURN_IF_ERROR(one_check(i, &is_one));
2474
126
            }
2475
2476
126
            if (one_null_check(i) || search_null_check(i)) {
2477
16
                null_map->get_data()[i] = 1;
2478
16
                result_col->insert_data("", 0);
2479
16
                continue;
2480
16
            }
2481
2482
            // an error occurs if any path argument is not a valid path expression.
2483
110
            std::string root_path_str = "$";
2484
110
            JsonbPath root_path;
2485
110
            root_path.seek(root_path_str.c_str(), root_path_str.size());
2486
110
            std::vector<JsonbPath*> paths;
2487
110
            paths.push_back(&root_path);
2488
2489
110
            if (!search_is_const) {
2490
110
                state_ptr = std::make_shared<LikeState>();
2491
110
                state_ptr->is_like_pattern = true;
2492
110
                const auto& search_str = col_search_string->get_data_at(i);
2493
110
                RETURN_IF_ERROR(FunctionLike::construct_like_const_state(context, search_str,
2494
110
                                                                         state_ptr, false));
2495
110
                state = state_ptr.get();
2496
110
            }
2497
2498
            // maintain a hashset to deduplicate matches.
2499
110
            std::unordered_set<std::string> matches;
2500
110
            for (const auto& item : paths) {
2501
110
                auto* cur_path = item;
2502
110
                auto find = find_matches(json_doc->getValue(), is_one, state, cur_path, &matches);
2503
110
                if (is_one && find) {
2504
59
                    break;
2505
59
                }
2506
110
            }
2507
110
            if (matches.empty()) {
2508
                // returns NULL if the search_str is not found in the document.
2509
14
                null_map->get_data()[i] = 1;
2510
14
                result_col->insert_data("", 0);
2511
14
                continue;
2512
14
            }
2513
2514
96
            writer.reset();
2515
96
            make_result_str(writer, matches, result_col.get());
2516
96
        }
2517
44
        auto result_col_nullable =
2518
44
                ColumnNullable::create(std::move(result_col), std::move(null_map));
2519
44
        block.replace_by_position(result, std::move(result_col_nullable));
2520
44
        return Status::OK();
2521
46
    }
2522
2523
    static constexpr auto one = "one";
2524
    static constexpr auto all = "all";
2525
2526
public:
2527
    static constexpr auto name = "json_search";
2528
58
    static FunctionPtr create() { return std::make_shared<FunctionJsonSearch>(); }
2529
2530
1
    String get_name() const override { return name; }
2531
50
    bool is_variadic() const override { return false; }
2532
49
    size_t get_number_of_arguments() const override { return 3; }
2533
2534
49
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2535
49
        return make_nullable(std::make_shared<DataTypeJsonb>());
2536
49
    }
2537
2538
114
    bool use_default_implementation_for_nulls() const override { return false; }
2539
2540
232
    /// Prepares per-thread state before execution.
    ///
    /// Only the THREAD_LOCAL scope does any work, and only when argument 2
    /// (search_str) is a constant column: in that case the LIKE matching state
    /// is built once from the constant pattern and stored on the context.
    /// Returns the error from construct_like_const_state if building fails.
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
        // Short-circuit keeps the original order: the constness of argument 2
        // is only queried for the THREAD_LOCAL scope.
        const bool build_state =
                scope == FunctionContext::THREAD_LOCAL && context->is_col_constant(2);
        if (!build_state) {
            return Status::OK();
        }

        auto like_state = std::make_shared<LikeState>();
        like_state->is_like_pattern = true;

        const auto const_pattern_col = context->get_constant_col(2)->column_ptr;
        const auto& like_pattern = const_pattern_col->get_data_at(0);
        RETURN_IF_ERROR(FunctionLike::construct_like_const_state(context, like_pattern,
                                                                 like_state, false));

        context->set_function_state(scope, like_state);
        return Status::OK();
    }
2555
2556
    /// Evaluates json_search(json_doc, one_or_all, search_str) over a block.
    ///
    /// Unpacks the three argument columns (const / nullable / plain string),
    /// builds the per-row accessors expected by execute_vector and dispatches
    /// to execute_vector<true|false> depending on whether search_str is constant.
    ///
    /// - Returns InvalidArgument when fewer than 3 arguments are given, or when
    ///   one_or_all is neither 'one' nor 'all' (case-insensitive).
    /// - Returns NotSupported when more than 3 arguments are given.
    /// - Returns RuntimeError when an argument is not backed by a ColumnString.
    /// - When a constant argument is NULL the whole result is an all-NULL column.
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                        uint32_t result, size_t input_rows_count) const override {
        // the json_doc, one_or_all, and search_str must be given.
        // and we require the positions are static.
        if (arguments.size() < 3) {
            return Status::InvalidArgument("too few arguments for function {}", name);
        }
        if (arguments.size() > 3) {
            return Status::NotSupported("escape and path params are not support now");
        }

        // Single place that interprets a one_or_all value, shared by the
        // constant path and the per-row path so both report the same error.
        auto parse_one_or_all = [](const auto& raw, bool* is_one) -> Status {
            // strcasecmp needs a NUL-terminated string, hence the copy.
            const std::string one_or_all_str = raw.to_string();
            if (strcasecmp(one_or_all_str.c_str(), all) == 0) {
                *is_one = false;
            } else if (strcasecmp(one_or_all_str.c_str(), one) == 0) {
                *is_one = true;
            } else {
                // an error occurs if the one_or_all argument is not 'one' nor 'all'.
                return Status::InvalidArgument(
                        "the one_or_all argument {} is not 'one' nor 'all'", one_or_all_str);
            }
            return Status::OK();
        };

        CheckNullFun json_null_check = always_not_null;
        GetJsonStringRefFun get_json_fun;
        // prepare jsonb data column
        auto&& [col_json, json_is_const] =
                unpack_if_const(block.get_by_position(arguments[0]).column);
        const auto* col_json_string = check_and_get_column<ColumnString>(col_json.get());
        if (const auto* nullable = check_and_get_column<ColumnNullable>(col_json.get())) {
            col_json_string =
                    check_and_get_column<ColumnString>(nullable->get_nested_column_ptr().get());
        }

        if (!col_json_string) {
            return Status::RuntimeError("Illegal arg json {} should be ColumnString",
                                        col_json->get_name());
        }

        // Replaces the result with a single NULL value, wrapped in a ColumnConst
        // when the block has more than one row.
        auto create_all_null_result = [&]() {
            auto res_str = ColumnString::create();
            res_str->insert_default();
            auto res = ColumnNullable::create(std::move(res_str), ColumnUInt8::create(1, 1));
            if (input_rows_count > 1) {
                block.get_by_position(result).column =
                        ColumnConst::create(std::move(res), input_rows_count);
            } else {
                block.get_by_position(result).column = std::move(res);
            }
            return Status::OK();
        };

        if (json_is_const) {
            if (col_json->is_null_at(0)) {
                return create_all_null_result();
            }
            const auto& json_str = col_json_string->get_data_at(0);
            get_json_fun = [json_str](size_t i) { return json_str; };
        } else {
            json_null_check = [col_json](size_t i) { return col_json->is_null_at(i); };
            get_json_fun = [col_json_string](size_t i) { return col_json_string->get_data_at(i); };
        }

        // one_or_all
        CheckNullFun one_null_check = always_not_null;
        OneFun one_check = always_one;
        auto&& [col_one, one_is_const] =
                unpack_if_const(block.get_by_position(arguments[1]).column);
        // A single-row block can be handled like a constant argument.
        one_is_const |= input_rows_count == 1;
        const auto* col_one_string = check_and_get_column<ColumnString>(col_one.get());
        if (const auto* nullable = check_and_get_column<ColumnNullable>(col_one.get())) {
            col_one_string = check_and_get_column<ColumnString>(*nullable->get_nested_column_ptr());
        }
        if (!col_one_string) {
            return Status::RuntimeError("Illegal arg one {} should be ColumnString",
                                        col_one->get_name());
        }
        if (one_is_const) {
            if (col_one->is_null_at(0)) {
                return create_all_null_result();
            }
            // Validate once up front; 'one' keeps the default always_one.
            bool const_is_one = true;
            RETURN_IF_ERROR(parse_one_or_all(col_one_string->get_data_at(0), &const_is_one));
            if (!const_is_one) {
                one_check = always_all;
            }
        } else {
            one_null_check = [col_one](size_t i) { return col_one->is_null_at(i); };
            one_check = [col_one_string, parse_one_or_all](size_t i, bool* is_one) {
                return parse_one_or_all(col_one_string->get_data_at(i), is_one);
            };
        }

        // search_str
        auto&& [col_search, search_is_const] =
                unpack_if_const(block.get_by_position(arguments[2]).column);

        const auto* col_search_string = check_and_get_column<ColumnString>(col_search.get());
        if (const auto* nullable = check_and_get_column<ColumnNullable>(col_search.get())) {
            col_search_string =
                    check_and_get_column<ColumnString>(*nullable->get_nested_column_ptr());
        }
        if (!col_search_string) {
            return Status::RuntimeError("Illegal arg pattern {} should be ColumnString",
                                        col_search->get_name());
        }
        if (search_is_const) {
            CheckNullFun search_null_check = always_not_null;
            if (col_search->is_null_at(0)) {
                return create_all_null_result();
            }
            RETURN_IF_ERROR(execute_vector<true>(
                    block, input_rows_count, json_null_check, get_json_fun, one_null_check,
                    one_check, search_null_check, col_search_string, context, result));
        } else {
            CheckNullFun search_null_check = [col_search](size_t i) {
                return col_search->is_null_at(i);
            };
            RETURN_IF_ERROR(execute_vector<false>(
                    block, input_rows_count, json_null_check, get_json_fun, one_null_check,
                    one_check, search_null_check, col_search_string, context, result));
        }
        return Status::OK();
    }
2687
};
2688
2689
// Plain aggregate describing a heap-allocated byte buffer.
struct DocumentBuffer {
    // Owning pointer to the storage; null until something is allocated.
    std::unique_ptr<char[]> ptr;
    // Byte count currently considered in use; starts at zero.
    size_t size {0};
    // Byte count allocated behind `ptr`; starts at zero.
    size_t capacity {0};
};
2694
2695
class FunctionJsonbRemove : public IFunction {
2696
public:
2697
    static constexpr auto name = "jsonb_remove";
2698
    static constexpr auto alias = "json_remove";
2699
2700
31
    static FunctionPtr create() { return std::make_shared<FunctionJsonbRemove>(); }
2701
2702
0
    String get_name() const override { return name; }
2703
2704
0
    size_t get_number_of_arguments() const override { return 0; }
2705
23
    bool is_variadic() const override { return true; }
2706
2707
44
    bool use_default_implementation_for_nulls() const override { return false; }
2708
2709
22
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2710
22
        return make_nullable(std::make_shared<DataTypeJsonb>());
2711
22
    }
2712
2713
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
2714
22
                        uint32_t result, size_t input_rows_count) const override {
2715
22
        DORIS_CHECK_GE(arguments.size(), 2);
2716
2717
        // Check if arguments count is valid (json_doc + at least one path)
2718
22
        if (arguments.size() < 2) {
2719
0
            return Status::InvalidArgument("json_remove requires at least 2 arguments");
2720
0
        }
2721
2722
22
        auto return_data_type = make_nullable(std::make_shared<DataTypeJsonb>());
2723
22
        auto result_column = return_data_type->create_column();
2724
22
        auto& nullable_column = assert_cast<ColumnNullable&>(*result_column);
2725
22
        auto& res_chars =
2726
22
                assert_cast<ColumnString&>(nullable_column.get_nested_column()).get_chars();
2727
22
        auto& res_offsets =
2728
22
                assert_cast<ColumnString&>(nullable_column.get_nested_column()).get_offsets();
2729
22
        auto& null_map = nullable_column.get_null_map_data();
2730
2731
22
        res_chars.reserve(input_rows_count * 64);
2732
22
        res_offsets.resize(input_rows_count);
2733
22
        null_map.resize_fill(input_rows_count, 0);
2734
2735
        // Get JSON document column
2736
22
        auto [json_column, json_const] =
2737
22
                unpack_if_const(block.get_by_position(arguments[0]).column);
2738
22
        const auto* json_nullable = check_and_get_column<ColumnNullable>(json_column.get());
2739
22
        const ColumnString* json_data_column = nullptr;
2740
22
        const NullMap* json_null_map = nullptr;
2741
2742
22
        if (json_nullable) {
2743
22
            json_null_map = &json_nullable->get_null_map_data();
2744
22
            json_data_column =
2745
22
                    check_and_get_column<ColumnString>(&json_nullable->get_nested_column());
2746
22
        } else {
2747
0
            json_data_column = check_and_get_column<ColumnString>(json_column.get());
2748
0
        }
2749
2750
22
        if (!json_data_column) {
2751
0
            return Status::InvalidArgument("First argument must be a JSON document");
2752
0
        }
2753
2754
        // Parse paths
2755
22
        std::vector<const ColumnString*> path_columns;
2756
22
        std::vector<const NullMap*> path_null_maps;
2757
22
        std::vector<bool> path_constants;
2758
2759
51
        for (size_t i = 1; i < arguments.size(); ++i) {
2760
29
            auto [path_column, path_const] =
2761
29
                    unpack_if_const(block.get_by_position(arguments[i]).column);
2762
29
            const auto* path_nullable = check_and_get_column<ColumnNullable>(path_column.get());
2763
2764
29
            if (path_nullable) {
2765
6
                path_null_maps.push_back(&path_nullable->get_null_map_data());
2766
6
                path_columns.push_back(
2767
6
                        check_and_get_column<ColumnString>(&path_nullable->get_nested_column()));
2768
23
            } else {
2769
23
                path_null_maps.push_back(nullptr);
2770
23
                path_columns.push_back(check_and_get_column<ColumnString>(path_column.get()));
2771
23
            }
2772
2773
29
            if (!path_columns.back()) {
2774
0
                return Status::InvalidArgument(
2775
0
                        fmt::format("Argument {} must be a string path", i + 1));
2776
0
            }
2777
2778
29
            path_constants.push_back(path_const);
2779
29
        }
2780
2781
        // Reusable JsonbWriter for performance
2782
22
        JsonbWriter writer;
2783
2784
48
        for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) {
2785
28
            size_t json_idx = index_check_const(row_idx, json_const);
2786
2787
            // Check if JSON document is null
2788
28
            if (json_null_map && (*json_null_map)[json_idx]) {
2789
2
                null_map[row_idx] = 1;
2790
2
                res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size());
2791
2
                continue;
2792
2
            }
2793
2794
            // Parse JSON document
2795
26
            const auto& json_data = json_data_column->get_data_at(json_idx);
2796
26
            const JsonbDocument* json_doc = nullptr;
2797
26
            Status parse_status = JsonbDocument::checkAndCreateDocument(json_data.data,
2798
26
                                                                        json_data.size, &json_doc);
2799
2800
26
            if (!parse_status.ok() || !json_doc) {
2801
0
                null_map[row_idx] = 1;
2802
0
                res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size());
2803
0
                continue;
2804
0
            }
2805
2806
            // Check if any path is null
2807
26
            bool has_null_path = false;
2808
59
            for (size_t path_idx = 0; path_idx < path_columns.size(); ++path_idx) {
2809
35
                size_t idx = index_check_const(row_idx, path_constants[path_idx]);
2810
35
                if (path_null_maps[path_idx] && (*path_null_maps[path_idx])[idx]) {
2811
2
                    has_null_path = true;
2812
2
                    break;
2813
2
                }
2814
35
            }
2815
2816
26
            if (has_null_path) {
2817
2
                null_map[row_idx] = 1;
2818
2
                res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size());
2819
2
                continue;
2820
2
            }
2821
2822
24
            std::vector<JsonbPath> paths;
2823
24
            std::vector<bool> path_constants_vec;
2824
2825
54
            for (size_t path_idx = 0; path_idx < path_columns.size(); ++path_idx) {
2826
32
                size_t idx = index_check_const(row_idx, path_constants[path_idx]);
2827
32
                const auto& path_data = path_columns[path_idx]->get_data_at(idx);
2828
2829
32
                JsonbPath path;
2830
32
                if (!path.seek(path_data.data, path_data.size)) {
2831
1
                    return Status::InvalidArgument(
2832
1
                            "Json path error: Invalid Json Path for value: {} at row: {}",
2833
1
                            std::string_view(path_data.data, path_data.size), row_idx);
2834
1
                }
2835
2836
31
                if (path.is_wildcard() || path.is_supper_wildcard()) {
2837
1
                    return Status::InvalidArgument(
2838
1
                            "In this situation, path expressions may not contain the * and ** "
2839
1
                            "tokens or an array range, argument index: {}, row index: {}",
2840
1
                            path_idx + 1, row_idx);
2841
1
                }
2842
2843
30
                paths.push_back(std::move(path));
2844
30
                path_constants_vec.push_back(path_constants[path_idx]);
2845
30
            }
2846
2847
22
            const JsonbValue* current_value = json_doc->getValue();
2848
2849
22
            DocumentBuffer tmp_buffer;
2850
2851
52
            for (size_t path_idx = 0; path_idx < paths.size(); ++path_idx) {
2852
30
                writer.reset();
2853
2854
30
                auto find_result = current_value->findValue(paths[path_idx]);
2855
2856
30
                if (find_result.is_wildcard) {
2857
0
                    continue;
2858
0
                }
2859
2860
30
                if (find_result.value) {
2861
24
                    RETURN_IF_ERROR(clone_without_path(current_value, paths[path_idx], writer));
2862
2863
24
                    auto* writer_output = writer.getOutput();
2864
24
                    if (writer_output->getSize() > tmp_buffer.capacity) {
2865
17
                        tmp_buffer.capacity =
2866
17
                                ((size_t(writer_output->getSize()) + 1024 - 1) / 1024) * 1024;
2867
17
                        tmp_buffer.ptr = std::make_unique<char[]>(tmp_buffer.capacity);
2868
17
                        DCHECK_LE(writer_output->getSize(), tmp_buffer.capacity);
2869
17
                    }
2870
2871
24
                    memcpy(tmp_buffer.ptr.get(), writer_output->getBuffer(),
2872
24
                           writer_output->getSize());
2873
24
                    tmp_buffer.size = writer_output->getSize();
2874
2875
24
                    const JsonbDocument* new_doc = nullptr;
2876
24
                    RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(
2877
24
                            tmp_buffer.ptr.get(), tmp_buffer.size, &new_doc));
2878
2879
24
                    current_value = new_doc->getValue();
2880
24
                }
2881
30
            }
2882
2883
22
            const JsonbDocument* modified_doc = nullptr;
2884
22
            if (current_value != json_doc->getValue()) {
2885
17
                RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(
2886
17
                        tmp_buffer.ptr.get(), tmp_buffer.size, &modified_doc));
2887
17
            } else {
2888
5
                modified_doc = json_doc;
2889
5
            }
2890
2891
            // Write the final result
2892
22
            const auto size = modified_doc->numPackedBytes();
2893
22
            res_chars.insert(reinterpret_cast<const char*>(modified_doc),
2894
22
                             reinterpret_cast<const char*>(modified_doc) + size);
2895
22
            res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size());
2896
22
        }
2897
2898
20
        block.get_by_position(result).column = std::move(result_column);
2899
20
        return Status::OK();
2900
22
    }
2901
2902
private:
2903
    Status clone_without_path(const JsonbValue* root, const JsonbPath& path,
2904
24
                              JsonbWriter& writer) const {
2905
        // Start writing at the root level
2906
24
        if (root->isObject()) {
2907
15
            writer.writeStartObject();
2908
15
            RETURN_IF_ERROR(clone_object_without_path(root, path, 0, writer));
2909
15
            writer.writeEndObject();
2910
15
        } else if (root->isArray()) {
2911
9
            writer.writeStartArray();
2912
9
            RETURN_IF_ERROR(clone_array_without_path(root, path, 0, writer));
2913
9
            writer.writeEndArray();
2914
9
        } else {
2915
            // Primitive value - can't remove anything from it
2916
0
            writer.writeValue(root);
2917
0
        }
2918
24
        return Status::OK();
2919
24
    }
2920
2921
    Status clone_object_without_path(const JsonbValue* obj_value, const JsonbPath& path,
2922
20
                                     size_t depth, JsonbWriter& writer) const {
2923
20
        const auto* obj = obj_value->unpack<ObjectVal>();
2924
2925
40
        for (const auto& kv : *obj) {
2926
40
            std::string key(kv.getKeyStr(), kv.klen());
2927
2928
40
            if (depth < path.get_leg_vector_size()) {
2929
40
                const auto* leg = path.get_leg_from_leg_vector(depth);
2930
40
                if (leg->type == MEMBER_CODE) {
2931
40
                    std::string target_key(leg->leg_ptr, leg->leg_len);
2932
2933
40
                    if (key == target_key) {
2934
20
                        if (depth == path.get_leg_vector_size() - 1) {
2935
12
                            continue;
2936
12
                        } else {
2937
8
                            writer.writeKey(kv.getKeyStr(), kv.klen());
2938
8
                            if (kv.value()->isObject()) {
2939
3
                                writer.writeStartObject();
2940
3
                                RETURN_IF_ERROR(clone_object_without_path(kv.value(), path,
2941
3
                                                                          depth + 1, writer));
2942
3
                                writer.writeEndObject();
2943
5
                            } else if (kv.value()->isArray()) {
2944
5
                                writer.writeStartArray();
2945
5
                                RETURN_IF_ERROR(clone_array_without_path(kv.value(), path,
2946
5
                                                                         depth + 1, writer));
2947
5
                                writer.writeEndArray();
2948
5
                            } else {
2949
0
                                writer.writeValue(kv.value());
2950
0
                            }
2951
8
                        }
2952
20
                    } else {
2953
20
                        writer.writeKey(kv.getKeyStr(), kv.klen());
2954
20
                        writer.writeValue(kv.value());
2955
20
                    }
2956
40
                } else {
2957
0
                    writer.writeKey(kv.getKeyStr(), kv.klen());
2958
0
                    writer.writeValue(kv.value());
2959
0
                }
2960
40
            } else {
2961
0
                writer.writeKey(kv.getKeyStr(), kv.klen());
2962
0
                writer.writeValue(kv.value());
2963
0
            }
2964
40
        }
2965
2966
20
        return Status::OK();
2967
20
    }
2968
2969
    Status clone_array_without_path(const JsonbValue* arr_value, const JsonbPath& path,
2970
17
                                    size_t depth, JsonbWriter& writer) const {
2971
17
        const auto* arr = arr_value->unpack<ArrayVal>();
2972
2973
17
        int index = 0;
2974
52
        for (const auto& element : *arr) {
2975
52
            if (depth < path.get_leg_vector_size()) {
2976
52
                const auto* leg = path.get_leg_from_leg_vector(depth);
2977
52
                if (leg->type == ARRAY_CODE) {
2978
52
                    int target_index = leg->array_index;
2979
2980
52
                    if (index == target_index) {
2981
17
                        if (depth == path.get_leg_vector_size() - 1) {
2982
                            // This is the target element to remove - skip it
2983
12
                        } else {
2984
5
                            if (element.isObject()) {
2985
2
                                writer.writeStartObject();
2986
2
                                RETURN_IF_ERROR(clone_object_without_path(&element, path, depth + 1,
2987
2
                                                                          writer));
2988
2
                                writer.writeEndObject();
2989
3
                            } else if (element.isArray()) {
2990
3
                                writer.writeStartArray();
2991
3
                                RETURN_IF_ERROR(clone_array_without_path(&element, path, depth + 1,
2992
3
                                                                         writer));
2993
3
                                writer.writeEndArray();
2994
3
                            } else {
2995
0
                                writer.writeValue(&element);
2996
0
                            }
2997
5
                        }
2998
35
                    } else {
2999
35
                        writer.writeValue(&element);
3000
35
                    }
3001
52
                } else {
3002
0
                    writer.writeValue(&element);
3003
0
                }
3004
52
            } else {
3005
0
                writer.writeValue(&element);
3006
0
            }
3007
52
            index++;
3008
52
        }
3009
3010
17
        return Status::OK();
3011
17
    }
3012
};
3013
3014
class FunctionStripNullValue : public IFunction {
3015
public:
3016
    static constexpr auto name = "strip_null_value";
3017
24
    static FunctionPtr create() { return std::make_shared<FunctionStripNullValue>(); }
3018
3019
1
    String get_name() const override { return name; }
3020
16
    bool is_variadic() const override { return false; }
3021
15
    size_t get_number_of_arguments() const override { return 1; }
3022
3023
30
    bool use_default_implementation_for_nulls() const override { return false; }
3024
3025
15
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
3026
15
        return make_nullable(std::make_shared<DataTypeJsonb>());
3027
15
    }
3028
3029
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
3030
15
                        uint32_t result, size_t input_rows_count) const override {
3031
15
        const auto arg_column =
3032
15
                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
3033
15
        const ColumnString* json_column = nullptr;
3034
15
        const NullMap* json_null_map = nullptr;
3035
15
        if (const auto* nullable_col = check_and_get_column<ColumnNullable>(arg_column.get())) {
3036
15
            json_column = assert_cast<const ColumnString*>(&nullable_col->get_nested_column());
3037
15
            json_null_map = &nullable_col->get_null_map_data();
3038
15
        } else {
3039
0
            json_column = assert_cast<const ColumnString*>(arg_column.get());
3040
0
        }
3041
3042
15
        auto return_data_type = make_nullable(std::make_shared<DataTypeJsonb>());
3043
15
        auto result_column = return_data_type->create_column();
3044
3045
15
        auto& result_nullmap = assert_cast<ColumnNullable&>(*result_column).get_null_map_data();
3046
15
        auto& result_data_col = assert_cast<ColumnString&>(
3047
15
                assert_cast<ColumnNullable&>(*result_column).get_nested_column());
3048
3049
15
        result_nullmap.resize_fill(input_rows_count, 0);
3050
60
        for (size_t i = 0; i != input_rows_count; ++i) {
3051
45
            if (json_null_map && (*json_null_map)[i]) {
3052
13
                result_nullmap[i] = 1;
3053
13
                result_data_col.insert_default();
3054
13
                continue;
3055
13
            }
3056
32
            const JsonbDocument* json_doc = nullptr;
3057
32
            const auto& json_str = json_column->get_data_at(i);
3058
32
            RETURN_IF_ERROR(
3059
32
                    JsonbDocument::checkAndCreateDocument(json_str.data, json_str.size, &json_doc));
3060
32
            if (json_doc) [[likely]] {
3061
32
                if (json_doc->getValue()->isNull()) {
3062
9
                    result_nullmap[i] = 1;
3063
9
                    result_data_col.insert_default();
3064
23
                } else {
3065
23
                    result_nullmap[i] = 0;
3066
23
                    result_data_col.insert_data(json_str.data, json_str.size);
3067
23
                }
3068
32
            } else {
3069
0
                result_nullmap[i] = 1;
3070
0
                result_data_col.insert_default();
3071
0
            }
3072
32
        }
3073
3074
15
        block.get_by_position(result).column = std::move(result_column);
3075
15
        return Status::OK();
3076
15
    }
3077
};
3078
3079
8
void register_function_jsonb(SimpleFunctionFactory& factory) {
3080
8
    factory.register_function<FunctionJsonbParse>(FunctionJsonbParse::name);
3081
8
    factory.register_alias(FunctionJsonbParse::name, FunctionJsonbParse::alias);
3082
8
    factory.register_function<FunctionJsonbParseErrorNull>("json_parse_error_to_null");
3083
8
    factory.register_alias("json_parse_error_to_null", "jsonb_parse_error_to_null");
3084
8
    factory.register_function<FunctionJsonbParseErrorValue>("json_parse_error_to_value");
3085
8
    factory.register_alias("json_parse_error_to_value", "jsonb_parse_error_to_value");
3086
3087
8
    factory.register_function<FunctionJsonbExists>();
3088
8
    factory.register_alias(FunctionJsonbExists::name, FunctionJsonbExists::alias);
3089
8
    factory.register_function<FunctionJsonbType>();
3090
8
    factory.register_alias(FunctionJsonbType::name, FunctionJsonbType::alias);
3091
3092
8
    factory.register_function<FunctionJsonbKeys>();
3093
8
    factory.register_alias(FunctionJsonbKeys::name, FunctionJsonbKeys::alias);
3094
3095
8
    factory.register_function<FunctionJsonbExtractIsnull>();
3096
8
    factory.register_alias(FunctionJsonbExtractIsnull::name, FunctionJsonbExtractIsnull::alias);
3097
3098
8
    factory.register_function<FunctionJsonbExtractJsonb>();
3099
8
    factory.register_alias(FunctionJsonbExtractJsonb::name, FunctionJsonbExtractJsonb::alias);
3100
8
    factory.register_function<FunctionJsonbExtractJsonbNoQuotes>();
3101
8
    factory.register_alias(FunctionJsonbExtractJsonbNoQuotes::name,
3102
8
                           FunctionJsonbExtractJsonbNoQuotes::alias);
3103
3104
8
    factory.register_function<FunctionJsonbLength<JsonbLengthAndPathImpl>>();
3105
8
    factory.register_function<FunctionJsonbContains<JsonbContainsAndPathImpl>>();
3106
3107
8
    factory.register_function<FunctionJsonSearch>();
3108
3109
8
    factory.register_function<FunctionJsonbArray<false>>();
3110
8
    factory.register_alias(FunctionJsonbArray<false>::name, FunctionJsonbArray<false>::alias);
3111
3112
8
    factory.register_function<FunctionJsonbArray<true>>("json_array_ignore_null");
3113
8
    factory.register_alias("json_array_ignore_null", "jsonb_array_ignore_null");
3114
3115
8
    factory.register_function<FunctionJsonbObject>();
3116
8
    factory.register_alias(FunctionJsonbObject::name, FunctionJsonbObject::alias);
3117
3118
8
    factory.register_function<FunctionJsonbModify<JsonbModifyType::Insert>>();
3119
8
    factory.register_alias(FunctionJsonbModify<JsonbModifyType::Insert>::name,
3120
8
                           FunctionJsonbModify<JsonbModifyType::Insert>::alias);
3121
8
    factory.register_function<FunctionJsonbModify<JsonbModifyType::Set>>();
3122
8
    factory.register_alias(FunctionJsonbModify<JsonbModifyType::Set>::name,
3123
8
                           FunctionJsonbModify<JsonbModifyType::Set>::alias);
3124
8
    factory.register_function<FunctionJsonbModify<JsonbModifyType::Replace>>();
3125
8
    factory.register_alias(FunctionJsonbModify<JsonbModifyType::Replace>::name,
3126
8
                           FunctionJsonbModify<JsonbModifyType::Replace>::alias);
3127
3128
8
    factory.register_function<FunctionJsonbRemove>();
3129
8
    factory.register_alias(FunctionJsonbRemove::name, FunctionJsonbRemove::alias);
3130
3131
8
    factory.register_function<FunctionStripNullValue>();
3132
8
}
3133
3134
} // namespace doris