Coverage Report

Created: 2026-06-23 19:09

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/function_jsonb.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include <glog/logging.h>
19
20
#include <algorithm>
21
#include <cstdlib>
22
#include <memory>
23
#include <string>
24
#include <string_view>
25
#include <tuple>
26
#include <type_traits>
27
#include <utility>
28
#include <variant>
29
30
#include "common/compiler_util.h" // IWYU pragma: keep
31
#include "common/status.h"
32
#include "core/assert_cast.h"
33
#include "core/block/block.h"
34
#include "core/block/column_numbers.h"
35
#include "core/block/column_with_type_and_name.h"
36
#include "core/column/column.h"
37
#include "core/column/column_array.h"
38
#include "core/column/column_const.h"
39
#include "core/column/column_nullable.h"
40
#include "core/column/column_string.h"
41
#include "core/column/column_vector.h"
42
#include "core/custom_allocator.h"
43
#include "core/data_type/data_type.h"
44
#include "core/data_type/data_type_array.h"
45
#include "core/data_type/data_type_jsonb.h"
46
#include "core/data_type/data_type_nullable.h"
47
#include "core/data_type/data_type_string.h"
48
#include "core/data_type/define_primitive_type.h"
49
#include "core/data_type/primitive_type.h"
50
#include "core/string_ref.h"
51
#include "core/types.h"
52
#include "core/value/jsonb_value.h"
53
#include "exec/common/stringop_substring.h"
54
#include "exec/common/template_helpers.hpp"
55
#include "exec/common/util.hpp"
56
#include "exprs/aggregate/aggregate_function.h"
57
#include "exprs/function/function.h"
58
#include "exprs/function/like.h"
59
#include "exprs/function/simple_function_factory.h"
60
#include "exprs/function_context.h"
61
#include "util/jsonb_document.h"
62
#include "util/jsonb_utils.h"
63
#include "util/jsonb_writer.h"
64
#include "util/simd/bits.h"
65
66
namespace doris {
67
68
enum class NullalbeMode { NULLABLE = 0, FOLLOW_INPUT };
69
70
enum class JsonbParseErrorMode { FAIL = 0, RETURN_NULL, RETURN_VALUE };
71
72
// func(string,string) -> json
73
template <NullalbeMode nullable_mode, JsonbParseErrorMode parse_error_handle_mode>
74
class FunctionJsonbParseBase : public IFunction {
75
private:
76
    struct FunctionJsonbParseState {
77
        StringRef default_value;
78
        JsonBinaryValue default_value_parser;
79
        bool has_const_default_value = false;
80
        bool default_is_null = false;
81
    };
82
83
public:
84
    static constexpr auto name = "json_parse";
85
    static constexpr auto alias = "jsonb_parse";
86
88
    static FunctionPtr create() { return std::make_shared<FunctionJsonbParseBase>(); }
_ZN5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE6createEv
Line
Count
Source
86
27
    static FunctionPtr create() { return std::make_shared<FunctionJsonbParseBase>(); }
_ZN5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE6createEv
Line
Count
Source
86
39
    static FunctionPtr create() { return std::make_shared<FunctionJsonbParseBase>(); }
_ZN5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE6createEv
Line
Count
Source
86
22
    static FunctionPtr create() { return std::make_shared<FunctionJsonbParseBase>(); }
87
88
4
    String get_name() const override {
89
4
        String error_mode;
90
4
        switch (parse_error_handle_mode) {
91
1
        case JsonbParseErrorMode::FAIL:
92
1
            break;
93
1
        case JsonbParseErrorMode::RETURN_NULL:
94
1
            error_mode = "_error_to_null";
95
1
            break;
96
2
        case JsonbParseErrorMode::RETURN_VALUE:
97
2
            error_mode = "_error_to_value";
98
2
            break;
99
4
        }
100
101
4
        return name + error_mode;
102
4
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE8get_nameB5cxx11Ev
Line
Count
Source
88
1
    String get_name() const override {
89
1
        String error_mode;
90
1
        switch (parse_error_handle_mode) {
91
1
        case JsonbParseErrorMode::FAIL:
92
1
            break;
93
0
        case JsonbParseErrorMode::RETURN_NULL:
94
0
            error_mode = "_error_to_null";
95
0
            break;
96
0
        case JsonbParseErrorMode::RETURN_VALUE:
97
0
            error_mode = "_error_to_value";
98
0
            break;
99
1
        }
100
101
1
        return name + error_mode;
102
1
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE8get_nameB5cxx11Ev
Line
Count
Source
88
1
    String get_name() const override {
89
1
        String error_mode;
90
1
        switch (parse_error_handle_mode) {
91
0
        case JsonbParseErrorMode::FAIL:
92
0
            break;
93
1
        case JsonbParseErrorMode::RETURN_NULL:
94
1
            error_mode = "_error_to_null";
95
1
            break;
96
0
        case JsonbParseErrorMode::RETURN_VALUE:
97
0
            error_mode = "_error_to_value";
98
0
            break;
99
1
        }
100
101
1
        return name + error_mode;
102
1
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE8get_nameB5cxx11Ev
Line
Count
Source
88
2
    String get_name() const override {
89
2
        String error_mode;
90
2
        switch (parse_error_handle_mode) {
91
0
        case JsonbParseErrorMode::FAIL:
92
0
            break;
93
0
        case JsonbParseErrorMode::RETURN_NULL:
94
0
            error_mode = "_error_to_null";
95
0
            break;
96
2
        case JsonbParseErrorMode::RETURN_VALUE:
97
2
            error_mode = "_error_to_value";
98
2
            break;
99
2
        }
100
101
2
        return name + error_mode;
102
2
    }
103
104
65
    bool is_variadic() const override {
105
65
        return parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE;
106
65
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE11is_variadicEv
Line
Count
Source
104
19
    bool is_variadic() const override {
105
19
        return parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE;
106
19
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE11is_variadicEv
Line
Count
Source
104
31
    bool is_variadic() const override {
105
31
        return parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE;
106
31
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE11is_variadicEv
Line
Count
Source
104
15
    bool is_variadic() const override {
105
15
        return parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE;
106
15
    }
107
108
49
    size_t get_number_of_arguments() const override {
109
49
        switch (parse_error_handle_mode) {
110
18
        case JsonbParseErrorMode::FAIL:
111
18
            return 1;
112
30
        case JsonbParseErrorMode::RETURN_NULL:
113
30
            return 1;
114
1
        case JsonbParseErrorMode::RETURN_VALUE:
115
1
            return 0;
116
49
        }
117
49
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE23get_number_of_argumentsEv
Line
Count
Source
108
18
    size_t get_number_of_arguments() const override {
109
18
        switch (parse_error_handle_mode) {
110
18
        case JsonbParseErrorMode::FAIL:
111
18
            return 1;
112
0
        case JsonbParseErrorMode::RETURN_NULL:
113
0
            return 1;
114
0
        case JsonbParseErrorMode::RETURN_VALUE:
115
0
            return 0;
116
18
        }
117
18
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE23get_number_of_argumentsEv
Line
Count
Source
108
30
    size_t get_number_of_arguments() const override {
109
30
        switch (parse_error_handle_mode) {
110
0
        case JsonbParseErrorMode::FAIL:
111
0
            return 1;
112
30
        case JsonbParseErrorMode::RETURN_NULL:
113
30
            return 1;
114
0
        case JsonbParseErrorMode::RETURN_VALUE:
115
0
            return 0;
116
30
        }
117
30
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE23get_number_of_argumentsEv
Line
Count
Source
108
1
    size_t get_number_of_arguments() const override {
109
1
        switch (parse_error_handle_mode) {
110
0
        case JsonbParseErrorMode::FAIL:
111
0
            return 1;
112
0
        case JsonbParseErrorMode::RETURN_NULL:
113
0
            return 1;
114
1
        case JsonbParseErrorMode::RETURN_VALUE:
115
1
            return 0;
116
1
        }
117
1
    }
118
119
61
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
120
61
        bool is_nullable = false;
121
61
        switch (nullable_mode) {
122
30
        case NullalbeMode::NULLABLE:
123
30
            is_nullable = true;
124
30
            break;
125
31
        case NullalbeMode::FOLLOW_INPUT: {
126
43
            for (auto arg : arguments) {
127
43
                is_nullable |= arg->is_nullable();
128
43
            }
129
31
            break;
130
0
        }
131
61
        }
132
133
61
        return is_nullable ? make_nullable(std::make_shared<DataTypeJsonb>())
134
61
                           : std::make_shared<DataTypeJsonb>();
135
61
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE
Line
Count
Source
119
18
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
120
18
        bool is_nullable = false;
121
18
        switch (nullable_mode) {
122
0
        case NullalbeMode::NULLABLE:
123
0
            is_nullable = true;
124
0
            break;
125
18
        case NullalbeMode::FOLLOW_INPUT: {
126
18
            for (auto arg : arguments) {
127
18
                is_nullable |= arg->is_nullable();
128
18
            }
129
18
            break;
130
0
        }
131
18
        }
132
133
18
        return is_nullable ? make_nullable(std::make_shared<DataTypeJsonb>())
134
18
                           : std::make_shared<DataTypeJsonb>();
135
18
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE
Line
Count
Source
119
30
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
120
30
        bool is_nullable = false;
121
30
        switch (nullable_mode) {
122
30
        case NullalbeMode::NULLABLE:
123
30
            is_nullable = true;
124
30
            break;
125
0
        case NullalbeMode::FOLLOW_INPUT: {
126
0
            for (auto arg : arguments) {
127
0
                is_nullable |= arg->is_nullable();
128
0
            }
129
0
            break;
130
0
        }
131
30
        }
132
133
30
        return is_nullable ? make_nullable(std::make_shared<DataTypeJsonb>())
134
30
                           : std::make_shared<DataTypeJsonb>();
135
30
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE
Line
Count
Source
119
13
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
120
13
        bool is_nullable = false;
121
13
        switch (nullable_mode) {
122
0
        case NullalbeMode::NULLABLE:
123
0
            is_nullable = true;
124
0
            break;
125
13
        case NullalbeMode::FOLLOW_INPUT: {
126
25
            for (auto arg : arguments) {
127
25
                is_nullable |= arg->is_nullable();
128
25
            }
129
13
            break;
130
0
        }
131
13
        }
132
133
13
        return is_nullable ? make_nullable(std::make_shared<DataTypeJsonb>())
134
13
                           : std::make_shared<DataTypeJsonb>();
135
13
    }
136
137
137
    bool use_default_implementation_for_nulls() const override { return false; }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE36use_default_implementation_for_nullsEv
Line
Count
Source
137
44
    bool use_default_implementation_for_nulls() const override { return false; }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE36use_default_implementation_for_nullsEv
Line
Count
Source
137
64
    bool use_default_implementation_for_nulls() const override { return false; }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE36use_default_implementation_for_nullsEv
Line
Count
Source
137
29
    bool use_default_implementation_for_nulls() const override { return false; }
138
139
306
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
140
306
        if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) {
141
60
            std::shared_ptr<FunctionJsonbParseState> state =
142
60
                    std::make_shared<FunctionJsonbParseState>();
143
60
            context->set_function_state(FunctionContext::FRAGMENT_LOCAL, state);
144
60
        }
145
306
        if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) {
146
129
            if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) {
147
12
                auto* state = reinterpret_cast<FunctionJsonbParseState*>(
148
12
                        context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
149
12
                if (state) {
150
12
                    if (context->get_num_args() == 2) {
151
9
                        if (context->is_col_constant(1)) {
152
2
                            const auto default_value_col = context->get_constant_col(1)->column_ptr;
153
2
                            if (default_value_col->is_null_at(0)) {
154
1
                                state->default_is_null = true;
155
1
                            } else {
156
1
                                const auto& default_value = default_value_col->get_data_at(0);
157
158
1
                                state->default_value = default_value;
159
1
                                state->has_const_default_value = true;
160
1
                            }
161
2
                        }
162
9
                    } else if (context->get_num_args() == 1) {
163
2
                        RETURN_IF_ERROR(
164
2
                                state->default_value_parser.from_json_string(std::string("{}")));
165
2
                        state->default_value = StringRef(state->default_value_parser.value(),
166
2
                                                         state->default_value_parser.size());
167
2
                        state->has_const_default_value = true;
168
2
                    }
169
12
                }
170
12
            }
171
172
129
            if (context->get_num_args() != 1 && context->get_num_args() != 2) {
173
1
                return Status::InvalidArgument(
174
1
                        "{} function should have 1 or 2 arguments, "
175
1
                        "but got {}",
176
1
                        get_name(), context->get_num_args());
177
1
            }
178
129
        }
179
128
        return Status::OK();
180
306
    }
_ZN5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE4openEPNS_15FunctionContextENS4_18FunctionStateScopeE
Line
Count
Source
139
67
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
140
67
        if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) {
141
18
            std::shared_ptr<FunctionJsonbParseState> state =
142
18
                    std::make_shared<FunctionJsonbParseState>();
143
18
            context->set_function_state(FunctionContext::FRAGMENT_LOCAL, state);
144
18
        }
145
        if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) {
146
            if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) {
147
                auto* state = reinterpret_cast<FunctionJsonbParseState*>(
148
                        context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
149
                if (state) {
150
                    if (context->get_num_args() == 2) {
151
                        if (context->is_col_constant(1)) {
152
                            const auto default_value_col = context->get_constant_col(1)->column_ptr;
153
                            if (default_value_col->is_null_at(0)) {
154
                                state->default_is_null = true;
155
                            } else {
156
                                const auto& default_value = default_value_col->get_data_at(0);
157
158
                                state->default_value = default_value;
159
                                state->has_const_default_value = true;
160
                            }
161
                        }
162
                    } else if (context->get_num_args() == 1) {
163
                        RETURN_IF_ERROR(
164
                                state->default_value_parser.from_json_string(std::string("{}")));
165
                        state->default_value = StringRef(state->default_value_parser.value(),
166
                                                         state->default_value_parser.size());
167
                        state->has_const_default_value = true;
168
                    }
169
                }
170
            }
171
172
            if (context->get_num_args() != 1 && context->get_num_args() != 2) {
173
                return Status::InvalidArgument(
174
                        "{} function should have 1 or 2 arguments, "
175
                        "but got {}",
176
                        get_name(), context->get_num_args());
177
            }
178
        }
179
67
        return Status::OK();
180
67
    }
_ZN5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE4openEPNS_15FunctionContextENS4_18FunctionStateScopeE
Line
Count
Source
139
110
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
140
110
        if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) {
141
30
            std::shared_ptr<FunctionJsonbParseState> state =
142
30
                    std::make_shared<FunctionJsonbParseState>();
143
30
            context->set_function_state(FunctionContext::FRAGMENT_LOCAL, state);
144
30
        }
145
        if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) {
146
            if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) {
147
                auto* state = reinterpret_cast<FunctionJsonbParseState*>(
148
                        context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
149
                if (state) {
150
                    if (context->get_num_args() == 2) {
151
                        if (context->is_col_constant(1)) {
152
                            const auto default_value_col = context->get_constant_col(1)->column_ptr;
153
                            if (default_value_col->is_null_at(0)) {
154
                                state->default_is_null = true;
155
                            } else {
156
                                const auto& default_value = default_value_col->get_data_at(0);
157
158
                                state->default_value = default_value;
159
                                state->has_const_default_value = true;
160
                            }
161
                        }
162
                    } else if (context->get_num_args() == 1) {
163
                        RETURN_IF_ERROR(
164
                                state->default_value_parser.from_json_string(std::string("{}")));
165
                        state->default_value = StringRef(state->default_value_parser.value(),
166
                                                         state->default_value_parser.size());
167
                        state->has_const_default_value = true;
168
                    }
169
                }
170
            }
171
172
            if (context->get_num_args() != 1 && context->get_num_args() != 2) {
173
                return Status::InvalidArgument(
174
                        "{} function should have 1 or 2 arguments, "
175
                        "but got {}",
176
                        get_name(), context->get_num_args());
177
            }
178
        }
179
110
        return Status::OK();
180
110
    }
_ZN5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE4openEPNS_15FunctionContextENS4_18FunctionStateScopeE
Line
Count
Source
139
129
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
140
129
        if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) {
141
12
            std::shared_ptr<FunctionJsonbParseState> state =
142
12
                    std::make_shared<FunctionJsonbParseState>();
143
12
            context->set_function_state(FunctionContext::FRAGMENT_LOCAL, state);
144
12
        }
145
129
        if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) {
146
129
            if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) {
147
12
                auto* state = reinterpret_cast<FunctionJsonbParseState*>(
148
12
                        context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
149
12
                if (state) {
150
12
                    if (context->get_num_args() == 2) {
151
9
                        if (context->is_col_constant(1)) {
152
2
                            const auto default_value_col = context->get_constant_col(1)->column_ptr;
153
2
                            if (default_value_col->is_null_at(0)) {
154
1
                                state->default_is_null = true;
155
1
                            } else {
156
1
                                const auto& default_value = default_value_col->get_data_at(0);
157
158
1
                                state->default_value = default_value;
159
1
                                state->has_const_default_value = true;
160
1
                            }
161
2
                        }
162
9
                    } else if (context->get_num_args() == 1) {
163
2
                        RETURN_IF_ERROR(
164
2
                                state->default_value_parser.from_json_string(std::string("{}")));
165
2
                        state->default_value = StringRef(state->default_value_parser.value(),
166
2
                                                         state->default_value_parser.size());
167
2
                        state->has_const_default_value = true;
168
2
                    }
169
12
                }
170
12
            }
171
172
129
            if (context->get_num_args() != 1 && context->get_num_args() != 2) {
173
1
                return Status::InvalidArgument(
174
1
                        "{} function should have 1 or 2 arguments, "
175
1
                        "but got {}",
176
1
                        get_name(), context->get_num_args());
177
1
            }
178
129
        }
179
128
        return Status::OK();
180
129
    }
181
182
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
183
76
                        uint32_t result, size_t input_rows_count) const override {
184
76
        auto&& [col_from, col_from_is_const] =
185
76
                unpack_if_const(block.get_by_position(arguments[0]).column);
186
187
76
        if (col_from_is_const && col_from->is_null_at(0)) {
188
1
            auto col_str = ColumnString::create();
189
1
            col_str->insert_default();
190
1
            auto null_map = ColumnUInt8::create(1, 1);
191
1
            auto nullable_col = ColumnNullable::create(std::move(col_str), std::move(null_map));
192
1
            block.get_by_position(result).column =
193
1
                    ColumnConst::create(std::move(nullable_col), input_rows_count);
194
1
            return Status::OK();
195
1
        }
196
197
75
        auto null_map = ColumnUInt8::create(0, 0);
198
75
        bool is_nullable = false;
199
200
75
        switch (nullable_mode) {
201
34
        case NullalbeMode::NULLABLE: {
202
34
            is_nullable = true;
203
34
            break;
204
0
        }
205
41
        case NullalbeMode::FOLLOW_INPUT: {
206
54
            for (auto arg : arguments) {
207
54
                is_nullable |= block.get_by_position(arg).type->is_nullable();
208
54
            }
209
41
            break;
210
0
        }
211
75
        }
212
213
75
        if (is_nullable) {
214
65
            null_map = ColumnUInt8::create(input_rows_count, 0);
215
65
        }
216
217
60
        const ColumnString* col_from_string = nullptr;
218
75
        if (const auto* nullable_col = check_and_get_column<ColumnNullable>(col_from.get())) {
219
40
            VectorizedUtils::update_null_map(null_map->get_data(),
220
40
                                             nullable_col->get_null_map_data(), col_from_is_const);
221
40
            col_from_string =
222
40
                    assert_cast<const ColumnString*>(nullable_col->get_nested_column_ptr().get());
223
40
        } else {
224
35
            col_from_string = assert_cast<const ColumnString*>(col_from.get());
225
35
        }
226
227
60
        StringRef constant_default_value;
228
60
        bool default_value_const = false;
229
60
        bool default_value_null_const = false;
230
60
        ColumnPtr default_value_col;
231
60
        JsonBinaryValue default_jsonb_value_parser;
232
60
        const ColumnString* default_value_str_col = nullptr;
233
60
        const NullMap* default_value_nullmap = nullptr;
234
60
        if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) {
235
15
            auto* state = reinterpret_cast<FunctionJsonbParseState*>(
236
15
                    context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
237
15
            if (state && state->has_const_default_value) {
238
7
                constant_default_value = state->default_value;
239
7
                default_value_null_const = state->default_is_null;
240
7
                default_value_const = true;
241
8
            } else if (arguments.size() > 1) {
242
8
                if (block.get_by_position(arguments[1]).type->get_primitive_type() !=
243
8
                    PrimitiveType::TYPE_JSONB) {
244
1
                    return Status::InvalidArgument(
245
1
                            "{} second argument should be jsonb type, but got {}", get_name(),
246
1
                            block.get_by_position(arguments[1]).type->get_name());
247
1
                }
248
7
                std::tie(default_value_col, default_value_const) =
249
7
                        unpack_if_const(block.get_by_position(arguments[1]).column);
250
7
                if (default_value_const) {
251
1
                    const JsonbDocument* default_value_doc = nullptr;
252
1
                    if (default_value_col->is_null_at(0)) {
253
1
                        default_value_null_const = true;
254
1
                    } else {
255
0
                        auto data = default_value_col->get_data_at(0);
256
0
                        RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(data.data, data.size,
257
0
                                                                              &default_value_doc));
258
0
                        constant_default_value = data;
259
0
                    }
260
6
                } else {
261
6
                    if (const auto* nullable_col =
262
6
                                check_and_get_column<ColumnNullable>(default_value_col.get())) {
263
4
                        default_value_str_col = assert_cast<const ColumnString*>(
264
4
                                nullable_col->get_nested_column_ptr().get());
265
4
                        default_value_nullmap = &(nullable_col->get_null_map_data());
266
4
                    } else {
267
2
                        default_value_str_col =
268
2
                                assert_cast<const ColumnString*>(default_value_col.get());
269
2
                    }
270
6
                }
271
7
            } else if (arguments.size() == 1) {
272
                // parse default value '{}' should always success.
273
0
                RETURN_IF_ERROR(default_jsonb_value_parser.from_json_string(std::string("{}")));
274
0
                default_value_const = true;
275
0
                constant_default_value.data = default_jsonb_value_parser.value();
276
0
                constant_default_value.size = default_jsonb_value_parser.size();
277
0
            }
278
15
        }
279
280
14
        auto col_to = ColumnString::create();
281
282
60
        col_to->reserve(input_rows_count);
283
284
60
        auto& null_map_data = null_map->get_data();
285
286
        // parser can be reused for performance
287
60
        JsonBinaryValue jsonb_value;
288
289
1.30k
        for (size_t i = 0; i < input_rows_count; ++i) {
290
1.22k
            if (is_nullable && null_map_data[i]) {
291
13
                col_to->insert_default();
292
13
                continue;
293
13
            }
294
295
1.21k
            auto index = index_check_const(i, col_from_is_const);
296
1.21k
            const auto& val = col_from_string->get_data_at(index);
297
1.21k
            auto st = jsonb_value.from_json_string(val.data, val.size);
298
1.21k
            if (st.ok()) {
299
                // insert jsonb format data
300
1.16k
                col_to->insert_data(jsonb_value.value(), jsonb_value.size());
301
1.16k
            } else {
302
54
                if constexpr (parse_error_handle_mode == JsonbParseErrorMode::FAIL) {
303
6
                    return Status::InvalidArgument(
304
6
                            "Parse json document failed at row {}, error: {}", i, st.to_string());
305
17
                } else if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_NULL) {
306
17
                    null_map_data[i] = 1;
307
17
                    col_to->insert_default();
308
31
                } else {
309
31
                    if (default_value_const) {
310
9
                        if (default_value_null_const) {
311
3
                            null_map_data[i] = 1;
312
3
                            col_to->insert_default();
313
6
                        } else {
314
6
                            col_to->insert_data(constant_default_value.data,
315
6
                                                constant_default_value.size);
316
6
                        }
317
22
                    } else {
318
22
                        if (default_value_nullmap && (*default_value_nullmap)[i]) {
319
3
                            null_map_data[i] = 1;
320
3
                            col_to->insert_default();
321
3
                            continue;
322
3
                        }
323
19
                        auto value = default_value_str_col->get_data_at(i);
324
19
                        col_to->insert_data(value.data, value.size);
325
19
                    }
326
31
                }
327
54
            }
328
1.21k
        }
329
330
78
        if (is_nullable) {
331
59
            block.replace_by_position(
332
59
                    result, ColumnNullable::create(std::move(col_to), std::move(null_map)));
333
59
        } else {
334
19
            block.replace_by_position(result, std::move(col_to));
335
19
        }
336
337
18
        return Status::OK();
338
15
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
183
26
                        uint32_t result, size_t input_rows_count) const override {
184
26
        auto&& [col_from, col_from_is_const] =
185
26
                unpack_if_const(block.get_by_position(arguments[0]).column);
186
187
26
        if (col_from_is_const && col_from->is_null_at(0)) {
188
0
            auto col_str = ColumnString::create();
189
0
            col_str->insert_default();
190
0
            auto null_map = ColumnUInt8::create(1, 1);
191
0
            auto nullable_col = ColumnNullable::create(std::move(col_str), std::move(null_map));
192
0
            block.get_by_position(result).column =
193
0
                    ColumnConst::create(std::move(nullable_col), input_rows_count);
194
0
            return Status::OK();
195
0
        }
196
197
26
        auto null_map = ColumnUInt8::create(0, 0);
198
26
        bool is_nullable = false;
199
200
26
        switch (nullable_mode) {
201
0
        case NullalbeMode::NULLABLE: {
202
0
            is_nullable = true;
203
0
            break;
204
0
        }
205
26
        case NullalbeMode::FOLLOW_INPUT: {
206
26
            for (auto arg : arguments) {
207
26
                is_nullable |= block.get_by_position(arg).type->is_nullable();
208
26
            }
209
26
            break;
210
0
        }
211
26
        }
212
213
26
        if (is_nullable) {
214
17
            null_map = ColumnUInt8::create(input_rows_count, 0);
215
17
        }
216
217
26
        const ColumnString* col_from_string = nullptr;
218
26
        if (const auto* nullable_col = check_and_get_column<ColumnNullable>(col_from.get())) {
219
17
            VectorizedUtils::update_null_map(null_map->get_data(),
220
17
                                             nullable_col->get_null_map_data(), col_from_is_const);
221
17
            col_from_string =
222
17
                    assert_cast<const ColumnString*>(nullable_col->get_nested_column_ptr().get());
223
17
        } else {
224
9
            col_from_string = assert_cast<const ColumnString*>(col_from.get());
225
9
        }
226
227
26
        StringRef constant_default_value;
228
26
        bool default_value_const = false;
229
26
        bool default_value_null_const = false;
230
26
        ColumnPtr default_value_col;
231
26
        JsonBinaryValue default_jsonb_value_parser;
232
26
        const ColumnString* default_value_str_col = nullptr;
233
26
        const NullMap* default_value_nullmap = nullptr;
234
        if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) {
235
            auto* state = reinterpret_cast<FunctionJsonbParseState*>(
236
                    context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
237
            if (state && state->has_const_default_value) {
238
                constant_default_value = state->default_value;
239
                default_value_null_const = state->default_is_null;
240
                default_value_const = true;
241
            } else if (arguments.size() > 1) {
242
                if (block.get_by_position(arguments[1]).type->get_primitive_type() !=
243
                    PrimitiveType::TYPE_JSONB) {
244
                    return Status::InvalidArgument(
245
                            "{} second argument should be jsonb type, but got {}", get_name(),
246
                            block.get_by_position(arguments[1]).type->get_name());
247
                }
248
                std::tie(default_value_col, default_value_const) =
249
                        unpack_if_const(block.get_by_position(arguments[1]).column);
250
                if (default_value_const) {
251
                    const JsonbDocument* default_value_doc = nullptr;
252
                    if (default_value_col->is_null_at(0)) {
253
                        default_value_null_const = true;
254
                    } else {
255
                        auto data = default_value_col->get_data_at(0);
256
                        RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(data.data, data.size,
257
                                                                              &default_value_doc));
258
                        constant_default_value = data;
259
                    }
260
                } else {
261
                    if (const auto* nullable_col =
262
                                check_and_get_column<ColumnNullable>(default_value_col.get())) {
263
                        default_value_str_col = assert_cast<const ColumnString*>(
264
                                nullable_col->get_nested_column_ptr().get());
265
                        default_value_nullmap = &(nullable_col->get_null_map_data());
266
                    } else {
267
                        default_value_str_col =
268
                                assert_cast<const ColumnString*>(default_value_col.get());
269
                    }
270
                }
271
            } else if (arguments.size() == 1) {
272
                // parse default value '{}' should always success.
273
                RETURN_IF_ERROR(default_jsonb_value_parser.from_json_string(std::string("{}")));
274
                default_value_const = true;
275
                constant_default_value.data = default_jsonb_value_parser.value();
276
                constant_default_value.size = default_jsonb_value_parser.size();
277
            }
278
        }
279
280
26
        auto col_to = ColumnString::create();
281
282
26
        col_to->reserve(input_rows_count);
283
284
26
        auto& null_map_data = null_map->get_data();
285
286
        // parser can be reused for performance
287
26
        JsonBinaryValue jsonb_value;
288
289
68
        for (size_t i = 0; i < input_rows_count; ++i) {
290
42
            if (is_nullable && null_map_data[i]) {
291
1
                col_to->insert_default();
292
1
                continue;
293
1
            }
294
295
41
            auto index = index_check_const(i, col_from_is_const);
296
41
            const auto& val = col_from_string->get_data_at(index);
297
41
            auto st = jsonb_value.from_json_string(val.data, val.size);
298
41
            if (st.ok()) {
299
                // insert jsonb format data
300
35
                col_to->insert_data(jsonb_value.value(), jsonb_value.size());
301
35
            } else {
302
6
                if constexpr (parse_error_handle_mode == JsonbParseErrorMode::FAIL) {
303
6
                    return Status::InvalidArgument(
304
6
                            "Parse json document failed at row {}, error: {}", i, st.to_string());
305
                } else if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_NULL) {
306
                    null_map_data[i] = 1;
307
                    col_to->insert_default();
308
                } else {
309
                    if (default_value_const) {
310
                        if (default_value_null_const) {
311
                            null_map_data[i] = 1;
312
                            col_to->insert_default();
313
                        } else {
314
                            col_to->insert_data(constant_default_value.data,
315
                                                constant_default_value.size);
316
                        }
317
                    } else {
318
                        if (default_value_nullmap && (*default_value_nullmap)[i]) {
319
                            null_map_data[i] = 1;
320
                            col_to->insert_default();
321
                            continue;
322
                        }
323
                        auto value = default_value_str_col->get_data_at(i);
324
                        col_to->insert_data(value.data, value.size);
325
                    }
326
                }
327
6
            }
328
41
        }
329
330
26
        if (is_nullable) {
331
11
            block.replace_by_position(
332
11
                    result, ColumnNullable::create(std::move(col_to), std::move(null_map)));
333
15
        } else {
334
15
            block.replace_by_position(result, std::move(col_to));
335
15
        }
336
337
26
        return Status::OK();
338
26
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE0ELNS_19JsonbParseErrorModeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
183
34
                        uint32_t result, size_t input_rows_count) const override {
184
34
        auto&& [col_from, col_from_is_const] =
185
34
                unpack_if_const(block.get_by_position(arguments[0]).column);
186
187
34
        if (col_from_is_const && col_from->is_null_at(0)) {
188
0
            auto col_str = ColumnString::create();
189
0
            col_str->insert_default();
190
0
            auto null_map = ColumnUInt8::create(1, 1);
191
0
            auto nullable_col = ColumnNullable::create(std::move(col_str), std::move(null_map));
192
0
            block.get_by_position(result).column =
193
0
                    ColumnConst::create(std::move(nullable_col), input_rows_count);
194
0
            return Status::OK();
195
0
        }
196
197
34
        auto null_map = ColumnUInt8::create(0, 0);
198
34
        bool is_nullable = false;
199
200
34
        switch (nullable_mode) {
201
34
        case NullalbeMode::NULLABLE: {
202
34
            is_nullable = true;
203
34
            break;
204
0
        }
205
0
        case NullalbeMode::FOLLOW_INPUT: {
206
0
            for (auto arg : arguments) {
207
0
                is_nullable |= block.get_by_position(arg).type->is_nullable();
208
0
            }
209
0
            break;
210
0
        }
211
34
        }
212
213
34
        if (is_nullable) {
214
34
            null_map = ColumnUInt8::create(input_rows_count, 0);
215
34
        }
216
217
34
        const ColumnString* col_from_string = nullptr;
218
34
        if (const auto* nullable_col = check_and_get_column<ColumnNullable>(col_from.get())) {
219
11
            VectorizedUtils::update_null_map(null_map->get_data(),
220
11
                                             nullable_col->get_null_map_data(), col_from_is_const);
221
11
            col_from_string =
222
11
                    assert_cast<const ColumnString*>(nullable_col->get_nested_column_ptr().get());
223
23
        } else {
224
23
            col_from_string = assert_cast<const ColumnString*>(col_from.get());
225
23
        }
226
227
34
        StringRef constant_default_value;
228
34
        bool default_value_const = false;
229
34
        bool default_value_null_const = false;
230
34
        ColumnPtr default_value_col;
231
34
        JsonBinaryValue default_jsonb_value_parser;
232
34
        const ColumnString* default_value_str_col = nullptr;
233
34
        const NullMap* default_value_nullmap = nullptr;
234
        if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) {
235
            auto* state = reinterpret_cast<FunctionJsonbParseState*>(
236
                    context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
237
            if (state && state->has_const_default_value) {
238
                constant_default_value = state->default_value;
239
                default_value_null_const = state->default_is_null;
240
                default_value_const = true;
241
            } else if (arguments.size() > 1) {
242
                if (block.get_by_position(arguments[1]).type->get_primitive_type() !=
243
                    PrimitiveType::TYPE_JSONB) {
244
                    return Status::InvalidArgument(
245
                            "{} second argument should be jsonb type, but got {}", get_name(),
246
                            block.get_by_position(arguments[1]).type->get_name());
247
                }
248
                std::tie(default_value_col, default_value_const) =
249
                        unpack_if_const(block.get_by_position(arguments[1]).column);
250
                if (default_value_const) {
251
                    const JsonbDocument* default_value_doc = nullptr;
252
                    if (default_value_col->is_null_at(0)) {
253
                        default_value_null_const = true;
254
                    } else {
255
                        auto data = default_value_col->get_data_at(0);
256
                        RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(data.data, data.size,
257
                                                                              &default_value_doc));
258
                        constant_default_value = data;
259
                    }
260
                } else {
261
                    if (const auto* nullable_col =
262
                                check_and_get_column<ColumnNullable>(default_value_col.get())) {
263
                        default_value_str_col = assert_cast<const ColumnString*>(
264
                                nullable_col->get_nested_column_ptr().get());
265
                        default_value_nullmap = &(nullable_col->get_null_map_data());
266
                    } else {
267
                        default_value_str_col =
268
                                assert_cast<const ColumnString*>(default_value_col.get());
269
                    }
270
                }
271
            } else if (arguments.size() == 1) {
272
                // parse default value '{}' should always success.
273
                RETURN_IF_ERROR(default_jsonb_value_parser.from_json_string(std::string("{}")));
274
                default_value_const = true;
275
                constant_default_value.data = default_jsonb_value_parser.value();
276
                constant_default_value.size = default_jsonb_value_parser.size();
277
            }
278
        }
279
280
34
        auto col_to = ColumnString::create();
281
282
34
        col_to->reserve(input_rows_count);
283
284
34
        auto& null_map_data = null_map->get_data();
285
286
        // parser can be reused for performance
287
34
        JsonBinaryValue jsonb_value;
288
289
99
        for (size_t i = 0; i < input_rows_count; ++i) {
290
65
            if (is_nullable && null_map_data[i]) {
291
6
                col_to->insert_default();
292
6
                continue;
293
6
            }
294
295
59
            auto index = index_check_const(i, col_from_is_const);
296
59
            const auto& val = col_from_string->get_data_at(index);
297
59
            auto st = jsonb_value.from_json_string(val.data, val.size);
298
59
            if (st.ok()) {
299
                // insert jsonb format data
300
42
                col_to->insert_data(jsonb_value.value(), jsonb_value.size());
301
42
            } else {
302
                if constexpr (parse_error_handle_mode == JsonbParseErrorMode::FAIL) {
303
                    return Status::InvalidArgument(
304
                            "Parse json document failed at row {}, error: {}", i, st.to_string());
305
17
                } else if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_NULL) {
306
17
                    null_map_data[i] = 1;
307
17
                    col_to->insert_default();
308
                } else {
309
                    if (default_value_const) {
310
                        if (default_value_null_const) {
311
                            null_map_data[i] = 1;
312
                            col_to->insert_default();
313
                        } else {
314
                            col_to->insert_data(constant_default_value.data,
315
                                                constant_default_value.size);
316
                        }
317
                    } else {
318
                        if (default_value_nullmap && (*default_value_nullmap)[i]) {
319
                            null_map_data[i] = 1;
320
                            col_to->insert_default();
321
                            continue;
322
                        }
323
                        auto value = default_value_str_col->get_data_at(i);
324
                        col_to->insert_data(value.data, value.size);
325
                    }
326
                }
327
17
            }
328
59
        }
329
330
34
        if (is_nullable) {
331
34
            block.replace_by_position(
332
34
                    result, ColumnNullable::create(std::move(col_to), std::move(null_map)));
333
34
        } else {
334
0
            block.replace_by_position(result, std::move(col_to));
335
0
        }
336
337
34
        return Status::OK();
338
34
    }
_ZNK5doris22FunctionJsonbParseBaseILNS_12NullalbeModeE1ELNS_19JsonbParseErrorModeE2EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
183
16
                        uint32_t result, size_t input_rows_count) const override {
184
16
        auto&& [col_from, col_from_is_const] =
185
16
                unpack_if_const(block.get_by_position(arguments[0]).column);
186
187
16
        if (col_from_is_const && col_from->is_null_at(0)) {
188
1
            auto col_str = ColumnString::create();
189
1
            col_str->insert_default();
190
1
            auto null_map = ColumnUInt8::create(1, 1);
191
1
            auto nullable_col = ColumnNullable::create(std::move(col_str), std::move(null_map));
192
1
            block.get_by_position(result).column =
193
1
                    ColumnConst::create(std::move(nullable_col), input_rows_count);
194
1
            return Status::OK();
195
1
        }
196
197
15
        auto null_map = ColumnUInt8::create(0, 0);
198
15
        bool is_nullable = false;
199
200
15
        switch (nullable_mode) {
201
0
        case NullalbeMode::NULLABLE: {
202
0
            is_nullable = true;
203
0
            break;
204
0
        }
205
15
        case NullalbeMode::FOLLOW_INPUT: {
206
28
            for (auto arg : arguments) {
207
28
                is_nullable |= block.get_by_position(arg).type->is_nullable();
208
28
            }
209
15
            break;
210
0
        }
211
15
        }
212
213
15
        if (is_nullable) {
214
14
            null_map = ColumnUInt8::create(input_rows_count, 0);
215
14
        }
216
217
15
        const ColumnString* col_from_string = nullptr;
218
15
        if (const auto* nullable_col = check_and_get_column<ColumnNullable>(col_from.get())) {
219
12
            VectorizedUtils::update_null_map(null_map->get_data(),
220
12
                                             nullable_col->get_null_map_data(), col_from_is_const);
221
12
            col_from_string =
222
12
                    assert_cast<const ColumnString*>(nullable_col->get_nested_column_ptr().get());
223
12
        } else {
224
3
            col_from_string = assert_cast<const ColumnString*>(col_from.get());
225
3
        }
226
227
15
        StringRef constant_default_value;
228
15
        bool default_value_const = false;
229
15
        bool default_value_null_const = false;
230
15
        ColumnPtr default_value_col;
231
15
        JsonBinaryValue default_jsonb_value_parser;
232
15
        const ColumnString* default_value_str_col = nullptr;
233
15
        const NullMap* default_value_nullmap = nullptr;
234
15
        if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) {
235
15
            auto* state = reinterpret_cast<FunctionJsonbParseState*>(
236
15
                    context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
237
15
            if (state && state->has_const_default_value) {
238
7
                constant_default_value = state->default_value;
239
7
                default_value_null_const = state->default_is_null;
240
7
                default_value_const = true;
241
8
            } else if (arguments.size() > 1) {
242
8
                if (block.get_by_position(arguments[1]).type->get_primitive_type() !=
243
8
                    PrimitiveType::TYPE_JSONB) {
244
1
                    return Status::InvalidArgument(
245
1
                            "{} second argument should be jsonb type, but got {}", get_name(),
246
1
                            block.get_by_position(arguments[1]).type->get_name());
247
1
                }
248
7
                std::tie(default_value_col, default_value_const) =
249
7
                        unpack_if_const(block.get_by_position(arguments[1]).column);
250
7
                if (default_value_const) {
251
1
                    const JsonbDocument* default_value_doc = nullptr;
252
1
                    if (default_value_col->is_null_at(0)) {
253
1
                        default_value_null_const = true;
254
1
                    } else {
255
0
                        auto data = default_value_col->get_data_at(0);
256
0
                        RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(data.data, data.size,
257
0
                                                                              &default_value_doc));
258
0
                        constant_default_value = data;
259
0
                    }
260
6
                } else {
261
6
                    if (const auto* nullable_col =
262
6
                                check_and_get_column<ColumnNullable>(default_value_col.get())) {
263
4
                        default_value_str_col = assert_cast<const ColumnString*>(
264
4
                                nullable_col->get_nested_column_ptr().get());
265
4
                        default_value_nullmap = &(nullable_col->get_null_map_data());
266
4
                    } else {
267
2
                        default_value_str_col =
268
2
                                assert_cast<const ColumnString*>(default_value_col.get());
269
2
                    }
270
6
                }
271
7
            } else if (arguments.size() == 1) {
272
                // parse default value '{}' should always success.
273
0
                RETURN_IF_ERROR(default_jsonb_value_parser.from_json_string(std::string("{}")));
274
0
                default_value_const = true;
275
0
                constant_default_value.data = default_jsonb_value_parser.value();
276
0
                constant_default_value.size = default_jsonb_value_parser.size();
277
0
            }
278
15
        }
279
280
14
        auto col_to = ColumnString::create();
281
282
15
        col_to->reserve(input_rows_count);
283
284
15
        auto& null_map_data = null_map->get_data();
285
286
        // parser can be reused for performance
287
15
        JsonBinaryValue jsonb_value;
288
289
1.14k
        for (size_t i = 0; i < input_rows_count; ++i) {
290
1.12k
            if (is_nullable && null_map_data[i]) {
291
6
                col_to->insert_default();
292
6
                continue;
293
6
            }
294
295
1.11k
            auto index = index_check_const(i, col_from_is_const);
296
1.11k
            const auto& val = col_from_string->get_data_at(index);
297
1.11k
            auto st = jsonb_value.from_json_string(val.data, val.size);
298
1.11k
            if (st.ok()) {
299
                // insert jsonb format data
300
1.08k
                col_to->insert_data(jsonb_value.value(), jsonb_value.size());
301
1.08k
            } else {
302
                if constexpr (parse_error_handle_mode == JsonbParseErrorMode::FAIL) {
303
                    return Status::InvalidArgument(
304
                            "Parse json document failed at row {}, error: {}", i, st.to_string());
305
                } else if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_NULL) {
306
                    null_map_data[i] = 1;
307
                    col_to->insert_default();
308
31
                } else {
309
31
                    if (default_value_const) {
310
9
                        if (default_value_null_const) {
311
3
                            null_map_data[i] = 1;
312
3
                            col_to->insert_default();
313
6
                        } else {
314
6
                            col_to->insert_data(constant_default_value.data,
315
6
                                                constant_default_value.size);
316
6
                        }
317
22
                    } else {
318
22
                        if (default_value_nullmap && (*default_value_nullmap)[i]) {
319
3
                            null_map_data[i] = 1;
320
3
                            col_to->insert_default();
321
3
                            continue;
322
3
                        }
323
19
                        auto value = default_value_str_col->get_data_at(i);
324
19
                        col_to->insert_data(value.data, value.size);
325
19
                    }
326
31
                }
327
31
            }
328
1.11k
        }
329
330
18
        if (is_nullable) {
331
14
            block.replace_by_position(
332
14
                    result, ColumnNullable::create(std::move(col_to), std::move(null_map)));
333
14
        } else {
334
4
            block.replace_by_position(result, std::move(col_to));
335
4
        }
336
337
18
        return Status::OK();
338
15
    }
339
};
340
341
// jsonb_parse return type nullable as input
342
using FunctionJsonbParse =
343
        FunctionJsonbParseBase<NullalbeMode::FOLLOW_INPUT, JsonbParseErrorMode::FAIL>;
344
using FunctionJsonbParseErrorNull =
345
        FunctionJsonbParseBase<NullalbeMode::NULLABLE, JsonbParseErrorMode::RETURN_NULL>;
346
using FunctionJsonbParseErrorValue =
347
        FunctionJsonbParseBase<NullalbeMode::FOLLOW_INPUT, JsonbParseErrorMode::RETURN_VALUE>;
348
349
// func(jsonb, [varchar, varchar, ...]) -> nullable(type)
350
template <typename Impl>
351
class FunctionJsonbExtract : public IFunction {
352
public:
353
    static constexpr auto name = Impl::name;
354
    static constexpr auto alias = Impl::alias;
355
1.73k
    static FunctionPtr create() { return std::make_shared<FunctionJsonbExtract>(); }
_ZN5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE6createEv
Line
Count
Source
355
149
    static FunctionPtr create() { return std::make_shared<FunctionJsonbExtract>(); }
_ZN5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE6createEv
Line
Count
Source
355
148
    static FunctionPtr create() { return std::make_shared<FunctionJsonbExtract>(); }
_ZN5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE6createEv
Line
Count
Source
355
1.41k
    static FunctionPtr create() { return std::make_shared<FunctionJsonbExtract>(); }
_ZN5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE6createEv
Line
Count
Source
355
18
    static FunctionPtr create() { return std::make_shared<FunctionJsonbExtract>(); }
356
0
    String get_name() const override { return name; }
Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE8get_nameB5cxx11Ev
357
1.70k
    bool is_variadic() const override { return true; }
_ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE11is_variadicEv
Line
Count
Source
357
141
    bool is_variadic() const override { return true; }
_ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE11is_variadicEv
Line
Count
Source
357
140
    bool is_variadic() const override { return true; }
_ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE11is_variadicEv
Line
Count
Source
357
1.41k
    bool is_variadic() const override { return true; }
_ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE11is_variadicEv
Line
Count
Source
357
10
    bool is_variadic() const override { return true; }
358
1
    size_t get_number_of_arguments() const override { return 0; }
Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE23get_number_of_argumentsEv
_ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE23get_number_of_argumentsEv
Line
Count
Source
358
1
    size_t get_number_of_arguments() const override { return 0; }
Unexecuted instantiation: _ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE23get_number_of_argumentsEv
359
14.4k
    bool use_default_implementation_for_nulls() const override { return false; }
_ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE36use_default_implementation_for_nullsEv
Line
Count
Source
359
1.46k
    bool use_default_implementation_for_nulls() const override { return false; }
_ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE36use_default_implementation_for_nullsEv
Line
Count
Source
359
1.46k
    bool use_default_implementation_for_nulls() const override { return false; }
_ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE36use_default_implementation_for_nullsEv
Line
Count
Source
359
11.4k
    bool use_default_implementation_for_nulls() const override { return false; }
_ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE36use_default_implementation_for_nullsEv
Line
Count
Source
359
18
    bool use_default_implementation_for_nulls() const override { return false; }
360
1.69k
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
361
1.69k
        return make_nullable(std::make_shared<typename Impl::ReturnType>());
362
1.69k
    }
_ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
360
140
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
361
140
        return make_nullable(std::make_shared<typename Impl::ReturnType>());
362
140
    }
_ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
360
139
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
361
139
        return make_nullable(std::make_shared<typename Impl::ReturnType>());
362
139
    }
_ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
360
1.40k
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
361
1.40k
        return make_nullable(std::make_shared<typename Impl::ReturnType>());
362
1.40k
    }
_ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
360
9
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
361
9
        return make_nullable(std::make_shared<typename Impl::ReturnType>());
362
9
    }
363
32
    DataTypes get_variadic_argument_types_impl() const override {
364
        if constexpr (HasGetVariadicArgumentTypesImpl<Impl>) {
365
            return Impl::get_variadic_argument_types_impl();
366
32
        } else {
367
32
            return {};
368
32
        }
369
32
    }
_ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE32get_variadic_argument_types_implEv
Line
Count
Source
363
8
    DataTypes get_variadic_argument_types_impl() const override {
364
        if constexpr (HasGetVariadicArgumentTypesImpl<Impl>) {
365
            return Impl::get_variadic_argument_types_impl();
366
8
        } else {
367
8
            return {};
368
8
        }
369
8
    }
_ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE32get_variadic_argument_types_implEv
Line
Count
Source
363
8
    DataTypes get_variadic_argument_types_impl() const override {
364
        if constexpr (HasGetVariadicArgumentTypesImpl<Impl>) {
365
            return Impl::get_variadic_argument_types_impl();
366
8
        } else {
367
8
            return {};
368
8
        }
369
8
    }
_ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE32get_variadic_argument_types_implEv
Line
Count
Source
363
8
    DataTypes get_variadic_argument_types_impl() const override {
364
        if constexpr (HasGetVariadicArgumentTypesImpl<Impl>) {
365
            return Impl::get_variadic_argument_types_impl();
366
8
        } else {
367
8
            return {};
368
8
        }
369
8
    }
_ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE32get_variadic_argument_types_implEv
Line
Count
Source
363
8
    DataTypes get_variadic_argument_types_impl() const override {
364
        if constexpr (HasGetVariadicArgumentTypesImpl<Impl>) {
365
            return Impl::get_variadic_argument_types_impl();
366
8
        } else {
367
8
            return {};
368
8
        }
369
8
    }
370
371
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
372
12.7k
                        uint32_t result, size_t input_rows_count) const override {
373
12.7k
        DORIS_CHECK_GE(arguments.size(), 2);
374
375
12.7k
        ColumnPtr jsonb_data_column;
376
12.7k
        bool jsonb_data_const = false;
377
12.7k
        const NullMap* data_null_map = nullptr;
378
379
12.7k
        if (block.get_by_position(arguments[0]).type->get_primitive_type() !=
380
12.7k
            PrimitiveType::TYPE_JSONB) {
381
1
            return Status::InvalidArgument(
382
1
                    "jsonb_extract first argument should be json type, but got {}",
383
1
                    block.get_by_position(arguments[0]).type->get_name());
384
1
        }
385
386
        // prepare jsonb data column
387
12.7k
        std::tie(jsonb_data_column, jsonb_data_const) =
388
12.7k
                unpack_if_const(block.get_by_position(arguments[0]).column);
389
12.7k
        if (const auto* nullable_column =
390
12.7k
                    check_and_get_column<ColumnNullable>(jsonb_data_column.get())) {
391
10.8k
            jsonb_data_column = nullable_column->get_nested_column_ptr();
392
10.8k
            data_null_map = &nullable_column->get_null_map_data();
393
10.8k
        }
394
12.7k
        const auto& ldata = assert_cast<const ColumnString*>(jsonb_data_column.get())->get_chars();
395
12.7k
        const auto& loffsets =
396
12.7k
                assert_cast<const ColumnString*>(jsonb_data_column.get())->get_offsets();
397
398
        // prepare parse path column prepare
399
12.7k
        std::vector<const ColumnString*> jsonb_path_columns;
400
12.7k
        std::vector<bool> path_const(arguments.size() - 1);
401
12.7k
        std::vector<const NullMap*> path_null_maps(arguments.size() - 1, nullptr);
402
25.7k
        for (int i = 0; i < arguments.size() - 1; ++i) {
403
13.0k
            ColumnPtr path_column;
404
13.0k
            bool is_const = false;
405
13.0k
            std::tie(path_column, is_const) =
406
13.0k
                    unpack_if_const(block.get_by_position(arguments[i + 1]).column);
407
13.0k
            path_const[i] = is_const;
408
13.0k
            if (const auto* nullable_column =
409
13.0k
                        check_and_get_column<ColumnNullable>(path_column.get())) {
410
70
                path_column = nullable_column->get_nested_column_ptr();
411
70
                path_null_maps[i] = &nullable_column->get_null_map_data();
412
70
            }
413
13.0k
            jsonb_path_columns.push_back(assert_cast<const ColumnString*>(path_column.get()));
414
13.0k
        }
415
416
12.7k
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
417
12.7k
        auto res = Impl::ColumnType::create();
418
419
        // execute Impl
420
        if constexpr (std::is_same_v<typename Impl::ReturnType, DataTypeString> ||
421
11.3k
                      std::is_same_v<typename Impl::ReturnType, DataTypeJsonb>) {
422
11.3k
            auto& res_data = res->get_chars();
423
11.3k
            auto& res_offsets = res->get_offsets();
424
11.3k
            RETURN_IF_ERROR(Impl::vector_vector_v2(
425
11.3k
                    context, ldata, loffsets, data_null_map, jsonb_data_const, jsonb_path_columns,
426
11.3k
                    path_null_maps, path_const, res_data, res_offsets, null_map->get_data()));
427
11.3k
        } else {
428
            // not support other extract type for now (e.g. int, double, ...)
429
1.32k
            DORIS_CHECK_EQ(jsonb_path_columns.size(), 1);
430
1.32k
            const auto& rdata = jsonb_path_columns[0]->get_chars();
431
1.32k
            const auto& roffsets = jsonb_path_columns[0]->get_offsets();
432
433
1.32k
            auto create_all_null_result = [&]() {
434
2
                res = Impl::ColumnType::create();
435
2
                res->insert_default();
436
2
                auto nullable_column =
437
2
                        ColumnNullable::create(std::move(res), ColumnUInt8::create(1, 1));
438
2
                auto const_column =
439
2
                        ColumnConst::create(std::move(nullable_column), input_rows_count);
440
2
                block.get_by_position(result).column = std::move(const_column);
441
2
                return Status::OK();
442
2
            };
443
444
1.32k
            if (jsonb_data_const) {
445
2
                if (data_null_map && (*data_null_map)[0]) {
446
1
                    return create_all_null_result();
447
1
                }
448
449
1
                RETURN_IF_ERROR(Impl::scalar_vector(context, jsonb_data_column->get_data_at(0),
450
1
                                                    rdata, roffsets, path_null_maps[0],
451
1
                                                    res->get_data(), null_map->get_data()));
452
1.32k
            } else if (path_const[0]) {
453
1.32k
                if (path_null_maps[0] && (*path_null_maps[0])[0]) {
454
1
                    return create_all_null_result();
455
1
                }
456
1.32k
                RETURN_IF_ERROR(Impl::vector_scalar(context, ldata, loffsets, data_null_map,
457
1.32k
                                                    jsonb_path_columns[0]->get_data_at(0),
458
1.32k
                                                    res->get_data(), null_map->get_data()));
459
1.32k
            } else {
460
4
                RETURN_IF_ERROR(Impl::vector_vector(context, ldata, loffsets, data_null_map, rdata,
461
4
                                                    roffsets, path_null_maps[0], res->get_data(),
462
4
                                                    null_map->get_data()));
463
4
            }
464
1.32k
        }
465
466
12.7k
        block.get_by_position(result).column =
467
12.7k
                ColumnNullable::create(std::move(res), std::move(null_map));
468
12.7k
        return Status::OK();
469
12.7k
    }
_ZNK5doris20FunctionJsonbExtractINS_13JsonbTypeImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
372
1.32k
                        uint32_t result, size_t input_rows_count) const override {
373
1.32k
        DORIS_CHECK_GE(arguments.size(), 2);
374
375
1.32k
        ColumnPtr jsonb_data_column;
376
1.32k
        bool jsonb_data_const = false;
377
1.32k
        const NullMap* data_null_map = nullptr;
378
379
1.32k
        if (block.get_by_position(arguments[0]).type->get_primitive_type() !=
380
1.32k
            PrimitiveType::TYPE_JSONB) {
381
0
            return Status::InvalidArgument(
382
0
                    "jsonb_extract first argument should be json type, but got {}",
383
0
                    block.get_by_position(arguments[0]).type->get_name());
384
0
        }
385
386
        // prepare jsonb data column
387
1.32k
        std::tie(jsonb_data_column, jsonb_data_const) =
388
1.32k
                unpack_if_const(block.get_by_position(arguments[0]).column);
389
1.32k
        if (const auto* nullable_column =
390
1.32k
                    check_and_get_column<ColumnNullable>(jsonb_data_column.get())) {
391
1.14k
            jsonb_data_column = nullable_column->get_nested_column_ptr();
392
1.14k
            data_null_map = &nullable_column->get_null_map_data();
393
1.14k
        }
394
1.32k
        const auto& ldata = assert_cast<const ColumnString*>(jsonb_data_column.get())->get_chars();
395
1.32k
        const auto& loffsets =
396
1.32k
                assert_cast<const ColumnString*>(jsonb_data_column.get())->get_offsets();
397
398
        // prepare parse path column prepare
399
1.32k
        std::vector<const ColumnString*> jsonb_path_columns;
400
1.32k
        std::vector<bool> path_const(arguments.size() - 1);
401
1.32k
        std::vector<const NullMap*> path_null_maps(arguments.size() - 1, nullptr);
402
2.65k
        for (int i = 0; i < arguments.size() - 1; ++i) {
403
1.32k
            ColumnPtr path_column;
404
1.32k
            bool is_const = false;
405
1.32k
            std::tie(path_column, is_const) =
406
1.32k
                    unpack_if_const(block.get_by_position(arguments[i + 1]).column);
407
1.32k
            path_const[i] = is_const;
408
1.32k
            if (const auto* nullable_column =
409
1.32k
                        check_and_get_column<ColumnNullable>(path_column.get())) {
410
5
                path_column = nullable_column->get_nested_column_ptr();
411
5
                path_null_maps[i] = &nullable_column->get_null_map_data();
412
5
            }
413
1.32k
            jsonb_path_columns.push_back(assert_cast<const ColumnString*>(path_column.get()));
414
1.32k
        }
415
416
1.32k
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
417
1.32k
        auto res = Impl::ColumnType::create();
418
419
        // execute Impl
420
        if constexpr (std::is_same_v<typename Impl::ReturnType, DataTypeString> ||
421
1.32k
                      std::is_same_v<typename Impl::ReturnType, DataTypeJsonb>) {
422
1.32k
            auto& res_data = res->get_chars();
423
1.32k
            auto& res_offsets = res->get_offsets();
424
1.32k
            RETURN_IF_ERROR(Impl::vector_vector_v2(
425
1.32k
                    context, ldata, loffsets, data_null_map, jsonb_data_const, jsonb_path_columns,
426
1.32k
                    path_null_maps, path_const, res_data, res_offsets, null_map->get_data()));
427
        } else {
428
            // not support other extract type for now (e.g. int, double, ...)
429
            DORIS_CHECK_EQ(jsonb_path_columns.size(), 1);
430
            const auto& rdata = jsonb_path_columns[0]->get_chars();
431
            const auto& roffsets = jsonb_path_columns[0]->get_offsets();
432
433
            auto create_all_null_result = [&]() {
434
                res = Impl::ColumnType::create();
435
                res->insert_default();
436
                auto nullable_column =
437
                        ColumnNullable::create(std::move(res), ColumnUInt8::create(1, 1));
438
                auto const_column =
439
                        ColumnConst::create(std::move(nullable_column), input_rows_count);
440
                block.get_by_position(result).column = std::move(const_column);
441
                return Status::OK();
442
            };
443
444
            if (jsonb_data_const) {
445
                if (data_null_map && (*data_null_map)[0]) {
446
                    return create_all_null_result();
447
                }
448
449
                RETURN_IF_ERROR(Impl::scalar_vector(context, jsonb_data_column->get_data_at(0),
450
                                                    rdata, roffsets, path_null_maps[0],
451
                                                    res->get_data(), null_map->get_data()));
452
            } else if (path_const[0]) {
453
                if (path_null_maps[0] && (*path_null_maps[0])[0]) {
454
                    return create_all_null_result();
455
                }
456
                RETURN_IF_ERROR(Impl::vector_scalar(context, ldata, loffsets, data_null_map,
457
                                                    jsonb_path_columns[0]->get_data_at(0),
458
                                                    res->get_data(), null_map->get_data()));
459
            } else {
460
                RETURN_IF_ERROR(Impl::vector_vector(context, ldata, loffsets, data_null_map, rdata,
461
                                                    roffsets, path_null_maps[0], res->get_data(),
462
                                                    null_map->get_data()));
463
            }
464
        }
465
466
1.32k
        block.get_by_position(result).column =
467
1.32k
                ColumnNullable::create(std::move(res), std::move(null_map));
468
1.32k
        return Status::OK();
469
1.32k
    }
_ZNK5doris20FunctionJsonbExtractINS_18JsonbExtractIsnullEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
372
1.32k
                        uint32_t result, size_t input_rows_count) const override {
373
1.32k
        DORIS_CHECK_GE(arguments.size(), 2);
374
375
1.32k
        ColumnPtr jsonb_data_column;
376
1.32k
        bool jsonb_data_const = false;
377
1.32k
        const NullMap* data_null_map = nullptr;
378
379
1.32k
        if (block.get_by_position(arguments[0]).type->get_primitive_type() !=
380
1.32k
            PrimitiveType::TYPE_JSONB) {
381
0
            return Status::InvalidArgument(
382
0
                    "jsonb_extract first argument should be json type, but got {}",
383
0
                    block.get_by_position(arguments[0]).type->get_name());
384
0
        }
385
386
        // prepare jsonb data column
387
1.32k
        std::tie(jsonb_data_column, jsonb_data_const) =
388
1.32k
                unpack_if_const(block.get_by_position(arguments[0]).column);
389
1.32k
        if (const auto* nullable_column =
390
1.32k
                    check_and_get_column<ColumnNullable>(jsonb_data_column.get())) {
391
1.14k
            jsonb_data_column = nullable_column->get_nested_column_ptr();
392
1.14k
            data_null_map = &nullable_column->get_null_map_data();
393
1.14k
        }
394
1.32k
        const auto& ldata = assert_cast<const ColumnString*>(jsonb_data_column.get())->get_chars();
395
1.32k
        const auto& loffsets =
396
1.32k
                assert_cast<const ColumnString*>(jsonb_data_column.get())->get_offsets();
397
398
        // prepare parse path column prepare
399
1.32k
        std::vector<const ColumnString*> jsonb_path_columns;
400
1.32k
        std::vector<bool> path_const(arguments.size() - 1);
401
1.32k
        std::vector<const NullMap*> path_null_maps(arguments.size() - 1, nullptr);
402
2.65k
        for (int i = 0; i < arguments.size() - 1; ++i) {
403
1.32k
            ColumnPtr path_column;
404
1.32k
            bool is_const = false;
405
1.32k
            std::tie(path_column, is_const) =
406
1.32k
                    unpack_if_const(block.get_by_position(arguments[i + 1]).column);
407
1.32k
            path_const[i] = is_const;
408
1.32k
            if (const auto* nullable_column =
409
1.32k
                        check_and_get_column<ColumnNullable>(path_column.get())) {
410
4
                path_column = nullable_column->get_nested_column_ptr();
411
4
                path_null_maps[i] = &nullable_column->get_null_map_data();
412
4
            }
413
1.32k
            jsonb_path_columns.push_back(assert_cast<const ColumnString*>(path_column.get()));
414
1.32k
        }
415
416
1.32k
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
417
1.32k
        auto res = Impl::ColumnType::create();
418
419
        // execute Impl
420
        if constexpr (std::is_same_v<typename Impl::ReturnType, DataTypeString> ||
421
                      std::is_same_v<typename Impl::ReturnType, DataTypeJsonb>) {
422
            auto& res_data = res->get_chars();
423
            auto& res_offsets = res->get_offsets();
424
            RETURN_IF_ERROR(Impl::vector_vector_v2(
425
                    context, ldata, loffsets, data_null_map, jsonb_data_const, jsonb_path_columns,
426
                    path_null_maps, path_const, res_data, res_offsets, null_map->get_data()));
427
1.32k
        } else {
428
            // not support other extract type for now (e.g. int, double, ...)
429
1.32k
            DORIS_CHECK_EQ(jsonb_path_columns.size(), 1);
430
1.32k
            const auto& rdata = jsonb_path_columns[0]->get_chars();
431
1.32k
            const auto& roffsets = jsonb_path_columns[0]->get_offsets();
432
433
1.32k
            auto create_all_null_result = [&]() {
434
1.32k
                res = Impl::ColumnType::create();
435
1.32k
                res->insert_default();
436
1.32k
                auto nullable_column =
437
1.32k
                        ColumnNullable::create(std::move(res), ColumnUInt8::create(1, 1));
438
1.32k
                auto const_column =
439
1.32k
                        ColumnConst::create(std::move(nullable_column), input_rows_count);
440
1.32k
                block.get_by_position(result).column = std::move(const_column);
441
1.32k
                return Status::OK();
442
1.32k
            };
443
444
1.32k
            if (jsonb_data_const) {
445
2
                if (data_null_map && (*data_null_map)[0]) {
446
1
                    return create_all_null_result();
447
1
                }
448
449
1
                RETURN_IF_ERROR(Impl::scalar_vector(context, jsonb_data_column->get_data_at(0),
450
1
                                                    rdata, roffsets, path_null_maps[0],
451
1
                                                    res->get_data(), null_map->get_data()));
452
1.32k
            } else if (path_const[0]) {
453
1.32k
                if (path_null_maps[0] && (*path_null_maps[0])[0]) {
454
1
                    return create_all_null_result();
455
1
                }
456
1.32k
                RETURN_IF_ERROR(Impl::vector_scalar(context, ldata, loffsets, data_null_map,
457
1.32k
                                                    jsonb_path_columns[0]->get_data_at(0),
458
1.32k
                                                    res->get_data(), null_map->get_data()));
459
1.32k
            } else {
460
4
                RETURN_IF_ERROR(Impl::vector_vector(context, ldata, loffsets, data_null_map, rdata,
461
4
                                                    roffsets, path_null_maps[0], res->get_data(),
462
4
                                                    null_map->get_data()));
463
4
            }
464
1.32k
        }
465
466
1.32k
        block.get_by_position(result).column =
467
1.32k
                ColumnNullable::create(std::move(res), std::move(null_map));
468
1.32k
        return Status::OK();
469
1.32k
    }
_ZNK5doris20FunctionJsonbExtractINS_17JsonbExtractJsonbEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
372
10.0k
                        uint32_t result, size_t input_rows_count) const override {
373
10.0k
        DORIS_CHECK_GE(arguments.size(), 2);
374
375
10.0k
        ColumnPtr jsonb_data_column;
376
10.0k
        bool jsonb_data_const = false;
377
10.0k
        const NullMap* data_null_map = nullptr;
378
379
10.0k
        if (block.get_by_position(arguments[0]).type->get_primitive_type() !=
380
10.0k
            PrimitiveType::TYPE_JSONB) {
381
1
            return Status::InvalidArgument(
382
1
                    "jsonb_extract first argument should be json type, but got {}",
383
1
                    block.get_by_position(arguments[0]).type->get_name());
384
1
        }
385
386
        // prepare jsonb data column
387
10.0k
        std::tie(jsonb_data_column, jsonb_data_const) =
388
10.0k
                unpack_if_const(block.get_by_position(arguments[0]).column);
389
10.0k
        if (const auto* nullable_column =
390
10.0k
                    check_and_get_column<ColumnNullable>(jsonb_data_column.get())) {
391
8.58k
            jsonb_data_column = nullable_column->get_nested_column_ptr();
392
8.58k
            data_null_map = &nullable_column->get_null_map_data();
393
8.58k
        }
394
10.0k
        const auto& ldata = assert_cast<const ColumnString*>(jsonb_data_column.get())->get_chars();
395
10.0k
        const auto& loffsets =
396
10.0k
                assert_cast<const ColumnString*>(jsonb_data_column.get())->get_offsets();
397
398
        // prepare parse path column prepare
399
10.0k
        std::vector<const ColumnString*> jsonb_path_columns;
400
10.0k
        std::vector<bool> path_const(arguments.size() - 1);
401
10.0k
        std::vector<const NullMap*> path_null_maps(arguments.size() - 1, nullptr);
402
20.4k
        for (int i = 0; i < arguments.size() - 1; ++i) {
403
10.3k
            ColumnPtr path_column;
404
10.3k
            bool is_const = false;
405
10.3k
            std::tie(path_column, is_const) =
406
10.3k
                    unpack_if_const(block.get_by_position(arguments[i + 1]).column);
407
10.3k
            path_const[i] = is_const;
408
10.3k
            if (const auto* nullable_column =
409
10.3k
                        check_and_get_column<ColumnNullable>(path_column.get())) {
410
60
                path_column = nullable_column->get_nested_column_ptr();
411
60
                path_null_maps[i] = &nullable_column->get_null_map_data();
412
60
            }
413
10.3k
            jsonb_path_columns.push_back(assert_cast<const ColumnString*>(path_column.get()));
414
10.3k
        }
415
416
10.0k
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
417
10.0k
        auto res = Impl::ColumnType::create();
418
419
        // execute Impl
420
        if constexpr (std::is_same_v<typename Impl::ReturnType, DataTypeString> ||
421
10.0k
                      std::is_same_v<typename Impl::ReturnType, DataTypeJsonb>) {
422
10.0k
            auto& res_data = res->get_chars();
423
10.0k
            auto& res_offsets = res->get_offsets();
424
10.0k
            RETURN_IF_ERROR(Impl::vector_vector_v2(
425
10.0k
                    context, ldata, loffsets, data_null_map, jsonb_data_const, jsonb_path_columns,
426
10.0k
                    path_null_maps, path_const, res_data, res_offsets, null_map->get_data()));
427
        } else {
428
            // not support other extract type for now (e.g. int, double, ...)
429
            DORIS_CHECK_EQ(jsonb_path_columns.size(), 1);
430
            const auto& rdata = jsonb_path_columns[0]->get_chars();
431
            const auto& roffsets = jsonb_path_columns[0]->get_offsets();
432
433
            auto create_all_null_result = [&]() {
434
                res = Impl::ColumnType::create();
435
                res->insert_default();
436
                auto nullable_column =
437
                        ColumnNullable::create(std::move(res), ColumnUInt8::create(1, 1));
438
                auto const_column =
439
                        ColumnConst::create(std::move(nullable_column), input_rows_count);
440
                block.get_by_position(result).column = std::move(const_column);
441
                return Status::OK();
442
            };
443
444
            if (jsonb_data_const) {
445
                if (data_null_map && (*data_null_map)[0]) {
446
                    return create_all_null_result();
447
                }
448
449
                RETURN_IF_ERROR(Impl::scalar_vector(context, jsonb_data_column->get_data_at(0),
450
                                                    rdata, roffsets, path_null_maps[0],
451
                                                    res->get_data(), null_map->get_data()));
452
            } else if (path_const[0]) {
453
                if (path_null_maps[0] && (*path_null_maps[0])[0]) {
454
                    return create_all_null_result();
455
                }
456
                RETURN_IF_ERROR(Impl::vector_scalar(context, ldata, loffsets, data_null_map,
457
                                                    jsonb_path_columns[0]->get_data_at(0),
458
                                                    res->get_data(), null_map->get_data()));
459
            } else {
460
                RETURN_IF_ERROR(Impl::vector_vector(context, ldata, loffsets, data_null_map, rdata,
461
                                                    roffsets, path_null_maps[0], res->get_data(),
462
                                                    null_map->get_data()));
463
            }
464
        }
465
466
10.0k
        block.get_by_position(result).column =
467
10.0k
                ColumnNullable::create(std::move(res), std::move(null_map));
468
10.0k
        return Status::OK();
469
10.0k
    }
_ZNK5doris20FunctionJsonbExtractINS_25JsonbExtractJsonbNoQuotesEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
372
9
                        uint32_t result, size_t input_rows_count) const override {
373
9
        DORIS_CHECK_GE(arguments.size(), 2);
374
375
9
        ColumnPtr jsonb_data_column;
376
9
        bool jsonb_data_const = false;
377
9
        const NullMap* data_null_map = nullptr;
378
379
9
        if (block.get_by_position(arguments[0]).type->get_primitive_type() !=
380
9
            PrimitiveType::TYPE_JSONB) {
381
0
            return Status::InvalidArgument(
382
0
                    "jsonb_extract first argument should be json type, but got {}",
383
0
                    block.get_by_position(arguments[0]).type->get_name());
384
0
        }
385
386
        // prepare jsonb data column
387
9
        std::tie(jsonb_data_column, jsonb_data_const) =
388
9
                unpack_if_const(block.get_by_position(arguments[0]).column);
389
9
        if (const auto* nullable_column =
390
9
                    check_and_get_column<ColumnNullable>(jsonb_data_column.get())) {
391
9
            jsonb_data_column = nullable_column->get_nested_column_ptr();
392
9
            data_null_map = &nullable_column->get_null_map_data();
393
9
        }
394
9
        const auto& ldata = assert_cast<const ColumnString*>(jsonb_data_column.get())->get_chars();
395
9
        const auto& loffsets =
396
9
                assert_cast<const ColumnString*>(jsonb_data_column.get())->get_offsets();
397
398
        // prepare parse path column prepare
399
9
        std::vector<const ColumnString*> jsonb_path_columns;
400
9
        std::vector<bool> path_const(arguments.size() - 1);
401
9
        std::vector<const NullMap*> path_null_maps(arguments.size() - 1, nullptr);
402
22
        for (int i = 0; i < arguments.size() - 1; ++i) {
403
13
            ColumnPtr path_column;
404
13
            bool is_const = false;
405
13
            std::tie(path_column, is_const) =
406
13
                    unpack_if_const(block.get_by_position(arguments[i + 1]).column);
407
13
            path_const[i] = is_const;
408
13
            if (const auto* nullable_column =
409
13
                        check_and_get_column<ColumnNullable>(path_column.get())) {
410
1
                path_column = nullable_column->get_nested_column_ptr();
411
1
                path_null_maps[i] = &nullable_column->get_null_map_data();
412
1
            }
413
13
            jsonb_path_columns.push_back(assert_cast<const ColumnString*>(path_column.get()));
414
13
        }
415
416
9
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
417
9
        auto res = Impl::ColumnType::create();
418
419
        // execute Impl
420
        if constexpr (std::is_same_v<typename Impl::ReturnType, DataTypeString> ||
421
9
                      std::is_same_v<typename Impl::ReturnType, DataTypeJsonb>) {
422
9
            auto& res_data = res->get_chars();
423
9
            auto& res_offsets = res->get_offsets();
424
9
            RETURN_IF_ERROR(Impl::vector_vector_v2(
425
9
                    context, ldata, loffsets, data_null_map, jsonb_data_const, jsonb_path_columns,
426
9
                    path_null_maps, path_const, res_data, res_offsets, null_map->get_data()));
427
        } else {
428
            // not support other extract type for now (e.g. int, double, ...)
429
            DORIS_CHECK_EQ(jsonb_path_columns.size(), 1);
430
            const auto& rdata = jsonb_path_columns[0]->get_chars();
431
            const auto& roffsets = jsonb_path_columns[0]->get_offsets();
432
433
            auto create_all_null_result = [&]() {
434
                res = Impl::ColumnType::create();
435
                res->insert_default();
436
                auto nullable_column =
437
                        ColumnNullable::create(std::move(res), ColumnUInt8::create(1, 1));
438
                auto const_column =
439
                        ColumnConst::create(std::move(nullable_column), input_rows_count);
440
                block.get_by_position(result).column = std::move(const_column);
441
                return Status::OK();
442
            };
443
444
            if (jsonb_data_const) {
445
                if (data_null_map && (*data_null_map)[0]) {
446
                    return create_all_null_result();
447
                }
448
449
                RETURN_IF_ERROR(Impl::scalar_vector(context, jsonb_data_column->get_data_at(0),
450
                                                    rdata, roffsets, path_null_maps[0],
451
                                                    res->get_data(), null_map->get_data()));
452
            } else if (path_const[0]) {
453
                if (path_null_maps[0] && (*path_null_maps[0])[0]) {
454
                    return create_all_null_result();
455
                }
456
                RETURN_IF_ERROR(Impl::vector_scalar(context, ldata, loffsets, data_null_map,
457
                                                    jsonb_path_columns[0]->get_data_at(0),
458
                                                    res->get_data(), null_map->get_data()));
459
            } else {
460
                RETURN_IF_ERROR(Impl::vector_vector(context, ldata, loffsets, data_null_map, rdata,
461
                                                    roffsets, path_null_maps[0], res->get_data(),
462
                                                    null_map->get_data()));
463
            }
464
        }
465
466
9
        block.get_by_position(result).column =
467
9
                ColumnNullable::create(std::move(res), std::move(null_map));
468
9
        return Status::OK();
469
9
    }
470
};
471
472
class FunctionJsonbKeys : public IFunction {
473
public:
474
    static constexpr auto name = "json_keys";
475
    static constexpr auto alias = "jsonb_keys";
476
52
    static FunctionPtr create() { return std::make_shared<FunctionJsonbKeys>(); }
477
0
    String get_name() const override { return name; }
478
44
    bool is_variadic() const override { return true; }
479
0
    size_t get_number_of_arguments() const override { return 0; }
480
481
148
    bool use_default_implementation_for_nulls() const override { return false; }
482
483
43
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
484
43
        return make_nullable(
485
43
                std::make_shared<DataTypeArray>(make_nullable(std::make_shared<DataTypeString>())));
486
43
    }
487
488
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
489
105
                        uint32_t result, size_t input_rows_count) const override {
490
105
        DORIS_CHECK_GE(arguments.size(), 1);
491
105
        DORIS_CHECK(arguments.size() == 1 || arguments.size() == 2)
492
0
                << "json_keys should have 1 or 2 arguments, but got " << arguments.size();
493
494
105
        const NullMap* data_null_map = nullptr;
495
105
        const ColumnString* col_from_string = nullptr;
496
        // prepare jsonb data column
497
105
        auto&& [jsonb_data_column, json_data_const] =
498
105
                unpack_if_const(block.get_by_position(arguments[0]).column);
499
105
        if (const auto* nullable = check_and_get_column<ColumnNullable>(jsonb_data_column.get())) {
500
99
            col_from_string =
501
99
                    assert_cast<const ColumnString*>(nullable->get_nested_column_ptr().get());
502
99
            data_null_map = &nullable->get_null_map_data();
503
99
        } else {
504
6
            col_from_string = assert_cast<const ColumnString*>(jsonb_data_column.get());
505
6
        }
506
507
        // prepare parse path column prepare, maybe we do not have path column
508
105
        ColumnPtr jsonb_path_column = nullptr;
509
105
        const ColumnString* jsonb_path_col = nullptr;
510
105
        bool path_const = false;
511
105
        const NullMap* path_null_map = nullptr;
512
105
        if (arguments.size() == 2) {
513
            // we have should have a ColumnString for path
514
78
            std::tie(jsonb_path_column, path_const) =
515
78
                    unpack_if_const(block.get_by_position(arguments[1]).column);
516
78
            if (const auto* nullable =
517
78
                        check_and_get_column<ColumnNullable>(jsonb_path_column.get())) {
518
10
                jsonb_path_column = nullable->get_nested_column_ptr();
519
10
                path_null_map = &nullable->get_null_map_data();
520
10
            }
521
78
            jsonb_path_col = check_and_get_column<ColumnString>(jsonb_path_column.get());
522
78
        }
523
524
105
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
525
105
        NullMap& res_null_map = null_map->get_data();
526
527
105
        auto dst_arr = ColumnArray::create(
528
105
                ColumnNullable::create(ColumnString::create(), ColumnUInt8::create()),
529
105
                ColumnArray::ColumnOffsets::create());
530
105
        auto& dst_nested_column = assert_cast<ColumnNullable&>(dst_arr->get_data());
531
532
105
        Status st = std::visit(
533
105
                [&](auto data_const, auto has_path, auto path_const) {
534
105
                    return inner_loop_impl<data_const, has_path, path_const>(
535
105
                            input_rows_count, *dst_arr, dst_nested_column, res_null_map,
536
105
                            *col_from_string, data_null_map, jsonb_path_col, path_null_map);
537
105
                },
_ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESG_SG_EEDaSA_SB_SC_
Line
Count
Source
533
27
                [&](auto data_const, auto has_path, auto path_const) {
534
27
                    return inner_loop_impl<data_const, has_path, path_const>(
535
27
                            input_rows_count, *dst_arr, dst_nested_column, res_null_map,
536
27
                            *col_from_string, data_null_map, jsonb_path_col, path_null_map);
537
27
                },
Unexecuted instantiation: _ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESG_SF_IbLb1EEEEDaSA_SB_SC_
_ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESF_IbLb1EESG_EEDaSA_SB_SC_
Line
Count
Source
533
28
                [&](auto data_const, auto has_path, auto path_const) {
534
28
                    return inner_loop_impl<data_const, has_path, path_const>(
535
28
                            input_rows_count, *dst_arr, dst_nested_column, res_null_map,
536
28
                            *col_from_string, data_null_map, jsonb_path_col, path_null_map);
537
28
                },
_ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESF_IbLb1EESH_EEDaSA_SB_SC_
Line
Count
Source
533
48
                [&](auto data_const, auto has_path, auto path_const) {
534
48
                    return inner_loop_impl<data_const, has_path, path_const>(
535
48
                            input_rows_count, *dst_arr, dst_nested_column, res_null_map,
536
48
                            *col_from_string, data_null_map, jsonb_path_col, path_null_map);
537
48
                },
Unexecuted instantiation: _ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESF_IbLb0EESH_EEDaSA_SB_SC_
Unexecuted instantiation: _ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESF_IbLb0EESG_EEDaSA_SB_SC_
_ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESG_SF_IbLb0EEEEDaSA_SB_SC_
Line
Count
Source
533
2
                [&](auto data_const, auto has_path, auto path_const) {
534
2
                    return inner_loop_impl<data_const, has_path, path_const>(
535
2
                            input_rows_count, *dst_arr, dst_nested_column, res_null_map,
536
2
                            *col_from_string, data_null_map, jsonb_path_col, path_null_map);
537
2
                },
Unexecuted instantiation: _ZZNK5doris17FunctionJsonbKeys12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESG_SG_EEDaSA_SB_SC_
538
105
                make_bool_variant(json_data_const), make_bool_variant(jsonb_path_column),
539
105
                make_bool_variant(path_const));
540
105
        if (!st.ok()) {
541
12
            return st;
542
12
        }
543
93
        block.get_by_position(result).column =
544
93
                ColumnNullable::create(std::move(dst_arr), std::move(null_map));
545
93
        return st;
546
105
    }
547
548
private:
549
    template <bool JSONB_DATA_CONST, bool JSONB_PATH_PARAM, bool JSON_PATH_CONST>
550
    static ALWAYS_INLINE Status inner_loop_impl(size_t input_rows_count, ColumnArray& dst_arr,
551
                                                ColumnNullable& dst_nested_column,
552
                                                NullMap& res_null_map,
553
                                                const ColumnString& col_from_string,
554
                                                const NullMap* jsonb_data_nullmap,
555
                                                const ColumnString* jsonb_path_column,
556
105
                                                const NullMap* path_null_map) {
557
        // if path is const, we just need to parse it once
558
105
        JsonbPath const_path;
559
105
        if constexpr (JSONB_PATH_PARAM && JSON_PATH_CONST) {
560
48
            StringRef r_raw_ref = jsonb_path_column->get_data_at(0);
561
48
            if (!const_path.seek(r_raw_ref.data, r_raw_ref.size)) {
562
1
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
563
1
                                               r_raw_ref.to_string());
564
1
            }
565
566
47
            if (const_path.is_wildcard() || const_path.is_supper_wildcard()) {
567
2
                return Status::InvalidJsonPath(
568
2
                        "In this situation, path expressions may not contain the * and ** tokens "
569
2
                        "or an array range.");
570
2
            }
571
47
        }
572
573
385
        for (size_t i = 0; i < input_rows_count; ++i) {
574
272
            auto index = index_check_const(i, JSONB_DATA_CONST);
575
            // if jsonb data is null or path column is null , we should return null
576
272
            if (jsonb_data_nullmap && (*jsonb_data_nullmap)[index]) {
577
23
                res_null_map[i] = 1;
578
23
                dst_arr.insert_default();
579
23
                continue;
580
23
            }
581
249
            if constexpr (JSONB_PATH_PARAM && !JSON_PATH_CONST) {
582
73
                if (path_null_map && (*path_null_map)[i]) {
583
8
                    res_null_map[i] = 1;
584
8
                    dst_arr.insert_default();
585
8
                    continue;
586
8
                }
587
73
            }
588
589
65
            auto json_data = col_from_string.get_data_at(index);
590
249
            const JsonbDocument* doc = nullptr;
591
249
            auto st = JsonbDocument::checkAndCreateDocument(json_data.data, json_data.size, &doc);
592
249
            if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
593
0
                dst_arr.clear();
594
0
                return Status::InvalidArgument("jsonb data is invalid");
595
0
            }
596
249
            const JsonbValue* obj_val;
597
249
            JsonbFindResult find_result;
598
249
            if constexpr (JSONB_PATH_PARAM) {
599
195
                if constexpr (!JSON_PATH_CONST) {
600
73
                    auto data = jsonb_path_column->get_data_at(i);
601
73
                    JsonbPath path;
602
73
                    if (!path.seek(data.data, data.size)) {
603
5
                        return Status::InvalidArgument(
604
5
                                "Json path error: Invalid Json Path for value: {} at row: {}",
605
5
                                std::string_view(data.data, data.size), i);
606
5
                    }
607
608
68
                    if (path.is_wildcard() || path.is_supper_wildcard()) {
609
4
                        return Status::InvalidJsonPath(
610
4
                                "In this situation, path expressions may not contain the * and ** "
611
4
                                "tokens "
612
4
                                "or an array range. at row: {}",
613
4
                                i);
614
4
                    }
615
64
                    find_result = doc->getValue()->findValue(path);
616
122
                } else {
617
122
                    find_result = doc->getValue()->findValue(const_path);
618
122
                }
619
0
                obj_val = find_result.value;
620
195
            } else {
621
54
                obj_val = doc->getValue();
622
54
            }
623
624
249
            if (!obj_val || !obj_val->isObject()) {
625
                // if jsonb data is not object we should return null
626
182
                res_null_map[i] = 1;
627
182
                dst_arr.insert_default();
628
182
                continue;
629
182
            }
630
67
            const auto* obj = obj_val->unpack<ObjectVal>();
631
76
            for (const auto& it : *obj) {
632
76
                dst_nested_column.insert_data(it.getKeyStr(), it.klen());
633
76
            }
634
67
            dst_arr.get_offsets().push_back(dst_nested_column.size());
635
67
        } //for
636
113
        return Status::OK();
637
105
    }
_ZN5doris17FunctionJsonbKeys15inner_loop_implILb0ELb0ELb0EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_
Line
Count
Source
556
27
                                                const NullMap* path_null_map) {
557
        // if path is const, we just need to parse it once
558
27
        JsonbPath const_path;
559
        if constexpr (JSONB_PATH_PARAM && JSON_PATH_CONST) {
560
            StringRef r_raw_ref = jsonb_path_column->get_data_at(0);
561
            if (!const_path.seek(r_raw_ref.data, r_raw_ref.size)) {
562
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
563
                                               r_raw_ref.to_string());
564
            }
565
566
            if (const_path.is_wildcard() || const_path.is_supper_wildcard()) {
567
                return Status::InvalidJsonPath(
568
                        "In this situation, path expressions may not contain the * and ** tokens "
569
                        "or an array range.");
570
            }
571
        }
572
573
85
        for (size_t i = 0; i < input_rows_count; ++i) {
574
58
            auto index = index_check_const(i, JSONB_DATA_CONST);
575
            // if jsonb data is null or path column is null , we should return null
576
58
            if (jsonb_data_nullmap && (*jsonb_data_nullmap)[index]) {
577
4
                res_null_map[i] = 1;
578
4
                dst_arr.insert_default();
579
4
                continue;
580
4
            }
581
            if constexpr (JSONB_PATH_PARAM && !JSON_PATH_CONST) {
582
                if (path_null_map && (*path_null_map)[i]) {
583
                    res_null_map[i] = 1;
584
                    dst_arr.insert_default();
585
                    continue;
586
                }
587
            }
588
589
54
            auto json_data = col_from_string.get_data_at(index);
590
54
            const JsonbDocument* doc = nullptr;
591
54
            auto st = JsonbDocument::checkAndCreateDocument(json_data.data, json_data.size, &doc);
592
54
            if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
593
0
                dst_arr.clear();
594
0
                return Status::InvalidArgument("jsonb data is invalid");
595
0
            }
596
54
            const JsonbValue* obj_val;
597
54
            JsonbFindResult find_result;
598
            if constexpr (JSONB_PATH_PARAM) {
599
                if constexpr (!JSON_PATH_CONST) {
600
                    auto data = jsonb_path_column->get_data_at(i);
601
                    JsonbPath path;
602
                    if (!path.seek(data.data, data.size)) {
603
                        return Status::InvalidArgument(
604
                                "Json path error: Invalid Json Path for value: {} at row: {}",
605
                                std::string_view(data.data, data.size), i);
606
                    }
607
608
                    if (path.is_wildcard() || path.is_supper_wildcard()) {
609
                        return Status::InvalidJsonPath(
610
                                "In this situation, path expressions may not contain the * and ** "
611
                                "tokens "
612
                                "or an array range. at row: {}",
613
                                i);
614
                    }
615
                    find_result = doc->getValue()->findValue(path);
616
                } else {
617
                    find_result = doc->getValue()->findValue(const_path);
618
                }
619
                obj_val = find_result.value;
620
54
            } else {
621
54
                obj_val = doc->getValue();
622
54
            }
623
624
54
            if (!obj_val || !obj_val->isObject()) {
625
                // if jsonb data is not object we should return null
626
36
                res_null_map[i] = 1;
627
36
                dst_arr.insert_default();
628
36
                continue;
629
36
            }
630
18
            const auto* obj = obj_val->unpack<ObjectVal>();
631
36
            for (const auto& it : *obj) {
632
36
                dst_nested_column.insert_data(it.getKeyStr(), it.klen());
633
36
            }
634
18
            dst_arr.get_offsets().push_back(dst_nested_column.size());
635
18
        } //for
636
27
        return Status::OK();
637
27
    }
Unexecuted instantiation: _ZN5doris17FunctionJsonbKeys15inner_loop_implILb0ELb0ELb1EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_
_ZN5doris17FunctionJsonbKeys15inner_loop_implILb0ELb1ELb0EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_
Line
Count
Source
556
28
                                                const NullMap* path_null_map) {
557
        // if path is const, we just need to parse it once
558
28
        JsonbPath const_path;
559
        if constexpr (JSONB_PATH_PARAM && JSON_PATH_CONST) {
560
            StringRef r_raw_ref = jsonb_path_column->get_data_at(0);
561
            if (!const_path.seek(r_raw_ref.data, r_raw_ref.size)) {
562
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
563
                                               r_raw_ref.to_string());
564
            }
565
566
            if (const_path.is_wildcard() || const_path.is_supper_wildcard()) {
567
                return Status::InvalidJsonPath(
568
                        "In this situation, path expressions may not contain the * and ** tokens "
569
                        "or an array range.");
570
            }
571
        }
572
573
87
        for (size_t i = 0; i < input_rows_count; ++i) {
574
55
            auto index = index_check_const(i, JSONB_DATA_CONST);
575
            // if jsonb data is null or path column is null , we should return null
576
55
            if (jsonb_data_nullmap && (*jsonb_data_nullmap)[index]) {
577
6
                res_null_map[i] = 1;
578
6
                dst_arr.insert_default();
579
6
                continue;
580
6
            }
581
49
            if constexpr (JSONB_PATH_PARAM && !JSON_PATH_CONST) {
582
49
                if (path_null_map && (*path_null_map)[i]) {
583
4
                    res_null_map[i] = 1;
584
4
                    dst_arr.insert_default();
585
4
                    continue;
586
4
                }
587
49
            }
588
589
45
            auto json_data = col_from_string.get_data_at(index);
590
49
            const JsonbDocument* doc = nullptr;
591
49
            auto st = JsonbDocument::checkAndCreateDocument(json_data.data, json_data.size, &doc);
592
49
            if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
593
0
                dst_arr.clear();
594
0
                return Status::InvalidArgument("jsonb data is invalid");
595
0
            }
596
49
            const JsonbValue* obj_val;
597
49
            JsonbFindResult find_result;
598
49
            if constexpr (JSONB_PATH_PARAM) {
599
49
                if constexpr (!JSON_PATH_CONST) {
600
49
                    auto data = jsonb_path_column->get_data_at(i);
601
49
                    JsonbPath path;
602
49
                    if (!path.seek(data.data, data.size)) {
603
5
                        return Status::InvalidArgument(
604
5
                                "Json path error: Invalid Json Path for value: {} at row: {}",
605
5
                                std::string_view(data.data, data.size), i);
606
5
                    }
607
608
44
                    if (path.is_wildcard() || path.is_supper_wildcard()) {
609
4
                        return Status::InvalidJsonPath(
610
4
                                "In this situation, path expressions may not contain the * and ** "
611
4
                                "tokens "
612
4
                                "or an array range. at row: {}",
613
4
                                i);
614
4
                    }
615
40
                    find_result = doc->getValue()->findValue(path);
616
                } else {
617
                    find_result = doc->getValue()->findValue(const_path);
618
                }
619
0
                obj_val = find_result.value;
620
            } else {
621
                obj_val = doc->getValue();
622
            }
623
624
49
            if (!obj_val || !obj_val->isObject()) {
625
                // if jsonb data is not object we should return null
626
25
                res_null_map[i] = 1;
627
25
                dst_arr.insert_default();
628
25
                continue;
629
25
            }
630
24
            const auto* obj = obj_val->unpack<ObjectVal>();
631
24
            for (const auto& it : *obj) {
632
15
                dst_nested_column.insert_data(it.getKeyStr(), it.klen());
633
15
            }
634
24
            dst_arr.get_offsets().push_back(dst_nested_column.size());
635
24
        } //for
636
32
        return Status::OK();
637
28
    }
_ZN5doris17FunctionJsonbKeys15inner_loop_implILb0ELb1ELb1EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_
Line
Count
Source
556
48
                                                const NullMap* path_null_map) {
557
        // if path is const, we just need to parse it once
558
48
        JsonbPath const_path;
559
48
        if constexpr (JSONB_PATH_PARAM && JSON_PATH_CONST) {
560
48
            StringRef r_raw_ref = jsonb_path_column->get_data_at(0);
561
48
            if (!const_path.seek(r_raw_ref.data, r_raw_ref.size)) {
562
1
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
563
1
                                               r_raw_ref.to_string());
564
1
            }
565
566
47
            if (const_path.is_wildcard() || const_path.is_supper_wildcard()) {
567
2
                return Status::InvalidJsonPath(
568
2
                        "In this situation, path expressions may not contain the * and ** tokens "
569
2
                        "or an array range.");
570
2
            }
571
47
        }
572
573
183
        for (size_t i = 0; i < input_rows_count; ++i) {
574
135
            auto index = index_check_const(i, JSONB_DATA_CONST);
575
            // if jsonb data is null or path column is null , we should return null
576
135
            if (jsonb_data_nullmap && (*jsonb_data_nullmap)[index]) {
577
13
                res_null_map[i] = 1;
578
13
                dst_arr.insert_default();
579
13
                continue;
580
13
            }
581
            if constexpr (JSONB_PATH_PARAM && !JSON_PATH_CONST) {
582
                if (path_null_map && (*path_null_map)[i]) {
583
                    res_null_map[i] = 1;
584
                    dst_arr.insert_default();
585
                    continue;
586
                }
587
            }
588
589
122
            auto json_data = col_from_string.get_data_at(index);
590
122
            const JsonbDocument* doc = nullptr;
591
122
            auto st = JsonbDocument::checkAndCreateDocument(json_data.data, json_data.size, &doc);
592
122
            if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
593
0
                dst_arr.clear();
594
0
                return Status::InvalidArgument("jsonb data is invalid");
595
0
            }
596
122
            const JsonbValue* obj_val;
597
122
            JsonbFindResult find_result;
598
122
            if constexpr (JSONB_PATH_PARAM) {
599
                if constexpr (!JSON_PATH_CONST) {
600
                    auto data = jsonb_path_column->get_data_at(i);
601
                    JsonbPath path;
602
                    if (!path.seek(data.data, data.size)) {
603
                        return Status::InvalidArgument(
604
                                "Json path error: Invalid Json Path for value: {} at row: {}",
605
                                std::string_view(data.data, data.size), i);
606
                    }
607
608
                    if (path.is_wildcard() || path.is_supper_wildcard()) {
609
                        return Status::InvalidJsonPath(
610
                                "In this situation, path expressions may not contain the * and ** "
611
                                "tokens "
612
                                "or an array range. at row: {}",
613
                                i);
614
                    }
615
                    find_result = doc->getValue()->findValue(path);
616
122
                } else {
617
122
                    find_result = doc->getValue()->findValue(const_path);
618
122
                }
619
122
                obj_val = find_result.value;
620
            } else {
621
                obj_val = doc->getValue();
622
            }
623
624
122
            if (!obj_val || !obj_val->isObject()) {
625
                // if jsonb data is not object we should return null
626
113
                res_null_map[i] = 1;
627
113
                dst_arr.insert_default();
628
113
                continue;
629
113
            }
630
9
            const auto* obj = obj_val->unpack<ObjectVal>();
631
9
            for (const auto& it : *obj) {
632
9
                dst_nested_column.insert_data(it.getKeyStr(), it.klen());
633
9
            }
634
9
            dst_arr.get_offsets().push_back(dst_nested_column.size());
635
9
        } //for
636
48
        return Status::OK();
637
48
    }
Unexecuted instantiation: _ZN5doris17FunctionJsonbKeys15inner_loop_implILb1ELb0ELb0EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_
Unexecuted instantiation: _ZN5doris17FunctionJsonbKeys15inner_loop_implILb1ELb0ELb1EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_
_ZN5doris17FunctionJsonbKeys15inner_loop_implILb1ELb1ELb0EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_
Line
Count
Source
556
2
                                                const NullMap* path_null_map) {
557
        // if path is const, we just need to parse it once
558
2
        JsonbPath const_path;
559
        if constexpr (JSONB_PATH_PARAM && JSON_PATH_CONST) {
560
            StringRef r_raw_ref = jsonb_path_column->get_data_at(0);
561
            if (!const_path.seek(r_raw_ref.data, r_raw_ref.size)) {
562
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
563
                                               r_raw_ref.to_string());
564
            }
565
566
            if (const_path.is_wildcard() || const_path.is_supper_wildcard()) {
567
                return Status::InvalidJsonPath(
568
                        "In this situation, path expressions may not contain the * and ** tokens "
569
                        "or an array range.");
570
            }
571
        }
572
573
30
        for (size_t i = 0; i < input_rows_count; ++i) {
574
24
            auto index = index_check_const(i, JSONB_DATA_CONST);
575
            // if jsonb data is null or path column is null , we should return null
576
24
            if (jsonb_data_nullmap && (*jsonb_data_nullmap)[index]) {
577
0
                res_null_map[i] = 1;
578
0
                dst_arr.insert_default();
579
0
                continue;
580
0
            }
581
24
            if constexpr (JSONB_PATH_PARAM && !JSON_PATH_CONST) {
582
24
                if (path_null_map && (*path_null_map)[i]) {
583
4
                    res_null_map[i] = 1;
584
4
                    dst_arr.insert_default();
585
4
                    continue;
586
4
                }
587
24
            }
588
589
20
            auto json_data = col_from_string.get_data_at(index);
590
24
            const JsonbDocument* doc = nullptr;
591
24
            auto st = JsonbDocument::checkAndCreateDocument(json_data.data, json_data.size, &doc);
592
24
            if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
593
0
                dst_arr.clear();
594
0
                return Status::InvalidArgument("jsonb data is invalid");
595
0
            }
596
24
            const JsonbValue* obj_val;
597
24
            JsonbFindResult find_result;
598
24
            if constexpr (JSONB_PATH_PARAM) {
599
24
                if constexpr (!JSON_PATH_CONST) {
600
24
                    auto data = jsonb_path_column->get_data_at(i);
601
24
                    JsonbPath path;
602
24
                    if (!path.seek(data.data, data.size)) {
603
0
                        return Status::InvalidArgument(
604
0
                                "Json path error: Invalid Json Path for value: {} at row: {}",
605
0
                                std::string_view(data.data, data.size), i);
606
0
                    }
607
608
24
                    if (path.is_wildcard() || path.is_supper_wildcard()) {
609
0
                        return Status::InvalidJsonPath(
610
0
                                "In this situation, path expressions may not contain the * and ** "
611
0
                                "tokens "
612
0
                                "or an array range. at row: {}",
613
0
                                i);
614
0
                    }
615
24
                    find_result = doc->getValue()->findValue(path);
616
                } else {
617
                    find_result = doc->getValue()->findValue(const_path);
618
                }
619
0
                obj_val = find_result.value;
620
            } else {
621
                obj_val = doc->getValue();
622
            }
623
624
24
            if (!obj_val || !obj_val->isObject()) {
625
                // if jsonb data is not object we should return null
626
8
                res_null_map[i] = 1;
627
8
                dst_arr.insert_default();
628
8
                continue;
629
8
            }
630
16
            const auto* obj = obj_val->unpack<ObjectVal>();
631
16
            for (const auto& it : *obj) {
632
16
                dst_nested_column.insert_data(it.getKeyStr(), it.klen());
633
16
            }
634
16
            dst_arr.get_offsets().push_back(dst_nested_column.size());
635
16
        } //for
636
6
        return Status::OK();
637
2
    }
Unexecuted instantiation: _ZN5doris17FunctionJsonbKeys15inner_loop_implILb1ELb1ELb1EEENS_6StatusEmRNS_11ColumnArrayERNS_14ColumnNullableERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS_9ColumnStrIjEEPKSB_PSF_SI_
638
};
639
640
class FunctionJsonbExtractPath : public IFunction {
641
public:
642
    static constexpr auto name = "json_exists_path";
643
    static constexpr auto alias = "jsonb_exists_path";
644
    using ColumnType = ColumnUInt8;
645
    using Container = typename ColumnType::Container;
646
183
    static FunctionPtr create() { return std::make_shared<FunctionJsonbExtractPath>(); }
647
1
    String get_name() const override { return name; }
648
174
    size_t get_number_of_arguments() const override { return 2; }
649
174
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
650
        // it only needs to indicate existence and does not need to return nullable values.
651
174
        const auto nullable = std::ranges::any_of(
652
196
                arguments, [](const DataTypePtr& type) { return type->is_nullable(); });
653
174
        if (nullable) {
654
153
            return make_nullable(std::make_shared<DataTypeUInt8>());
655
153
        } else {
656
21
            return std::make_shared<DataTypeUInt8>();
657
21
        }
658
174
    }
659
660
1.53k
    bool use_default_implementation_for_nulls() const override { return false; }
661
662
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
663
1.36k
                        uint32_t result, size_t input_rows_count) const override {
664
        // prepare jsonb data column
665
1.36k
        auto&& [jsonb_data_column, jsonb_data_const] =
666
1.36k
                unpack_if_const(block.get_by_position(arguments[0]).column);
667
668
1.36k
        const NullMap* data_null_map = nullptr;
669
1.36k
        const ColumnString* data_col = nullptr;
670
1.36k
        if (const auto* nullable = check_and_get_column<ColumnNullable>(jsonb_data_column.get())) {
671
1.17k
            data_col = assert_cast<const ColumnString*>(nullable->get_nested_column_ptr().get());
672
1.17k
            data_null_map = &nullable->get_null_map_data();
673
1.17k
        } else {
674
184
            data_col = assert_cast<const ColumnString*>(jsonb_data_column.get());
675
184
        }
676
677
1.36k
        const auto& ldata = data_col->get_chars();
678
1.36k
        const auto& loffsets = data_col->get_offsets();
679
680
        // prepare parse path column prepare
681
1.36k
        auto&& [path_column, path_const] =
682
1.36k
                unpack_if_const(block.get_by_position(arguments[1]).column);
683
1.36k
        const ColumnString* path_col = nullptr;
684
1.36k
        const NullMap* path_null_map = nullptr;
685
1.36k
        if (const auto* nullable = check_and_get_column<ColumnNullable>(path_column.get())) {
686
7
            path_col = assert_cast<const ColumnString*>(nullable->get_nested_column_ptr().get());
687
7
            path_null_map = &nullable->get_null_map_data();
688
1.35k
        } else {
689
1.35k
            path_col = assert_cast<const ColumnString*>(path_column.get());
690
1.35k
        }
691
692
1.36k
        DORIS_CHECK(!(jsonb_data_const && path_const))
693
0
                << "jsonb_data_const and path_const should not be both const";
694
695
1.36k
        auto create_all_null_result = [&]() {
696
3
            auto res = ColumnType::create();
697
3
            res->insert_default();
698
3
            auto nullable_column =
699
3
                    ColumnNullable::create(std::move(res), ColumnUInt8::create(1, 1));
700
3
            auto const_column = ColumnConst::create(std::move(nullable_column), input_rows_count);
701
3
            block.get_by_position(result).column = std::move(const_column);
702
3
            return Status::OK();
703
3
        };
704
705
1.36k
        ColumnUInt8::MutablePtr result_null_map_column;
706
1.36k
        NullMap* result_null_map = nullptr;
707
1.36k
        if (data_null_map || path_null_map) {
708
1.17k
            result_null_map_column = ColumnUInt8::create(input_rows_count, 0);
709
1.17k
            result_null_map = &result_null_map_column->get_data();
710
711
1.17k
            if (data_null_map) {
712
1.17k
                VectorizedUtils::update_null_map(*result_null_map, *data_null_map,
713
1.17k
                                                 jsonb_data_const);
714
1.17k
            }
715
716
1.17k
            if (path_null_map) {
717
7
                VectorizedUtils::update_null_map(*result_null_map, *path_null_map, path_const);
718
7
            }
719
720
1.17k
            if (!simd::contain_zero(result_null_map->data(), input_rows_count)) {
721
3
                return create_all_null_result();
722
3
            }
723
1.17k
        }
724
725
1.35k
        auto res = ColumnType::create();
726
727
1.35k
        bool is_invalid_json_path = false;
728
729
1.35k
        const auto& rdata = path_col->get_chars();
730
1.35k
        const auto& roffsets = path_col->get_offsets();
731
1.35k
        if (jsonb_data_const) {
732
2
            if (data_null_map && (*data_null_map)[0]) {
733
0
                return create_all_null_result();
734
0
            }
735
2
            scalar_vector(context, data_col->get_data_at(0), rdata, roffsets, res->get_data(),
736
2
                          result_null_map, is_invalid_json_path);
737
1.35k
        } else if (path_const) {
738
1.32k
            if (path_null_map && (*path_null_map)[0]) {
739
0
                return create_all_null_result();
740
0
            }
741
1.32k
            vector_scalar(context, ldata, loffsets, path_col->get_data_at(0), res->get_data(),
742
1.32k
                          result_null_map, is_invalid_json_path);
743
1.32k
        } else {
744
35
            vector_vector(context, ldata, loffsets, rdata, roffsets, res->get_data(),
745
35
                          result_null_map, is_invalid_json_path);
746
35
        }
747
1.35k
        if (is_invalid_json_path) {
748
7
            return Status::InvalidArgument(
749
7
                    "Json path error: Invalid Json Path for value: {}",
750
7
                    std::string_view(reinterpret_cast<const char*>(rdata.data()), rdata.size()));
751
7
        }
752
753
1.35k
        if (result_null_map) {
754
1.17k
            auto nullabel_col =
755
1.17k
                    ColumnNullable::create(std::move(res), std::move(result_null_map_column));
756
1.17k
            block.get_by_position(result).column = std::move(nullabel_col);
757
1.17k
        } else {
758
181
            block.get_by_position(result).column = std::move(res);
759
181
        }
760
1.35k
        return Status::OK();
761
1.35k
    }
762
763
private:
764
    static ALWAYS_INLINE void inner_loop_impl(size_t i, Container& res, const char* l_raw_str,
765
3.01k
                                              size_t l_str_size, JsonbPath& path) {
766
        // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
767
3.01k
        const JsonbDocument* doc = nullptr;
768
3.01k
        auto st = JsonbDocument::checkAndCreateDocument(l_raw_str, l_str_size, &doc);
769
3.01k
        if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
770
0
            return;
771
0
        }
772
773
        // value is NOT necessary to be deleted since JsonbValue will not allocate memory
774
3.01k
        auto result = doc->getValue()->findValue(path);
775
776
3.01k
        if (result.value) {
777
445
            res[i] = 1;
778
445
        }
779
3.01k
    }
780
    static void vector_vector(FunctionContext* context, const ColumnString::Chars& ldata,
781
                              const ColumnString::Offsets& loffsets,
782
                              const ColumnString::Chars& rdata,
783
                              const ColumnString::Offsets& roffsets, Container& res,
784
35
                              const NullMap* result_null_map, bool& is_invalid_json_path) {
785
35
        const size_t size = loffsets.size();
786
35
        res.resize_fill(size, 0);
787
788
80
        for (size_t i = 0; i < size; i++) {
789
50
            if (result_null_map && (*result_null_map)[i]) {
790
8
                continue;
791
8
            }
792
793
42
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
794
42
            int l_str_size = loffsets[i] - loffsets[i - 1];
795
796
42
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
797
42
            int r_str_size = roffsets[i] - roffsets[i - 1];
798
799
42
            JsonbPath path;
800
42
            if (!path.seek(r_raw_str, r_str_size)) {
801
5
                is_invalid_json_path = true;
802
5
                return;
803
5
            }
804
805
37
            inner_loop_impl(i, res, l_raw_str, l_str_size, path);
806
37
        }
807
35
    }
808
    static void scalar_vector(FunctionContext* context, const StringRef& ldata,
809
                              const ColumnString::Chars& rdata,
810
                              const ColumnString::Offsets& roffsets, Container& res,
811
2
                              const NullMap* result_null_map, bool& is_invalid_json_path) {
812
2
        const size_t size = roffsets.size();
813
2
        res.resize_fill(size, 0);
814
815
14
        for (size_t i = 0; i < size; i++) {
816
13
            if (result_null_map && (*result_null_map)[i]) {
817
4
                continue;
818
4
            }
819
9
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
820
9
            int r_str_size = roffsets[i] - roffsets[i - 1];
821
822
9
            JsonbPath path;
823
9
            if (!path.seek(r_raw_str, r_str_size)) {
824
1
                is_invalid_json_path = true;
825
1
                return;
826
1
            }
827
828
8
            inner_loop_impl(i, res, ldata.data, ldata.size, path);
829
8
        }
830
2
    }
831
    static void vector_scalar(FunctionContext* context, const ColumnString::Chars& ldata,
832
                              const ColumnString::Offsets& loffsets, const StringRef& rdata,
833
                              Container& res, const NullMap* result_null_map,
834
1.32k
                              bool& is_invalid_json_path) {
835
1.32k
        const size_t size = loffsets.size();
836
1.32k
        res.resize_fill(size, 0);
837
838
1.32k
        JsonbPath path;
839
1.32k
        if (!path.seek(rdata.data, rdata.size)) {
840
1
            is_invalid_json_path = true;
841
1
            return;
842
1
        }
843
844
4.51k
        for (size_t i = 0; i < size; i++) {
845
3.19k
            if (result_null_map && (*result_null_map)[i]) {
846
232
                continue;
847
232
            }
848
2.96k
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
849
2.96k
            int l_str_size = loffsets[i] - loffsets[i - 1];
850
851
2.96k
            inner_loop_impl(i, res, l_raw_str, l_str_size, path);
852
2.96k
        }
853
1.32k
    }
854
};
855
856
template <typename ValueType>
857
struct JsonbExtractStringImpl {
858
    using ReturnType = typename ValueType::ReturnType;
859
    using ColumnType = typename ValueType::ColumnType;
860
861
private:
862
    /// Resolves `path` inside the JSONB document `l_raw[0, l_size)` and stores the outcome
    /// as row `i` of the result column.
    ///
    /// The row becomes NULL when the document cannot be parsed or the path has no match.
    /// Otherwise, depending on `ValueType`, the row receives either the type name of the
    /// matched value (`only_get_type`) or the matched value serialized through `writer`;
    /// with `no_quotes`, a string value that is itself wrapped in double quotes is written
    /// without that outer pair of quotes.
    static ALWAYS_INLINE void inner_loop_impl(JsonbWriter* writer, size_t i,
                                              ColumnString::Chars& res_data,
                                              ColumnString::Offsets& res_offsets, NullMap& null_map,
                                              const char* l_raw, size_t l_size, JsonbPath& path) {
        // The document does not need to be deleted: JsonbDocument will not allocate memory.
        const JsonbDocument* document = nullptr;
        const auto parse_status = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &document);
        const bool usable = parse_status.ok() && document && document->getValue();
        if (!usable) [[unlikely]] {
            StringOP::push_null_string(i, res_data, res_offsets, null_map);
            return;
        }

        // The match does not need to be deleted either: JsonbValue will not allocate memory.
        auto match = document->getValue()->findValue(path);
        if (UNLIKELY(!match.value)) {
            StringOP::push_null_string(i, res_data, res_offsets, null_map);
            return;
        }

        if constexpr (ValueType::only_get_type) {
            // Only the type name of the matched value is requested.
            StringOP::push_value_string(std::string_view(match.value->typeName()), i, res_data,
                                        res_offsets);
        } else {
            static_assert(std::is_same_v<DataTypeJsonb, ReturnType>);

            // Copies whatever the writer has produced so far into row `i`.
            const auto store_writer_output = [&]() {
                StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(),
                                                             writer->getOutput()->getSize()),
                                            i, res_data, res_offsets);
            };

            if constexpr (ValueType::no_quotes) {
                if (match.value->isString()) {
                    const auto* text = match.value->unpack<JsonbStringVal>();
                    const auto* bytes = text->getBlob();
                    const auto length = text->length();
                    const bool quoted =
                            length > 1 && bytes[0] == '"' && bytes[length - 1] == '"';
                    if (quoted) {
                        // Re-encode the string without its surrounding quote characters.
                        writer->writeStartString();
                        writer->writeString(bytes + 1, length - 2);
                        writer->writeEndString();
                        store_writer_output();
                        return;
                    }
                }
            }

            writer->writeValueSimple(match.value);
            store_writer_output();
        }
    }
Unexecuted instantiation: _ZN5doris22JsonbExtractStringImplINS_13JsonbTypeTypeEE15inner_loop_implEPNS_12JsonbWriterTINS_14JsonbOutStreamEEEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEESC_PKcmRNS_9JsonbPathE
_ZN5doris22JsonbExtractStringImplINS_13JsonbTypeJsonEE15inner_loop_implEPNS_12JsonbWriterTINS_14JsonbOutStreamEEEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEESC_PKcmRNS_9JsonbPathE
Line
Count
Source
865
412
                                              const char* l_raw, size_t l_size, JsonbPath& path) {
866
        // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
867
412
        const JsonbDocument* doc = nullptr;
868
412
        auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc);
869
412
        if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
870
0
            StringOP::push_null_string(i, res_data, res_offsets, null_map);
871
0
            return;
872
0
        }
873
874
        // value is NOT necessary to be deleted since JsonbValue will not allocate memory
875
412
        auto find_result = doc->getValue()->findValue(path);
876
877
412
        if (UNLIKELY(!find_result.value)) {
878
46
            StringOP::push_null_string(i, res_data, res_offsets, null_map);
879
46
            return;
880
46
        }
881
882
        if constexpr (ValueType::only_get_type) {
883
            StringOP::push_value_string(std::string_view(find_result.value->typeName()), i,
884
                                        res_data, res_offsets);
885
            return;
886
366
        } else {
887
366
            static_assert(std::is_same_v<DataTypeJsonb, ReturnType>);
888
            if constexpr (ValueType::no_quotes) {
889
                if (find_result.value->isString()) {
890
                    const auto* str_value = find_result.value->unpack<JsonbStringVal>();
891
                    const auto* blob = str_value->getBlob();
892
                    if (str_value->length() > 1 && blob[0] == '"' &&
893
                        blob[str_value->length() - 1] == '"') {
894
                        writer->writeStartString();
895
                        writer->writeString(blob + 1, str_value->length() - 2);
896
                        writer->writeEndString();
897
                        StringOP::push_value_string(
898
                                std::string_view(writer->getOutput()->getBuffer(),
899
                                                 writer->getOutput()->getSize()),
900
                                i, res_data, res_offsets);
901
                        return;
902
                    }
903
                }
904
            }
905
366
            writer->writeValueSimple(find_result.value);
906
366
            StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(),
907
366
                                                         writer->getOutput()->getSize()),
908
366
                                        i, res_data, res_offsets);
909
366
        }
910
366
    }
Unexecuted instantiation: _ZN5doris22JsonbExtractStringImplINS_21JsonbTypeJsonNoQuotesEE15inner_loop_implEPNS_12JsonbWriterTINS_14JsonbOutStreamEEEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEESC_PKcmRNS_9JsonbPathE
911
912
public:
913
    // for jsonb_extract_string
914
    /// Evaluates one or more JSON paths against every JSONB document and fills the result
    /// string column (`res_data` / `res_offsets`) together with its null map.
    ///
    /// @param ldata, loffsets  chars and offsets of the JSONB document column
    /// @param l_null_map       null map of the document column, or nullptr
    /// @param json_data_const  true when the document column holds a single constant value
    /// @param rdata_columns    one string column per path argument
    /// @param r_null_maps      null map per path argument (entries may be nullptr)
    /// @param path_const       per path argument: true when that path is constant
    /// @param null_map         result null map; its size defines the number of rows
    /// @return Status::OK(), or Status::InvalidArgument when a path cannot be parsed.
    ///
    /// With exactly one path the matched value itself is written. With several paths all
    /// matches are collected into one JSONB array; the row is NULL when nothing matches,
    /// when the document is NULL or unparsable, or when a path that gets evaluated is NULL.
    static Status vector_vector_v2(
            FunctionContext* context, const ColumnString::Chars& ldata,
            const ColumnString::Offsets& loffsets, const NullMap* l_null_map,
            const bool& json_data_const,
            const std::vector<const ColumnString*>& rdata_columns, // here we can support more paths
            const std::vector<const NullMap*>& r_null_maps, const std::vector<bool>& path_const,
            ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets, NullMap& null_map) {
        const size_t input_rows_count = null_map.size();
        res_offsets.resize(input_rows_count);

        auto writer = std::make_unique<JsonbWriter>();

        // Reusable list of parsed JSON paths, one slot per path argument. Constant paths
        // are parsed once up front; non-constant paths overwrite their slot for every row.
        std::vector<JsonbPath> json_path_list;
        json_path_list.resize(rdata_columns.size());

        // Parses the path of argument `pi` for row `i` into json_path_list[pi].
        auto parse_json_path = [&](size_t i, size_t pi) -> Status {
            const auto index = index_check_const(i, path_const[pi]);

            const ColumnString* path_col = rdata_columns[pi];
            const ColumnString::Chars& rdata = path_col->get_chars();
            const ColumnString::Offsets& roffsets = path_col->get_offsets();
            size_t r_off = roffsets[index - 1];
            size_t r_size = roffsets[index] - r_off;
            const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]);

            JsonbPath path;
            if (!path.seek(r_raw, r_size)) {
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
                                               std::string_view(r_raw, r_size));
            }

            json_path_list[pi] = std::move(path);

            return Status::OK();
        };

        // Parse constant paths once; a constant NULL path is never parsed.
        for (size_t pi = 0; pi < rdata_columns.size(); pi++) {
            if (path_const[pi]) {
                if (r_null_maps[pi] && (*r_null_maps[pi])[0]) {
                    continue;
                }
                RETURN_IF_ERROR(parse_json_path(0, pi));
            }
        }

        res_data.reserve(ldata.size());
        for (size_t i = 0; i < input_rows_count; ++i) {
            // NOTE(review): a row already flagged NULL by the caller is skipped without
            // writing res_offsets[i]; confirm callers never pass pre-set null flags.
            if (null_map[i]) {
                continue;
            }

            const auto data_index = index_check_const(i, json_data_const);
            if (l_null_map && (*l_null_map)[data_index]) {
                StringOP::push_null_string(i, res_data, res_offsets, null_map);
                continue;
            }

            size_t l_off = loffsets[data_index - 1];
            size_t l_size = loffsets[data_index] - l_off;
            const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]);
            if (rdata_columns.size() == 1) { // just return origin value
                const auto path_index = index_check_const(i, path_const[0]);
                if (r_null_maps[0] && (*r_null_maps[0])[path_index]) {
                    StringOP::push_null_string(i, res_data, res_offsets, null_map);
                    continue;
                }

                if (!path_const[0]) {
                    RETURN_IF_ERROR(parse_json_path(i, 0));
                }

                writer->reset();
                inner_loop_impl(writer.get(), i, res_data, res_offsets, null_map, l_raw, l_size,
                                json_path_list[0]);
            } else { // will make array string to user
                writer->reset();

                // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
                const JsonbDocument* doc = nullptr;
                auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc);
                if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
                    // The document is unusable for every path, so decide once: no path is
                    // inspected or parsed and the row is NULL.
                    StringOP::push_null_string(i, res_data, res_offsets, null_map);
                    continue;
                }

                bool has_value = false;
                // Set when a NULL path is met; the row is then NULL regardless of matches
                // collected from earlier paths.
                bool has_null_path = false;

                for (size_t pi = 0; pi < rdata_columns.size(); ++pi) {
                    const auto path_index = index_check_const(i, path_const[pi]);
                    if (r_null_maps[pi] && (*r_null_maps[pi])[path_index]) {
                        has_null_path = true;
                        break;
                    }

                    if (!path_const[pi]) {
                        RETURN_IF_ERROR(parse_json_path(i, pi));
                    }

                    auto find_result = doc->getValue()->findValue(json_path_list[pi]);
                    if (!find_result.value) {
                        continue;
                    }

                    if (!has_value) {
                        has_value = true;
                        writer->writeStartArray();
                    }
                    if (find_result.value->isArray() && find_result.is_wildcard) {
                        // To avoid getting results of nested array like [[1, 2, 3], [4, 5, 6]],
                        // if value is array, we should write all items in array, instead of write the array itself.
                        // finally we will get results like [1, 2, 3, 4, 5, 6]
                        for (const auto& item : *find_result.value->unpack<ArrayVal>()) {
                            writer->writeValue(&item);
                        }
                    } else {
                        writer->writeValue(find_result.value);
                    }
                }

                if (has_value) {
                    // Always close an opened array so the writer sees the same call
                    // sequence whether or not the row ends up NULL.
                    writer->writeEndArray();
                }

                // Finalize the row exactly once.
                if (has_value && !has_null_path) {
                    StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(),
                                                                 writer->getOutput()->getSize()),
                                                i, res_data, res_offsets);
                } else {
                    StringOP::push_null_string(i, res_data, res_offsets, null_map);
                }
            }
        } //for
        return Status::OK();
    }
_ZN5doris22JsonbExtractStringImplINS_13JsonbTypeTypeEE16vector_vector_v2EPNS_15FunctionContextERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS5_IjLm4096ES8_Lm16ELm15EEEPSA_RKbRKSt6vectorIPKNS_9ColumnStrIjEESaISM_EERKSI_ISF_SaISF_EERKSI_IbSaIbEERS9_RSC_SZ_
Line
Count
Source
920
1.32k
            ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets, NullMap& null_map) {
921
1.32k
        const size_t input_rows_count = null_map.size();
922
1.32k
        res_offsets.resize(input_rows_count);
923
924
1.32k
        auto writer = std::make_unique<JsonbWriter>();
925
926
        // reuseable json path list, espacially for const path
927
1.32k
        std::vector<JsonbPath> json_path_list;
928
1.32k
        json_path_list.resize(rdata_columns.size());
929
930
        // lambda function to parse json path for row i and path pi
931
1.32k
        auto parse_json_path = [&](size_t i, size_t pi) -> Status {
932
1.32k
            const auto index = index_check_const(i, path_const[pi]);
933
934
1.32k
            const ColumnString* path_col = rdata_columns[pi];
935
1.32k
            const ColumnString::Chars& rdata = path_col->get_chars();
936
1.32k
            const ColumnString::Offsets& roffsets = path_col->get_offsets();
937
1.32k
            size_t r_off = roffsets[index - 1];
938
1.32k
            size_t r_size = roffsets[index] - r_off;
939
1.32k
            const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]);
940
941
1.32k
            JsonbPath path;
942
1.32k
            if (!path.seek(r_raw, r_size)) {
943
1.32k
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
944
1.32k
                                               std::string_view(r_raw, r_size));
945
1.32k
            }
946
947
1.32k
            json_path_list[pi] = std::move(path);
948
949
1.32k
            return Status::OK();
950
1.32k
        };
951
952
2.65k
        for (size_t pi = 0; pi < rdata_columns.size(); pi++) {
953
1.32k
            if (path_const[pi]) {
954
1.32k
                if (r_null_maps[pi] && (*r_null_maps[pi])[0]) {
955
1
                    continue;
956
1
                }
957
1.32k
                RETURN_IF_ERROR(parse_json_path(0, pi));
958
1.32k
            }
959
1.32k
        }
960
961
1.32k
        res_data.reserve(ldata.size());
962
4.57k
        for (size_t i = 0; i < input_rows_count; ++i) {
963
3.24k
            if (null_map[i]) {
964
0
                continue;
965
0
            }
966
967
3.24k
            const auto data_index = index_check_const(i, json_data_const);
968
3.24k
            if (l_null_map && (*l_null_map)[data_index]) {
969
248
                StringOP::push_null_string(i, res_data, res_offsets, null_map);
970
248
                continue;
971
248
            }
972
973
3.00k
            size_t l_off = loffsets[data_index - 1];
974
3.00k
            size_t l_size = loffsets[data_index] - l_off;
975
3.00k
            const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]);
976
3.00k
            if (rdata_columns.size() == 1) { // just return origin value
977
3.00k
                const auto path_index = index_check_const(i, path_const[0]);
978
3.00k
                if (r_null_maps[0] && (*r_null_maps[0])[path_index]) {
979
16
                    StringOP::push_null_string(i, res_data, res_offsets, null_map);
980
16
                    continue;
981
16
                }
982
983
2.98k
                if (!path_const[0]) {
984
18
                    RETURN_IF_ERROR(parse_json_path(i, 0));
985
18
                }
986
987
2.98k
                writer->reset();
988
2.98k
                inner_loop_impl(writer.get(), i, res_data, res_offsets, null_map, l_raw, l_size,
989
2.98k
                                json_path_list[0]);
990
2.98k
            } else { // will make array string to user
991
0
                writer->reset();
992
0
                bool has_value = false;
993
994
                // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
995
0
                const JsonbDocument* doc = nullptr;
996
0
                auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc);
997
998
0
                for (size_t pi = 0; pi < rdata_columns.size(); ++pi) {
999
0
                    if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
1000
0
                        continue;
1001
0
                    }
1002
1003
0
                    const auto path_index = index_check_const(i, path_const[pi]);
1004
0
                    if (r_null_maps[pi] && (*r_null_maps[pi])[path_index]) {
1005
0
                        StringOP::push_null_string(i, res_data, res_offsets, null_map);
1006
0
                        break;
1007
0
                    }
1008
1009
0
                    if (!path_const[pi]) {
1010
0
                        RETURN_IF_ERROR(parse_json_path(i, pi));
1011
0
                    }
1012
1013
0
                    auto find_result = doc->getValue()->findValue(json_path_list[pi]);
1014
1015
0
                    if (find_result.value) {
1016
0
                        if (!has_value) {
1017
0
                            has_value = true;
1018
0
                            writer->writeStartArray();
1019
0
                        }
1020
0
                        if (find_result.value->isArray() && find_result.is_wildcard) {
1021
                            // To avoid getting results of nested array like [[1, 2, 3], [4, 5, 6]],
1022
                            // if value is array, we should write all items in array, instead of write the array itself.
1023
                            // finaly we will get results like [1, 2, 3, 4, 5, 6]
1024
0
                            for (const auto& item : *find_result.value->unpack<ArrayVal>()) {
1025
0
                                writer->writeValue(&item);
1026
0
                            }
1027
0
                        } else {
1028
0
                            writer->writeValue(find_result.value);
1029
0
                        }
1030
0
                    }
1031
0
                }
1032
0
                if (has_value) {
1033
0
                    writer->writeEndArray();
1034
0
                    StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(),
1035
0
                                                                 writer->getOutput()->getSize()),
1036
0
                                                i, res_data, res_offsets);
1037
0
                } else {
1038
0
                    StringOP::push_null_string(i, res_data, res_offsets, null_map);
1039
0
                }
1040
0
            }
1041
3.00k
        } //for
1042
1.32k
        return Status::OK();
1043
1.32k
    }
_ZN5doris22JsonbExtractStringImplINS_13JsonbTypeJsonEE16vector_vector_v2EPNS_15FunctionContextERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS5_IjLm4096ES8_Lm16ELm15EEEPSA_RKbRKSt6vectorIPKNS_9ColumnStrIjEESaISM_EERKSI_ISF_SaISF_EERKSI_IbSaIbEERS9_RSC_SZ_
Line
Count
Source
920
10.0k
            ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets, NullMap& null_map) {
921
10.0k
        const size_t input_rows_count = null_map.size();
922
10.0k
        res_offsets.resize(input_rows_count);
923
924
10.0k
        auto writer = std::make_unique<JsonbWriter>();
925
926
        // reuseable json path list, espacially for const path
927
10.0k
        std::vector<JsonbPath> json_path_list;
928
10.0k
        json_path_list.resize(rdata_columns.size());
929
930
        // lambda function to parse json path for row i and path pi
931
10.0k
        auto parse_json_path = [&](size_t i, size_t pi) -> Status {
932
10.0k
            const auto index = index_check_const(i, path_const[pi]);
933
934
10.0k
            const ColumnString* path_col = rdata_columns[pi];
935
10.0k
            const ColumnString::Chars& rdata = path_col->get_chars();
936
10.0k
            const ColumnString::Offsets& roffsets = path_col->get_offsets();
937
10.0k
            size_t r_off = roffsets[index - 1];
938
10.0k
            size_t r_size = roffsets[index] - r_off;
939
10.0k
            const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]);
940
941
10.0k
            JsonbPath path;
942
10.0k
            if (!path.seek(r_raw, r_size)) {
943
10.0k
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
944
10.0k
                                               std::string_view(r_raw, r_size));
945
10.0k
            }
946
947
10.0k
            json_path_list[pi] = std::move(path);
948
949
10.0k
            return Status::OK();
950
10.0k
        };
951
952
20.4k
        for (size_t pi = 0; pi < rdata_columns.size(); pi++) {
953
10.3k
            if (path_const[pi]) {
954
10.1k
                if (r_null_maps[pi] && (*r_null_maps[pi])[0]) {
955
40
                    continue;
956
40
                }
957
10.0k
                RETURN_IF_ERROR(parse_json_path(0, pi));
958
10.0k
            }
959
10.3k
        }
960
961
10.0k
        res_data.reserve(ldata.size());
962
148k
        for (size_t i = 0; i < input_rows_count; ++i) {
963
138k
            if (null_map[i]) {
964
0
                continue;
965
0
            }
966
967
138k
            const auto data_index = index_check_const(i, json_data_const);
968
138k
            if (l_null_map && (*l_null_map)[data_index]) {
969
1.66k
                StringOP::push_null_string(i, res_data, res_offsets, null_map);
970
1.66k
                continue;
971
1.66k
            }
972
973
137k
            size_t l_off = loffsets[data_index - 1];
974
137k
            size_t l_size = loffsets[data_index] - l_off;
975
137k
            const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]);
976
137k
            if (rdata_columns.size() == 1) { // just return origin value
977
136k
                const auto path_index = index_check_const(i, path_const[0]);
978
136k
                if (r_null_maps[0] && (*r_null_maps[0])[path_index]) {
979
17
                    StringOP::push_null_string(i, res_data, res_offsets, null_map);
980
17
                    continue;
981
17
                }
982
983
136k
                if (!path_const[0]) {
984
305
                    RETURN_IF_ERROR(parse_json_path(i, 0));
985
305
                }
986
987
136k
                writer->reset();
988
136k
                inner_loop_impl(writer.get(), i, res_data, res_offsets, null_map, l_raw, l_size,
989
136k
                                json_path_list[0]);
990
136k
            } else { // will make array string to user
991
920
                writer->reset();
992
920
                bool has_value = false;
993
994
                // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
995
920
                const JsonbDocument* doc = nullptr;
996
920
                auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc);
997
998
2.04k
                for (size_t pi = 0; pi < rdata_columns.size(); ++pi) {
999
1.22k
                    if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
1000
0
                        continue;
1001
0
                    }
1002
1003
1.22k
                    const auto path_index = index_check_const(i, path_const[pi]);
1004
1.22k
                    if (r_null_maps[pi] && (*r_null_maps[pi])[path_index]) {
1005
98
                        StringOP::push_null_string(i, res_data, res_offsets, null_map);
1006
98
                        break;
1007
98
                    }
1008
1009
1.12k
                    if (!path_const[pi]) {
1010
64
                        RETURN_IF_ERROR(parse_json_path(i, pi));
1011
64
                    }
1012
1013
1.12k
                    auto find_result = doc->getValue()->findValue(json_path_list[pi]);
1014
1015
1.12k
                    if (find_result.value) {
1016
276
                        if (!has_value) {
1017
150
                            has_value = true;
1018
150
                            writer->writeStartArray();
1019
150
                        }
1020
276
                        if (find_result.value->isArray() && find_result.is_wildcard) {
1021
                            // To avoid getting results of nested array like [[1, 2, 3], [4, 5, 6]],
1022
                            // if value is array, we should write all items in array, instead of write the array itself.
1023
                            // finaly we will get results like [1, 2, 3, 4, 5, 6]
1024
54
                            for (const auto& item : *find_result.value->unpack<ArrayVal>()) {
1025
54
                                writer->writeValue(&item);
1026
54
                            }
1027
253
                        } else {
1028
253
                            writer->writeValue(find_result.value);
1029
253
                        }
1030
276
                    }
1031
1.12k
                }
1032
920
                if (has_value) {
1033
150
                    writer->writeEndArray();
1034
150
                    StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(),
1035
150
                                                                 writer->getOutput()->getSize()),
1036
150
                                                i, res_data, res_offsets);
1037
770
                } else {
1038
770
                    StringOP::push_null_string(i, res_data, res_offsets, null_map);
1039
770
                }
1040
920
            }
1041
137k
        } //for
1042
10.0k
        return Status::OK();
1043
10.0k
    }
_ZN5doris22JsonbExtractStringImplINS_21JsonbTypeJsonNoQuotesEE16vector_vector_v2EPNS_15FunctionContextERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS5_IjLm4096ES8_Lm16ELm15EEEPSA_RKbRKSt6vectorIPKNS_9ColumnStrIjEESaISM_EERKSI_ISF_SaISF_EERKSI_IbSaIbEERS9_RSC_SZ_
Line
Count
Source
920
9
            ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets, NullMap& null_map) {
921
9
        const size_t input_rows_count = null_map.size();
922
9
        res_offsets.resize(input_rows_count);
923
924
9
        auto writer = std::make_unique<JsonbWriter>();
925
926
        // reuseable json path list, espacially for const path
927
9
        std::vector<JsonbPath> json_path_list;
928
9
        json_path_list.resize(rdata_columns.size());
929
930
        // lambda function to parse json path for row i and path pi
931
9
        auto parse_json_path = [&](size_t i, size_t pi) -> Status {
932
9
            const auto index = index_check_const(i, path_const[pi]);
933
934
9
            const ColumnString* path_col = rdata_columns[pi];
935
9
            const ColumnString::Chars& rdata = path_col->get_chars();
936
9
            const ColumnString::Offsets& roffsets = path_col->get_offsets();
937
9
            size_t r_off = roffsets[index - 1];
938
9
            size_t r_size = roffsets[index] - r_off;
939
9
            const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]);
940
941
9
            JsonbPath path;
942
9
            if (!path.seek(r_raw, r_size)) {
943
9
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
944
9
                                               std::string_view(r_raw, r_size));
945
9
            }
946
947
9
            json_path_list[pi] = std::move(path);
948
949
9
            return Status::OK();
950
9
        };
951
952
22
        for (size_t pi = 0; pi < rdata_columns.size(); pi++) {
953
13
            if (path_const[pi]) {
954
0
                if (r_null_maps[pi] && (*r_null_maps[pi])[0]) {
955
0
                    continue;
956
0
                }
957
0
                RETURN_IF_ERROR(parse_json_path(0, pi));
958
0
            }
959
13
        }
960
961
9
        res_data.reserve(ldata.size());
962
18
        for (size_t i = 0; i < input_rows_count; ++i) {
963
9
            if (null_map[i]) {
964
0
                continue;
965
0
            }
966
967
9
            const auto data_index = index_check_const(i, json_data_const);
968
9
            if (l_null_map && (*l_null_map)[data_index]) {
969
1
                StringOP::push_null_string(i, res_data, res_offsets, null_map);
970
1
                continue;
971
1
            }
972
973
8
            size_t l_off = loffsets[data_index - 1];
974
8
            size_t l_size = loffsets[data_index] - l_off;
975
8
            const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]);
976
8
            if (rdata_columns.size() == 1) { // just return origin value
977
5
                const auto path_index = index_check_const(i, path_const[0]);
978
5
                if (r_null_maps[0] && (*r_null_maps[0])[path_index]) {
979
0
                    StringOP::push_null_string(i, res_data, res_offsets, null_map);
980
0
                    continue;
981
0
                }
982
983
5
                if (!path_const[0]) {
984
5
                    RETURN_IF_ERROR(parse_json_path(i, 0));
985
5
                }
986
987
5
                writer->reset();
988
5
                inner_loop_impl(writer.get(), i, res_data, res_offsets, null_map, l_raw, l_size,
989
5
                                json_path_list[0]);
990
5
            } else { // will make array string to user
991
3
                writer->reset();
992
3
                bool has_value = false;
993
994
                // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
995
3
                const JsonbDocument* doc = nullptr;
996
3
                auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc);
997
998
9
                for (size_t pi = 0; pi < rdata_columns.size(); ++pi) {
999
7
                    if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
1000
0
                        continue;
1001
0
                    }
1002
1003
7
                    const auto path_index = index_check_const(i, path_const[pi]);
1004
7
                    if (r_null_maps[pi] && (*r_null_maps[pi])[path_index]) {
1005
1
                        StringOP::push_null_string(i, res_data, res_offsets, null_map);
1006
1
                        break;
1007
1
                    }
1008
1009
6
                    if (!path_const[pi]) {
1010
6
                        RETURN_IF_ERROR(parse_json_path(i, pi));
1011
6
                    }
1012
1013
6
                    auto find_result = doc->getValue()->findValue(json_path_list[pi]);
1014
1015
6
                    if (find_result.value) {
1016
6
                        if (!has_value) {
1017
3
                            has_value = true;
1018
3
                            writer->writeStartArray();
1019
3
                        }
1020
6
                        if (find_result.value->isArray() && find_result.is_wildcard) {
1021
                            // To avoid getting results of nested array like [[1, 2, 3], [4, 5, 6]],
1022
                            // if value is array, we should write all items in array, instead of write the array itself.
1023
                            // finaly we will get results like [1, 2, 3, 4, 5, 6]
1024
0
                            for (const auto& item : *find_result.value->unpack<ArrayVal>()) {
1025
0
                                writer->writeValue(&item);
1026
0
                            }
1027
6
                        } else {
1028
6
                            writer->writeValue(find_result.value);
1029
6
                        }
1030
6
                    }
1031
6
                }
1032
3
                if (has_value) {
1033
3
                    writer->writeEndArray();
1034
3
                    StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(),
1035
3
                                                                 writer->getOutput()->getSize()),
1036
3
                                                i, res_data, res_offsets);
1037
3
                } else {
1038
0
                    StringOP::push_null_string(i, res_data, res_offsets, null_map);
1039
0
                }
1040
3
            }
1041
8
        } //for
1042
9
        return Status::OK();
1043
9
    }
1044
1045
    static Status vector_vector(FunctionContext* context, const ColumnString::Chars& ldata,
1046
                                const ColumnString::Offsets& loffsets, const NullMap* l_null_map,
1047
                                const ColumnString::Chars& rdata,
1048
                                const ColumnString::Offsets& roffsets, const NullMap* r_null_map,
1049
                                ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets,
1050
                                NullMap& null_map) {
1051
        size_t input_rows_count = loffsets.size();
1052
        res_offsets.resize(input_rows_count);
1053
1054
        JsonbWriter writer;
1055
        for (size_t i = 0; i < input_rows_count; ++i) {
1056
            if (l_null_map && (*l_null_map)[i]) {
1057
                StringOP::push_null_string(i, res_data, res_offsets, null_map);
1058
                continue;
1059
            }
1060
1061
            if (r_null_map && (*r_null_map)[i]) {
1062
                StringOP::push_null_string(i, res_data, res_offsets, null_map);
1063
                continue;
1064
            }
1065
1066
            int l_size = loffsets[i] - loffsets[i - 1];
1067
            const char* l_raw = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
1068
1069
            int r_size = roffsets[i] - roffsets[i - 1];
1070
            const char* r_raw = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
1071
1072
            JsonbPath path;
1073
            if (!path.seek(r_raw, r_size)) {
1074
                return Status::InvalidArgument(
1075
                        "Json path error: Invalid Json Path for value: {} at row: {}",
1076
                        std::string_view(r_raw, r_size), i);
1077
            }
1078
1079
            writer.reset();
1080
            inner_loop_impl(&writer, i, res_data, res_offsets, null_map, l_raw, l_size, path);
1081
        } //for
1082
        return Status::OK();
1083
    } //function
1084
1085
    static Status vector_scalar(FunctionContext* context, const ColumnString::Chars& ldata,
1086
                                const ColumnString::Offsets& loffsets, const NullMap* l_null_map,
1087
                                const StringRef& rdata, ColumnString::Chars& res_data,
1088
                                ColumnString::Offsets& res_offsets, NullMap& null_map) {
1089
        size_t input_rows_count = loffsets.size();
1090
        res_offsets.resize(input_rows_count);
1091
1092
        JsonbPath path;
1093
        if (!path.seek(rdata.data, rdata.size)) {
1094
            return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
1095
                                           std::string_view(rdata.data, rdata.size));
1096
        }
1097
1098
        JsonbWriter writer;
1099
        for (size_t i = 0; i < input_rows_count; ++i) {
1100
            if (l_null_map && (*l_null_map)[i]) {
1101
                StringOP::push_null_string(i, res_data, res_offsets, null_map);
1102
                continue;
1103
            }
1104
1105
            int l_size = loffsets[i] - loffsets[i - 1];
1106
            const char* l_raw = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
1107
1108
            writer.reset();
1109
            inner_loop_impl(&writer, i, res_data, res_offsets, null_map, l_raw, l_size, path);
1110
        } //for
1111
        return Status::OK();
1112
    } //function
1113
1114
    static Status scalar_vector(FunctionContext* context, const StringRef& ldata,
1115
                                const ColumnString::Chars& rdata,
1116
                                const ColumnString::Offsets& roffsets, const NullMap* r_null_map,
1117
                                ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets,
1118
                                NullMap& null_map) {
1119
        size_t input_rows_count = roffsets.size();
1120
        res_offsets.resize(input_rows_count);
1121
1122
        JsonbWriter writer;
1123
1124
        for (size_t i = 0; i < input_rows_count; ++i) {
1125
            if (r_null_map && (*r_null_map)[i]) {
1126
                StringOP::push_null_string(i, res_data, res_offsets, null_map);
1127
                continue;
1128
            }
1129
1130
            int r_size = roffsets[i] - roffsets[i - 1];
1131
            const char* r_raw = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
1132
1133
            JsonbPath path;
1134
            if (!path.seek(r_raw, r_size)) {
1135
                return Status::InvalidArgument(
1136
                        "Json path error: Invalid Json Path for value: {} at row: {}",
1137
                        std::string_view(r_raw, r_size), i);
1138
            }
1139
1140
            writer.reset();
1141
            inner_loop_impl(&writer, i, res_data, res_offsets, null_map, ldata.data, ldata.size,
1142
                            path);
1143
        } //for
1144
        return Status::OK();
1145
    } //function
1146
};
1147
1148
struct JsonbExtractIsnull {
1149
    static constexpr auto name = "json_extract_isnull";
1150
    static constexpr auto alias = "jsonb_extract_isnull";
1151
1152
    using ReturnType = DataTypeUInt8;
1153
    using ColumnType = ColumnUInt8;
1154
    using Container = typename ColumnType::Container;
1155
1156
private:
1157
    static ALWAYS_INLINE void inner_loop_impl(size_t i, Container& res, NullMap& null_map,
1158
                                              const char* l_raw_str, size_t l_str_size,
1159
2.97k
                                              JsonbPath& path) {
1160
2.97k
        if (null_map[i]) {
1161
0
            res[i] = 0;
1162
0
            return;
1163
0
        }
1164
1165
        // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
1166
2.97k
        const JsonbDocument* doc = nullptr;
1167
2.97k
        auto st = JsonbDocument::checkAndCreateDocument(l_raw_str, l_str_size, &doc);
1168
2.97k
        if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
1169
0
            null_map[i] = 1;
1170
0
            res[i] = 0;
1171
0
            return;
1172
0
        }
1173
1174
        // value is NOT necessary to be deleted since JsonbValue will not allocate memory
1175
2.97k
        auto find_result = doc->getValue()->findValue(path);
1176
2.97k
        const auto* value = find_result.value;
1177
1178
2.97k
        if (UNLIKELY(!value)) {
1179
2.55k
            null_map[i] = 1;
1180
2.55k
            res[i] = 0;
1181
2.55k
            return;
1182
2.55k
        }
1183
1184
420
        res[i] = value->isNull();
1185
420
    }
1186
1187
public:
1188
    // for jsonb_extract_int/int64/double
1189
    static Status vector_vector(FunctionContext* context, const ColumnString::Chars& ldata,
1190
                                const ColumnString::Offsets& loffsets, const NullMap* l_null_map,
1191
                                const ColumnString::Chars& rdata,
1192
                                const ColumnString::Offsets& roffsets, const NullMap* r_null_map,
1193
4
                                Container& res, NullMap& null_map) {
1194
4
        size_t size = loffsets.size();
1195
4
        res.resize(size);
1196
1197
19
        for (size_t i = 0; i < loffsets.size(); i++) {
1198
15
            if ((l_null_map && (*l_null_map)[i]) || (r_null_map && (*r_null_map)[i])) {
1199
8
                res[i] = 0;
1200
8
                null_map[i] = 1;
1201
8
                continue;
1202
8
            }
1203
1204
7
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
1205
7
            int l_str_size = loffsets[i] - loffsets[i - 1];
1206
1207
7
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
1208
7
            int r_str_size = roffsets[i] - roffsets[i - 1];
1209
1210
7
            JsonbPath path;
1211
7
            if (!path.seek(r_raw_str, r_str_size)) {
1212
0
                return Status::InvalidArgument(
1213
0
                        "Json path error: Invalid Json Path for value: {} at row: {}",
1214
0
                        std::string_view(r_raw_str, r_str_size), i);
1215
0
            }
1216
1217
7
            inner_loop_impl(i, res, null_map, l_raw_str, l_str_size, path);
1218
7
        } //for
1219
4
        return Status::OK();
1220
4
    } //function
1221
1222
    static Status scalar_vector(FunctionContext* context, const StringRef& ldata,
1223
                                const ColumnString::Chars& rdata,
1224
                                const ColumnString::Offsets& roffsets, const NullMap* r_null_map,
1225
1
                                Container& res, NullMap& null_map) {
1226
1
        size_t size = roffsets.size();
1227
1
        res.resize(size);
1228
1229
13
        for (size_t i = 0; i < size; i++) {
1230
12
            if (r_null_map && (*r_null_map)[i]) {
1231
4
                res[i] = 0;
1232
4
                null_map[i] = 1;
1233
4
                continue;
1234
4
            }
1235
1236
8
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
1237
8
            int r_str_size = roffsets[i] - roffsets[i - 1];
1238
1239
8
            JsonbPath path;
1240
8
            if (!path.seek(r_raw_str, r_str_size)) {
1241
0
                return Status::InvalidArgument(
1242
0
                        "Json path error: Invalid Json Path for value: {} at row: {}",
1243
0
                        std::string_view(r_raw_str, r_str_size), i);
1244
0
            }
1245
1246
8
            inner_loop_impl(i, res, null_map, ldata.data, ldata.size, path);
1247
8
        } //for
1248
1
        return Status::OK();
1249
1
    } //function
1250
1251
    static Status vector_scalar(FunctionContext* context, const ColumnString::Chars& ldata,
1252
                                const ColumnString::Offsets& loffsets, const NullMap* l_null_map,
1253
1.32k
                                const StringRef& rdata, Container& res, NullMap& null_map) {
1254
1.32k
        size_t size = loffsets.size();
1255
1.32k
        res.resize(size);
1256
1257
1.32k
        JsonbPath path;
1258
1.32k
        if (!path.seek(rdata.data, rdata.size)) {
1259
0
            return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
1260
0
                                           std::string_view(rdata.data, rdata.size));
1261
0
        }
1262
1263
4.50k
        for (size_t i = 0; i < loffsets.size(); i++) {
1264
3.18k
            if (l_null_map && (*l_null_map)[i]) {
1265
228
                res[i] = 0;
1266
228
                null_map[i] = 1;
1267
228
                continue;
1268
228
            }
1269
1270
2.95k
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
1271
2.95k
            int l_str_size = loffsets[i] - loffsets[i - 1];
1272
1273
2.95k
            inner_loop_impl(i, res, null_map, l_raw_str, l_str_size, path);
1274
2.95k
        } //for
1275
1.32k
        return Status::OK();
1276
1.32k
    } //function
1277
};
1278
1279
struct JsonbTypeJson {
1280
    using T = std::string;
1281
    using ReturnType = DataTypeJsonb;
1282
    using ColumnType = ColumnString;
1283
    static const bool only_get_type = false;
1284
    static const bool no_quotes = false;
1285
};
1286
1287
struct JsonbTypeJsonNoQuotes {
1288
    using T = std::string;
1289
    using ReturnType = DataTypeJsonb;
1290
    using ColumnType = ColumnString;
1291
    static const bool only_get_type = false;
1292
    static const bool no_quotes = true;
1293
};
1294
1295
struct JsonbTypeType {
1296
    using T = std::string;
1297
    using ReturnType = DataTypeString;
1298
    using ColumnType = ColumnString;
1299
    static const bool only_get_type = true;
1300
    static const bool no_quotes = false;
1301
};
1302
1303
struct JsonbExtractJsonb : public JsonbExtractStringImpl<JsonbTypeJson> {
1304
    static constexpr auto name = "jsonb_extract";
1305
    static constexpr auto alias = "json_extract";
1306
};
1307
1308
struct JsonbExtractJsonbNoQuotes : public JsonbExtractStringImpl<JsonbTypeJsonNoQuotes> {
1309
    static constexpr auto name = "jsonb_extract_no_quotes";
1310
    static constexpr auto alias = "json_extract_no_quotes";
1311
};
1312
1313
struct JsonbTypeImpl : public JsonbExtractStringImpl<JsonbTypeType> {
1314
    static constexpr auto name = "json_type";
1315
    static constexpr auto alias = "jsonb_type";
1316
};
1317
1318
using FunctionJsonbExists = FunctionJsonbExtractPath;
1319
using FunctionJsonbType = FunctionJsonbExtract<JsonbTypeImpl>;
1320
1321
using FunctionJsonbExtractIsnull = FunctionJsonbExtract<JsonbExtractIsnull>;
1322
using FunctionJsonbExtractJsonb = FunctionJsonbExtract<JsonbExtractJsonb>;
1323
using FunctionJsonbExtractJsonbNoQuotes = FunctionJsonbExtract<JsonbExtractJsonbNoQuotes>;
1324
1325
template <typename Impl>
1326
class FunctionJsonbLength : public IFunction {
1327
public:
1328
    static constexpr auto name = "json_length";
1329
1
    String get_name() const override { return name; }
1330
182
    static FunctionPtr create() { return std::make_shared<FunctionJsonbLength<Impl>>(); }
1331
1332
173
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1333
173
        return make_nullable(std::make_shared<DataTypeInt32>());
1334
173
    }
1335
181
    DataTypes get_variadic_argument_types_impl() const override {
1336
181
        return Impl::get_variadic_argument_types();
1337
181
    }
1338
173
    size_t get_number_of_arguments() const override {
1339
173
        return get_variadic_argument_types_impl().size();
1340
173
    }
1341
1342
352
    bool use_default_implementation_for_nulls() const override { return false; }
1343
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
1344
179
                        uint32_t result, size_t input_rows_count) const override {
1345
179
        return Impl::execute_impl(context, block, arguments, result, input_rows_count);
1346
179
    }
1347
};
1348
1349
struct JsonbLengthUtil {
1350
    static Status jsonb_length_execute(FunctionContext* context, Block& block,
1351
                                       const ColumnNumbers& arguments, uint32_t result,
1352
179
                                       size_t input_rows_count) {
1353
179
        DORIS_CHECK_GE(arguments.size(), 2);
1354
179
        ColumnPtr jsonb_data_column;
1355
179
        bool jsonb_data_const = false;
1356
        // prepare jsonb data column
1357
179
        std::tie(jsonb_data_column, jsonb_data_const) =
1358
179
                unpack_if_const(block.get_by_position(arguments[0]).column);
1359
179
        ColumnPtr path_column;
1360
179
        bool is_const = false;
1361
179
        std::tie(path_column, is_const) =
1362
179
                unpack_if_const(block.get_by_position(arguments[1]).column);
1363
1364
179
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
1365
179
        auto return_type = block.get_data_type(result);
1366
179
        MutableColumnPtr res = return_type->create_column();
1367
1368
179
        JsonbPath path;
1369
179
        if (is_const) {
1370
151
            if (path_column->is_null_at(0)) {
1371
2
                for (size_t i = 0; i < input_rows_count; ++i) {
1372
1
                    null_map->get_data()[i] = 1;
1373
1
                    res->insert_data(nullptr, 0);
1374
1
                }
1375
1376
1
                block.replace_by_position(
1377
1
                        result, ColumnNullable::create(std::move(res), std::move(null_map)));
1378
1
                return Status::OK();
1379
1
            }
1380
1381
150
            auto path_value = path_column->get_data_at(0);
1382
150
            if (!path.seek(path_value.data, path_value.size)) {
1383
0
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
1384
0
                                               std::string_view(path_value.data, path_value.size));
1385
0
            }
1386
150
        }
1387
1388
443
        for (size_t i = 0; i < input_rows_count; ++i) {
1389
265
            if (jsonb_data_column->is_null_at(i) || path_column->is_null_at(i) ||
1390
265
                (jsonb_data_column->get_data_at(i).size == 0)) {
1391
18
                null_map->get_data()[i] = 1;
1392
18
                res->insert_data(nullptr, 0);
1393
18
                continue;
1394
18
            }
1395
247
            if (!is_const) {
1396
25
                auto path_value = path_column->get_data_at(i);
1397
25
                path.clean();
1398
25
                if (!path.seek(path_value.data, path_value.size)) {
1399
0
                    return Status::InvalidArgument(
1400
0
                            "Json path error: Invalid Json Path for value: {}",
1401
0
                            std::string_view(path_value.data, path_value.size));
1402
0
                }
1403
25
            }
1404
247
            auto jsonb_value = jsonb_data_column->get_data_at(i);
1405
            // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
1406
247
            const JsonbDocument* doc = nullptr;
1407
247
            RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(jsonb_value.data,
1408
247
                                                                  jsonb_value.size, &doc));
1409
247
            auto find_result = doc->getValue()->findValue(path);
1410
247
            const auto* value = find_result.value;
1411
247
            if (UNLIKELY(!value)) {
1412
74
                null_map->get_data()[i] = 1;
1413
74
                res->insert_data(nullptr, 0);
1414
74
                continue;
1415
74
            }
1416
173
            auto length = value->numElements();
1417
173
            res->insert_data(const_cast<const char*>((char*)&length), 0);
1418
173
        }
1419
178
        block.replace_by_position(result,
1420
178
                                  ColumnNullable::create(std::move(res), std::move(null_map)));
1421
178
        return Status::OK();
1422
178
    }
1423
};
1424
1425
struct JsonbLengthAndPathImpl {
1426
181
    static DataTypes get_variadic_argument_types() {
1427
181
        return {std::make_shared<DataTypeJsonb>(), std::make_shared<DataTypeString>()};
1428
181
    }
1429
1430
    static Status execute_impl(FunctionContext* context, Block& block,
1431
                               const ColumnNumbers& arguments, uint32_t result,
1432
179
                               size_t input_rows_count) {
1433
179
        return JsonbLengthUtil::jsonb_length_execute(context, block, arguments, result,
1434
179
                                                     input_rows_count);
1435
179
    }
1436
};
1437
1438
template <typename Impl>
1439
class FunctionJsonbContains : public IFunction {
1440
public:
1441
    static constexpr auto name = "json_contains";
1442
1
    String get_name() const override { return name; }
1443
65
    static FunctionPtr create() { return std::make_shared<FunctionJsonbContains<Impl>>(); }
1444
1445
56
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1446
56
        return make_nullable(std::make_shared<DataTypeUInt8>());
1447
56
    }
1448
64
    DataTypes get_variadic_argument_types_impl() const override {
1449
64
        return Impl::get_variadic_argument_types();
1450
64
    }
1451
56
    size_t get_number_of_arguments() const override {
1452
56
        return get_variadic_argument_types_impl().size();
1453
56
    }
1454
1455
193
    bool use_default_implementation_for_nulls() const override { return false; }
1456
1457
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
1458
137
                        uint32_t result, size_t input_rows_count) const override {
1459
137
        return Impl::execute_impl(context, block, arguments, result, input_rows_count);
1460
137
    }
1461
};
1462
1463
struct JsonbContainsUtil {
1464
    static Status jsonb_contains_execute(FunctionContext* context, Block& block,
1465
                                         const ColumnNumbers& arguments, uint32_t result,
1466
137
                                         size_t input_rows_count) {
1467
137
        DORIS_CHECK_GE(arguments.size(), 3);
1468
1469
137
        auto jsonb_data1_column = block.get_by_position(arguments[0]).column;
1470
137
        auto jsonb_data2_column = block.get_by_position(arguments[1]).column;
1471
1472
137
        ColumnPtr path_column;
1473
137
        bool is_const = false;
1474
137
        std::tie(path_column, is_const) =
1475
137
                unpack_if_const(block.get_by_position(arguments[2]).column);
1476
1477
137
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
1478
137
        auto return_type = block.get_data_type(result);
1479
137
        MutableColumnPtr res = return_type->create_column();
1480
1481
137
        JsonbPath path;
1482
137
        if (is_const) {
1483
86
            if (path_column->is_null_at(0)) {
1484
2
                for (size_t i = 0; i < input_rows_count; ++i) {
1485
1
                    null_map->get_data()[i] = 1;
1486
1
                    res->insert_data(nullptr, 0);
1487
1
                }
1488
1489
1
                block.replace_by_position(
1490
1
                        result, ColumnNullable::create(std::move(res), std::move(null_map)));
1491
1
                return Status::OK();
1492
1
            }
1493
1494
85
            auto path_value = path_column->get_data_at(0);
1495
85
            if (!path.seek(path_value.data, path_value.size)) {
1496
3
                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
1497
3
                                               std::string_view(path_value.data, path_value.size));
1498
3
            }
1499
85
        }
1500
1501
394
        for (size_t i = 0; i < input_rows_count; ++i) {
1502
262
            if (jsonb_data1_column->is_null_at(i) || jsonb_data2_column->is_null_at(i) ||
1503
262
                path_column->is_null_at(i)) {
1504
28
                null_map->get_data()[i] = 1;
1505
28
                res->insert_data(nullptr, 0);
1506
28
                continue;
1507
28
            }
1508
1509
234
            if (!is_const) {
1510
54
                auto path_value = path_column->get_data_at(i);
1511
54
                path.clean();
1512
54
                if (!path.seek(path_value.data, path_value.size)) {
1513
1
                    return Status::InvalidArgument(
1514
1
                            "Json path error: Invalid Json Path for value: {}",
1515
1
                            std::string_view(path_value.data, path_value.size));
1516
1
                }
1517
54
            }
1518
1519
233
            auto jsonb_value1 = jsonb_data1_column->get_data_at(i);
1520
233
            auto jsonb_value2 = jsonb_data2_column->get_data_at(i);
1521
1522
233
            if (jsonb_value1.size == 0 || jsonb_value2.size == 0) {
1523
1
                null_map->get_data()[i] = 1;
1524
1
                res->insert_data(nullptr, 0);
1525
1
                continue;
1526
1
            }
1527
            // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
1528
232
            const JsonbDocument* doc1 = nullptr;
1529
232
            RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(jsonb_value1.data,
1530
232
                                                                  jsonb_value1.size, &doc1));
1531
232
            const JsonbDocument* doc2 = nullptr;
1532
232
            RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(jsonb_value2.data,
1533
232
                                                                  jsonb_value2.size, &doc2));
1534
1535
232
            auto find_result = doc1->getValue()->findValue(path);
1536
232
            const auto* value1 = find_result.value;
1537
232
            const JsonbValue* value2 = doc2->getValue();
1538
232
            if (!value1 || !value2) {
1539
45
                null_map->get_data()[i] = 1;
1540
45
                res->insert_data(nullptr, 0);
1541
45
                continue;
1542
45
            }
1543
187
            auto contains_value = value1->contains(value2);
1544
187
            res->insert_data(const_cast<const char*>((char*)&contains_value), 0);
1545
187
        }
1546
1547
132
        block.replace_by_position(result,
1548
132
                                  ColumnNullable::create(std::move(res), std::move(null_map)));
1549
132
        return Status::OK();
1550
133
    }
1551
};
1552
1553
template <bool ignore_null>
1554
class FunctionJsonbArray : public IFunction {
1555
public:
1556
    static constexpr auto name = "json_array";
1557
    static constexpr auto alias = "jsonb_array";
1558
1559
51
    static FunctionPtr create() { return std::make_shared<FunctionJsonbArray>(); }
_ZN5doris18FunctionJsonbArrayILb0EE6createEv
Line
Count
Source
1559
40
    static FunctionPtr create() { return std::make_shared<FunctionJsonbArray>(); }
_ZN5doris18FunctionJsonbArrayILb1EE6createEv
Line
Count
Source
1559
11
    static FunctionPtr create() { return std::make_shared<FunctionJsonbArray>(); }
1560
1561
0
    String get_name() const override { return name; }
Unexecuted instantiation: _ZNK5doris18FunctionJsonbArrayILb0EE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris18FunctionJsonbArrayILb1EE8get_nameB5cxx11Ev
1562
1563
0
    size_t get_number_of_arguments() const override { return 0; }
Unexecuted instantiation: _ZNK5doris18FunctionJsonbArrayILb0EE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris18FunctionJsonbArrayILb1EE23get_number_of_argumentsEv
1564
35
    bool is_variadic() const override { return true; }
_ZNK5doris18FunctionJsonbArrayILb0EE11is_variadicEv
Line
Count
Source
1564
32
    bool is_variadic() const override { return true; }
_ZNK5doris18FunctionJsonbArrayILb1EE11is_variadicEv
Line
Count
Source
1564
3
    bool is_variadic() const override { return true; }
1565
1566
64
    bool use_default_implementation_for_nulls() const override { return false; }
_ZNK5doris18FunctionJsonbArrayILb0EE36use_default_implementation_for_nullsEv
Line
Count
Source
1566
60
    bool use_default_implementation_for_nulls() const override { return false; }
_ZNK5doris18FunctionJsonbArrayILb1EE36use_default_implementation_for_nullsEv
Line
Count
Source
1566
4
    bool use_default_implementation_for_nulls() const override { return false; }
1567
1568
33
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1569
33
        return std::make_shared<DataTypeJsonb>();
1570
33
    }
_ZNK5doris18FunctionJsonbArrayILb0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS6_EE
Line
Count
Source
1568
31
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1569
31
        return std::make_shared<DataTypeJsonb>();
1570
31
    }
_ZNK5doris18FunctionJsonbArrayILb1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS6_EE
Line
Count
Source
1568
2
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1569
2
        return std::make_shared<DataTypeJsonb>();
1570
2
    }
1571
1572
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
1573
33
                        uint32_t result, size_t input_rows_count) const override {
1574
33
        auto return_data_type = std::make_shared<DataTypeJsonb>();
1575
33
        auto column = return_data_type->create_column();
1576
33
        column->reserve(input_rows_count);
1577
1578
33
        JsonbWriter writer;
1579
100
        for (size_t i = 0; i < input_rows_count; ++i) {
1580
63
            writer.writeStartArray();
1581
182
            for (auto argument : arguments) {
1582
182
                auto&& [arg_column, is_const] =
1583
182
                        unpack_if_const(block.get_by_position(argument).column);
1584
182
                if (const auto* nullable_column =
1585
182
                            check_and_get_column<ColumnNullable>(arg_column.get())) {
1586
83
                    const auto& null_map = nullable_column->get_null_map_data();
1587
83
                    const auto& nested_column = nullable_column->get_nested_column();
1588
83
                    const auto& jsonb_column =
1589
83
                            assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>(
1590
83
                                    nested_column);
1591
1592
83
                    auto index = index_check_const(i, is_const);
1593
83
                    if (null_map[index]) {
1594
30
                        if constexpr (ignore_null) {
1595
4
                            continue;
1596
26
                        } else {
1597
26
                            writer.writeNull();
1598
26
                        }
1599
53
                    } else {
1600
53
                        auto jsonb_binary = jsonb_column.get_data_at(index);
1601
53
                        const JsonbDocument* doc = nullptr;
1602
53
                        auto st = JsonbDocument::checkAndCreateDocument(jsonb_binary.data,
1603
53
                                                                        jsonb_binary.size, &doc);
1604
53
                        if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
1605
0
                            if constexpr (ignore_null) {
1606
0
                                continue;
1607
0
                            } else {
1608
0
                                writer.writeNull();
1609
0
                            }
1610
53
                        } else {
1611
53
                            writer.writeValue(doc->getValue());
1612
53
                        }
1613
53
                    }
1614
99
                } else {
1615
99
                    const auto& jsonb_column =
1616
99
                            assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>(
1617
99
                                    *arg_column);
1618
1619
99
                    auto index = index_check_const(i, is_const);
1620
99
                    auto jsonb_binary = jsonb_column.get_data_at(index);
1621
99
                    const JsonbDocument* doc = nullptr;
1622
99
                    auto st = JsonbDocument::checkAndCreateDocument(jsonb_binary.data,
1623
99
                                                                    jsonb_binary.size, &doc);
1624
99
                    if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
1625
0
                        if constexpr (ignore_null) {
1626
0
                            continue;
1627
0
                        } else {
1628
0
                            writer.writeNull();
1629
0
                        }
1630
99
                    } else {
1631
99
                        writer.writeValue(doc->getValue());
1632
99
                    }
1633
99
                }
1634
182
            }
1635
19
            writer.writeEndArray();
1636
19
            column->insert_data(writer.getOutput()->getBuffer(), writer.getOutput()->getSize());
1637
19
            writer.reset();
1638
19
        }
1639
1640
6
        block.get_by_position(result).column = std::move(column);
1641
6
        return Status::OK();
1642
33
    }
_ZNK5doris18FunctionJsonbArrayILb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
1573
31
                        uint32_t result, size_t input_rows_count) const override {
1574
31
        auto return_data_type = std::make_shared<DataTypeJsonb>();
1575
31
        auto column = return_data_type->create_column();
1576
31
        column->reserve(input_rows_count);
1577
1578
31
        JsonbWriter writer;
1579
79
        for (size_t i = 0; i < input_rows_count; ++i) {
1580
48
            writer.writeStartArray();
1581
152
            for (auto argument : arguments) {
1582
152
                auto&& [arg_column, is_const] =
1583
152
                        unpack_if_const(block.get_by_position(argument).column);
1584
152
                if (const auto* nullable_column =
1585
152
                            check_and_get_column<ColumnNullable>(arg_column.get())) {
1586
58
                    const auto& null_map = nullable_column->get_null_map_data();
1587
58
                    const auto& nested_column = nullable_column->get_nested_column();
1588
58
                    const auto& jsonb_column =
1589
58
                            assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>(
1590
58
                                    nested_column);
1591
1592
58
                    auto index = index_check_const(i, is_const);
1593
58
                    if (null_map[index]) {
1594
                        if constexpr (ignore_null) {
1595
                            continue;
1596
26
                        } else {
1597
26
                            writer.writeNull();
1598
26
                        }
1599
32
                    } else {
1600
32
                        auto jsonb_binary = jsonb_column.get_data_at(index);
1601
32
                        const JsonbDocument* doc = nullptr;
1602
32
                        auto st = JsonbDocument::checkAndCreateDocument(jsonb_binary.data,
1603
32
                                                                        jsonb_binary.size, &doc);
1604
32
                        if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
1605
                            if constexpr (ignore_null) {
1606
                                continue;
1607
0
                            } else {
1608
0
                                writer.writeNull();
1609
0
                            }
1610
32
                        } else {
1611
32
                            writer.writeValue(doc->getValue());
1612
32
                        }
1613
32
                    }
1614
94
                } else {
1615
94
                    const auto& jsonb_column =
1616
94
                            assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>(
1617
94
                                    *arg_column);
1618
1619
94
                    auto index = index_check_const(i, is_const);
1620
94
                    auto jsonb_binary = jsonb_column.get_data_at(index);
1621
94
                    const JsonbDocument* doc = nullptr;
1622
94
                    auto st = JsonbDocument::checkAndCreateDocument(jsonb_binary.data,
1623
94
                                                                    jsonb_binary.size, &doc);
1624
94
                    if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
1625
                        if constexpr (ignore_null) {
1626
                            continue;
1627
0
                        } else {
1628
0
                            writer.writeNull();
1629
0
                        }
1630
94
                    } else {
1631
94
                        writer.writeValue(doc->getValue());
1632
94
                    }
1633
94
                }
1634
152
            }
1635
48
            writer.writeEndArray();
1636
48
            column->insert_data(writer.getOutput()->getBuffer(), writer.getOutput()->getSize());
1637
48
            writer.reset();
1638
48
        }
1639
1640
31
        block.get_by_position(result).column = std::move(column);
1641
31
        return Status::OK();
1642
31
    }
_ZNK5doris18FunctionJsonbArrayILb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
1573
2
                        uint32_t result, size_t input_rows_count) const override {
1574
2
        auto return_data_type = std::make_shared<DataTypeJsonb>();
1575
2
        auto column = return_data_type->create_column();
1576
2
        column->reserve(input_rows_count);
1577
1578
2
        JsonbWriter writer;
1579
21
        for (size_t i = 0; i < input_rows_count; ++i) {
1580
15
            writer.writeStartArray();
1581
30
            for (auto argument : arguments) {
1582
30
                auto&& [arg_column, is_const] =
1583
30
                        unpack_if_const(block.get_by_position(argument).column);
1584
30
                if (const auto* nullable_column =
1585
30
                            check_and_get_column<ColumnNullable>(arg_column.get())) {
1586
25
                    const auto& null_map = nullable_column->get_null_map_data();
1587
25
                    const auto& nested_column = nullable_column->get_nested_column();
1588
25
                    const auto& jsonb_column =
1589
25
                            assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>(
1590
25
                                    nested_column);
1591
1592
25
                    auto index = index_check_const(i, is_const);
1593
25
                    if (null_map[index]) {
1594
4
                        if constexpr (ignore_null) {
1595
4
                            continue;
1596
                        } else {
1597
                            writer.writeNull();
1598
                        }
1599
21
                    } else {
1600
21
                        auto jsonb_binary = jsonb_column.get_data_at(index);
1601
21
                        const JsonbDocument* doc = nullptr;
1602
21
                        auto st = JsonbDocument::checkAndCreateDocument(jsonb_binary.data,
1603
21
                                                                        jsonb_binary.size, &doc);
1604
21
                        if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
1605
0
                            if constexpr (ignore_null) {
1606
0
                                continue;
1607
                            } else {
1608
                                writer.writeNull();
1609
                            }
1610
21
                        } else {
1611
21
                            writer.writeValue(doc->getValue());
1612
21
                        }
1613
21
                    }
1614
25
                } else {
1615
5
                    const auto& jsonb_column =
1616
5
                            assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>(
1617
5
                                    *arg_column);
1618
1619
5
                    auto index = index_check_const(i, is_const);
1620
5
                    auto jsonb_binary = jsonb_column.get_data_at(index);
1621
5
                    const JsonbDocument* doc = nullptr;
1622
5
                    auto st = JsonbDocument::checkAndCreateDocument(jsonb_binary.data,
1623
5
                                                                    jsonb_binary.size, &doc);
1624
5
                    if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
1625
0
                        if constexpr (ignore_null) {
1626
0
                            continue;
1627
                        } else {
1628
                            writer.writeNull();
1629
                        }
1630
5
                    } else {
1631
5
                        writer.writeValue(doc->getValue());
1632
5
                    }
1633
5
                }
1634
30
            }
1635
19
            writer.writeEndArray();
1636
19
            column->insert_data(writer.getOutput()->getBuffer(), writer.getOutput()->getSize());
1637
19
            writer.reset();
1638
19
        }
1639
1640
6
        block.get_by_position(result).column = std::move(column);
1641
6
        return Status::OK();
1642
2
    }
1643
};
1644
1645
class FunctionJsonbObject : public IFunction {
1646
public:
1647
    static constexpr auto name = "json_object";
1648
    static constexpr auto alias = "jsonb_object";
1649
1650
42
    static FunctionPtr create() { return std::make_shared<FunctionJsonbObject>(); }
1651
1652
0
    String get_name() const override { return name; }
1653
1654
0
    size_t get_number_of_arguments() const override { return 0; }
1655
34
    bool is_variadic() const override { return true; }
1656
1657
73
    bool use_default_implementation_for_nulls() const override { return false; }
1658
1659
33
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1660
33
        return std::make_shared<DataTypeJsonb>();
1661
33
    }
1662
1663
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
1664
44
                        uint32_t result, size_t input_rows_count) const override {
1665
44
        if (arguments.size() % 2 != 0) {
1666
0
            return Status::InvalidArgument(
1667
0
                    "JSON object must have an even number of arguments, but got: {}",
1668
0
                    arguments.size());
1669
0
        }
1670
1671
44
        auto return_data_type = std::make_shared<DataTypeJsonb>();
1672
1673
44
        auto write_key = [](JsonbWriter& writer, const ColumnString& key_col, const bool is_const,
1674
194
                            const NullMap* null_map, const size_t arg_index, const size_t row_idx) {
1675
194
            auto index = index_check_const(row_idx, is_const);
1676
194
            if (null_map && (*null_map)[index]) {
1677
1
                return Status::InvalidArgument(
1678
1
                        "JSON documents may not contain NULL member name(argument "
1679
1
                        "index:  "
1680
1
                        "{}, row index: {})",
1681
1
                        row_idx, arg_index);
1682
1
            }
1683
1684
193
            auto key_string = key_col.get_data_at(index);
1685
193
            if (key_string.size > 255) {
1686
0
                return Status::InvalidArgument(
1687
0
                        "JSON object keys(argument index: {}) must be less than 256 "
1688
0
                        "bytes, but got size: {}",
1689
0
                        arg_index, key_string.size);
1690
0
            }
1691
193
            writer.writeKey(key_string.data, static_cast<uint8_t>(key_string.size));
1692
193
            return Status::OK();
1693
193
        };
1694
1695
44
        auto write_value = [](JsonbWriter& writer, const ColumnString& value_col,
1696
44
                              const bool is_const, const NullMap* null_map, const size_t arg_index,
1697
193
                              const size_t row_idx) {
1698
193
            auto index = index_check_const(row_idx, is_const);
1699
193
            if (null_map && (*null_map)[index]) {
1700
46
                writer.writeNull();
1701
46
                return Status::OK();
1702
46
            }
1703
1704
147
            auto value_string = value_col.get_data_at(index);
1705
147
            const JsonbDocument* doc = nullptr;
1706
147
            RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(value_string.data,
1707
147
                                                                  value_string.size, &doc));
1708
147
            writer.writeValue(doc->getValue());
1709
147
            return Status::OK();
1710
147
        };
1711
1712
141
        for (size_t arg_idx = 0; arg_idx != arguments.size(); arg_idx += 2) {
1713
97
            auto key_argument = arguments[arg_idx];
1714
97
            auto value_argument = arguments[arg_idx + 1];
1715
1716
97
            auto& key_data_type = block.get_by_position(key_argument).type;
1717
97
            auto& value_data_type = block.get_by_position(value_argument).type;
1718
97
            if (!is_string_type(key_data_type->get_primitive_type())) {
1719
0
                return Status::InvalidArgument(
1720
0
                        "JSON object key(argument index: {}) must be String, but got type: "
1721
0
                        "{}(primitive type: {})",
1722
0
                        arg_idx, key_data_type->get_name(),
1723
0
                        static_cast<int>(key_data_type->get_primitive_type()));
1724
0
            }
1725
1726
97
            if (value_data_type->get_primitive_type() != PrimitiveType::TYPE_JSONB) {
1727
0
                return Status::InvalidArgument(
1728
0
                        "JSON object value(argument index: {}) must be JSON, but got type: {}",
1729
0
                        arg_idx, value_data_type->get_name());
1730
0
            }
1731
97
        }
1732
1733
44
        auto column = return_data_type->create_column();
1734
44
        column->reserve(input_rows_count);
1735
1736
44
        JsonbWriter writer;
1737
108
        for (size_t i = 0; i != input_rows_count; ++i) {
1738
65
            writer.writeStartObject();
1739
258
            for (size_t arg_idx = 0; arg_idx != arguments.size(); arg_idx += 2) {
1740
194
                auto key_argument = arguments[arg_idx];
1741
194
                auto value_argument = arguments[arg_idx + 1];
1742
194
                auto&& [key_column, key_const] =
1743
194
                        unpack_if_const(block.get_by_position(key_argument).column);
1744
194
                auto&& [value_column, value_const] =
1745
194
                        unpack_if_const(block.get_by_position(value_argument).column);
1746
1747
194
                if (const auto* nullable_column =
1748
194
                            check_and_get_column<ColumnNullable>(key_column.get())) {
1749
3
                    const auto& null_map = nullable_column->get_null_map_data();
1750
3
                    const auto& nested_column = nullable_column->get_nested_column();
1751
3
                    const auto& key_arg_column =
1752
3
                            assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>(
1753
3
                                    nested_column);
1754
1755
3
                    RETURN_IF_ERROR(
1756
3
                            write_key(writer, key_arg_column, key_const, &null_map, arg_idx, i));
1757
191
                } else {
1758
191
                    const auto& key_arg_column =
1759
191
                            assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>(
1760
191
                                    *key_column);
1761
191
                    RETURN_IF_ERROR(
1762
191
                            write_key(writer, key_arg_column, key_const, nullptr, arg_idx, i));
1763
191
                }
1764
1765
193
                if (const auto* nullable_column =
1766
193
                            check_and_get_column<ColumnNullable>(value_column.get())) {
1767
93
                    const auto& null_map = nullable_column->get_null_map_data();
1768
93
                    const auto& nested_column = nullable_column->get_nested_column();
1769
93
                    const auto& value_arg_column =
1770
93
                            assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>(
1771
93
                                    nested_column);
1772
1773
93
                    RETURN_IF_ERROR(write_value(writer, value_arg_column, value_const, &null_map,
1774
93
                                                arg_idx + 1, i));
1775
100
                } else {
1776
100
                    const auto& value_arg_column =
1777
100
                            assert_cast<const ColumnString&, TypeCheckOnRelease::DISABLE>(
1778
100
                                    *value_column);
1779
100
                    RETURN_IF_ERROR(write_value(writer, value_arg_column, value_const, nullptr,
1780
100
                                                arg_idx + 1, i));
1781
100
                }
1782
193
            }
1783
1784
64
            writer.writeEndObject();
1785
64
            column->insert_data(writer.getOutput()->getBuffer(), writer.getOutput()->getSize());
1786
64
            writer.reset();
1787
64
        }
1788
1789
43
        block.get_by_position(result).column = std::move(column);
1790
43
        return Status::OK();
1791
44
    }
1792
};
1793
1794
enum class JsonbModifyType { Insert, Set, Replace };
1795
1796
template <JsonbModifyType modify_type>
1797
struct JsonbModifyName {
1798
    static constexpr auto name = "jsonb_modify";
1799
    static constexpr auto alias = "json_modify";
1800
};
1801
1802
template <>
1803
struct JsonbModifyName<JsonbModifyType::Insert> {
1804
    static constexpr auto name = "jsonb_insert";
1805
    static constexpr auto alias = "json_insert";
1806
};
1807
template <>
1808
struct JsonbModifyName<JsonbModifyType::Set> {
1809
    static constexpr auto name = "jsonb_set";
1810
    static constexpr auto alias = "json_set";
1811
};
1812
template <>
1813
struct JsonbModifyName<JsonbModifyType::Replace> {
1814
    static constexpr auto name = "jsonb_replace";
1815
    static constexpr auto alias = "json_replace";
1816
};
1817
1818
template <JsonbModifyType modify_type>
1819
class FunctionJsonbModify : public IFunction {
1820
public:
1821
    static constexpr auto name = JsonbModifyName<modify_type>::name;
1822
    static constexpr auto alias = JsonbModifyName<modify_type>::alias;
1823
1824
112
    static FunctionPtr create() { return std::make_shared<FunctionJsonbModify<modify_type>>(); }
_ZN5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE6createEv
Line
Count
Source
1824
38
    static FunctionPtr create() { return std::make_shared<FunctionJsonbModify<modify_type>>(); }
_ZN5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE6createEv
Line
Count
Source
1824
37
    static FunctionPtr create() { return std::make_shared<FunctionJsonbModify<modify_type>>(); }
_ZN5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE6createEv
Line
Count
Source
1824
37
    static FunctionPtr create() { return std::make_shared<FunctionJsonbModify<modify_type>>(); }
1825
1826
0
    String get_name() const override { return name; }
Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE8get_nameB5cxx11Ev
1827
1828
0
    size_t get_number_of_arguments() const override { return 0; }
Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE23get_number_of_argumentsEv
1829
88
    bool is_variadic() const override { return true; }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE11is_variadicEv
Line
Count
Source
1829
30
    bool is_variadic() const override { return true; }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE11is_variadicEv
Line
Count
Source
1829
29
    bool is_variadic() const override { return true; }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE11is_variadicEv
Line
Count
Source
1829
29
    bool is_variadic() const override { return true; }
1830
1831
170
    bool use_default_implementation_for_nulls() const override { return false; }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE36use_default_implementation_for_nullsEv
Line
Count
Source
1831
58
    bool use_default_implementation_for_nulls() const override { return false; }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE36use_default_implementation_for_nullsEv
Line
Count
Source
1831
56
    bool use_default_implementation_for_nulls() const override { return false; }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE36use_default_implementation_for_nullsEv
Line
Count
Source
1831
56
    bool use_default_implementation_for_nulls() const override { return false; }
1832
1833
85
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1834
85
        return make_nullable(std::make_shared<DataTypeJsonb>());
1835
85
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
1833
29
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1834
29
        return make_nullable(std::make_shared<DataTypeJsonb>());
1835
29
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
1833
28
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1834
28
        return make_nullable(std::make_shared<DataTypeJsonb>());
1835
28
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
1833
28
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1834
28
        return make_nullable(std::make_shared<DataTypeJsonb>());
1835
28
    }
1836
1837
    Status create_all_null_result(const DataTypePtr& return_data_type, Block& block,
1838
0
                                  uint32_t result, size_t input_rows_count) const {
1839
0
        auto result_column = return_data_type->create_column();
1840
0
        result_column->insert_default();
1841
0
        auto const_column = ColumnConst::create(std::move(result_column), input_rows_count);
1842
0
        block.get_by_position(result).column = std::move(const_column);
1843
0
        return Status::OK();
1844
0
    }
Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE22create_all_null_resultERKSt10shared_ptrIKNS_9IDataTypeEERNS_5BlockEjm
Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE22create_all_null_resultERKSt10shared_ptrIKNS_9IDataTypeEERNS_5BlockEjm
Unexecuted instantiation: _ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE22create_all_null_resultERKSt10shared_ptrIKNS_9IDataTypeEERNS_5BlockEjm
1845
1846
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
1847
85
                        uint32_t result, size_t input_rows_count) const override {
1848
85
        if (arguments.size() % 2 != 1 || arguments.size() < 3) {
1849
0
            return Status::InvalidArgument(
1850
0
                    "Function {} must have an odd number of arguments and more than 2 arguments, "
1851
0
                    "but got: {}",
1852
0
                    name, arguments.size());
1853
0
        }
1854
1855
85
        const size_t keys_count = (arguments.size() - 1) / 2;
1856
1857
85
        auto return_data_type = make_nullable(std::make_shared<DataTypeJsonb>());
1858
1859
85
        auto result_column = return_data_type->create_column();
1860
85
        auto& result_nullable_col = assert_cast<ColumnNullable&>(*result_column);
1861
85
        auto& null_map = result_nullable_col.get_null_map_data();
1862
85
        auto& res_string_column =
1863
85
                assert_cast<ColumnString&>(result_nullable_col.get_nested_column());
1864
85
        auto& res_chars = res_string_column.get_chars();
1865
85
        auto& res_offsets = res_string_column.get_offsets();
1866
1867
85
        null_map.resize_fill(input_rows_count, 0);
1868
85
        res_offsets.resize(input_rows_count);
1869
85
        auto&& [json_data_arg_column, json_data_const] =
1870
85
                unpack_if_const(block.get_by_position(arguments[0]).column);
1871
1872
85
        if (json_data_const) {
1873
11
            if (json_data_arg_column->is_null_at(0)) {
1874
0
                return create_all_null_result(return_data_type, block, result, input_rows_count);
1875
0
            }
1876
11
        }
1877
1878
85
        std::vector<const ColumnString*> json_path_columns(keys_count);
1879
85
        std::vector<bool> json_path_constant(keys_count);
1880
85
        std::vector<const NullMap*> json_path_null_maps(keys_count, nullptr);
1881
1882
85
        std::vector<const ColumnString*> json_value_columns(keys_count);
1883
85
        std::vector<bool> json_value_constant(keys_count);
1884
85
        std::vector<const NullMap*> json_value_null_maps(keys_count, nullptr);
1885
1886
85
        const NullMap* json_data_null_map = nullptr;
1887
85
        const ColumnString* json_data_column;
1888
85
        if (const auto* nullable_column =
1889
85
                    check_and_get_column<ColumnNullable>(json_data_arg_column.get())) {
1890
85
            json_data_null_map = &nullable_column->get_null_map_data();
1891
85
            const auto& nested_column = nullable_column->get_nested_column();
1892
85
            json_data_column = assert_cast<const ColumnString*>(&nested_column);
1893
85
        } else {
1894
0
            json_data_column = assert_cast<const ColumnString*>(json_data_arg_column.get());
1895
0
        }
1896
1897
195
        for (size_t i = 1; i < arguments.size(); i += 2) {
1898
110
            auto&& [path_column, path_const] =
1899
110
                    unpack_if_const(block.get_by_position(arguments[i]).column);
1900
110
            auto&& [value_column, value_const] =
1901
110
                    unpack_if_const(block.get_by_position(arguments[i + 1]).column);
1902
1903
110
            if (path_const) {
1904
27
                if (path_column->is_null_at(0)) {
1905
0
                    return create_all_null_result(return_data_type, block, result,
1906
0
                                                  input_rows_count);
1907
0
                }
1908
27
            }
1909
1910
110
            json_path_constant[i / 2] = path_const;
1911
110
            if (const auto* nullable_column =
1912
110
                        check_and_get_column<ColumnNullable>(path_column.get())) {
1913
8
                json_path_null_maps[i / 2] = &nullable_column->get_null_map_data();
1914
8
                const auto& nested_column = nullable_column->get_nested_column();
1915
8
                json_path_columns[i / 2] = assert_cast<const ColumnString*>(&nested_column);
1916
102
            } else {
1917
102
                json_path_columns[i / 2] = assert_cast<const ColumnString*>(path_column.get());
1918
102
            }
1919
1920
110
            json_value_constant[i / 2] = value_const;
1921
110
            if (const auto* nullable_column =
1922
110
                        check_and_get_column<ColumnNullable>(value_column.get())) {
1923
53
                json_value_null_maps[i / 2] = &nullable_column->get_null_map_data();
1924
53
                const auto& nested_column = nullable_column->get_nested_column();
1925
53
                json_value_columns[i / 2] = assert_cast<const ColumnString*>(&nested_column);
1926
57
            } else {
1927
57
                json_value_columns[i / 2] = assert_cast<const ColumnString*>(value_column.get());
1928
57
            }
1929
110
        }
1930
1931
85
        DorisVector<const JsonbDocument*> json_documents(input_rows_count);
1932
85
        if (json_data_const) {
1933
11
            auto json_data_string = json_data_column->get_data_at(0);
1934
11
            const JsonbDocument* doc = nullptr;
1935
11
            RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(json_data_string.data,
1936
11
                                                                  json_data_string.size, &doc));
1937
11
            if (!doc || !doc->getValue()) [[unlikely]] {
1938
0
                return create_all_null_result(return_data_type, block, result, input_rows_count);
1939
0
            }
1940
62
            for (size_t i = 0; i != input_rows_count; ++i) {
1941
51
                json_documents[i] = doc;
1942
51
            }
1943
74
        } else {
1944
152
            for (size_t i = 0; i != input_rows_count; ++i) {
1945
78
                if (json_data_null_map && (*json_data_null_map)[i]) {
1946
0
                    null_map[i] = 1;
1947
0
                    json_documents[i] = nullptr;
1948
0
                    continue;
1949
0
                }
1950
1951
78
                auto json_data_string = json_data_column->get_data_at(i);
1952
78
                const JsonbDocument* doc = nullptr;
1953
78
                RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(json_data_string.data,
1954
78
                                                                      json_data_string.size, &doc));
1955
78
                if (!doc || !doc->getValue()) [[unlikely]] {
1956
0
                    null_map[i] = 1;
1957
0
                    continue;
1958
0
                }
1959
78
                json_documents[i] = doc;
1960
78
            }
1961
74
        }
1962
1963
85
        DorisVector<DorisVector<JsonbPath>> json_paths(keys_count);
1964
85
        DorisVector<DorisVector<const JsonbValue*>> json_values(keys_count);
1965
1966
85
        RETURN_IF_ERROR(parse_paths_and_values(json_paths, json_values, arguments, input_rows_count,
1967
85
                                               json_path_columns, json_path_constant,
1968
85
                                               json_path_null_maps, json_value_columns,
1969
85
                                               json_value_constant, json_value_null_maps));
1970
1971
77
        JsonbWriter writer;
1972
77
        struct DocumentBuffer {
1973
77
            DorisUniqueBufferPtr<char> ptr;
1974
77
            size_t size = 0;
1975
77
            size_t capacity = 0;
1976
77
        };
1977
1978
77
        DocumentBuffer tmp_buffer;
1979
1980
218
        for (size_t row_idx = 0; row_idx != input_rows_count; ++row_idx) {
1981
341
            for (size_t i = 1; i < arguments.size(); i += 2) {
1982
200
                const size_t index = i / 2;
1983
200
                auto& json_path = json_paths[index];
1984
200
                auto& json_value = json_values[index];
1985
1986
200
                const auto path_index = index_check_const(row_idx, json_path_constant[index]);
1987
200
                const auto value_index = index_check_const(row_idx, json_value_constant[index]);
1988
1989
200
                if (null_map[row_idx]) {
1990
0
                    continue;
1991
0
                }
1992
1993
200
                if (json_documents[row_idx] == nullptr) {
1994
0
                    null_map[row_idx] = 1;
1995
0
                    continue;
1996
0
                }
1997
1998
200
                if (json_path_null_maps[index] && (*json_path_null_maps[index])[path_index]) {
1999
4
                    null_map[row_idx] = 1;
2000
4
                    continue;
2001
4
                }
2002
2003
196
                auto find_result =
2004
196
                        json_documents[row_idx]->getValue()->findValue(json_path[path_index]);
2005
2006
196
                if (find_result.is_wildcard) {
2007
0
                    return Status::InvalidArgument(
2008
0
                            " In this situation, path expressions may not contain the * and ** "
2009
0
                            "tokens or an array range, argument index: {}, row index: {}",
2010
0
                            i, row_idx);
2011
0
                }
2012
2013
196
                if constexpr (modify_type == JsonbModifyType::Insert) {
2014
62
                    if (find_result.value) {
2015
18
                        continue;
2016
18
                    }
2017
67
                } else if constexpr (modify_type == JsonbModifyType::Replace) {
2018
67
                    if (!find_result.value) {
2019
11
                        continue;
2020
11
                    }
2021
67
                }
2022
2023
100
                std::vector<const JsonbValue*> parents;
2024
2025
196
                bool replace = false;
2026
196
                parents.emplace_back(json_documents[row_idx]->getValue());
2027
196
                const auto legs_count = json_path[path_index].get_leg_vector_size();
2028
196
                if (find_result.value) {
2029
                    // find target path, replace it with the new value.
2030
100
                    replace = true;
2031
100
                    if (!build_parents_by_path(json_documents[row_idx]->getValue(),
2032
100
                                               json_path[path_index], parents)) {
2033
0
                        continue;
2034
0
                    }
2035
100
                } else {
2036
                    // does not find target path, insert the new value.
2037
96
                    JsonbPath new_path;
2038
96
                    DCHECK_GT(legs_count, 0);
2039
156
                    for (size_t j = 0; j + 1 < legs_count; ++j) {
2040
60
                        auto* current_leg = json_path[path_index].get_leg_from_leg_vector(j);
2041
60
                        std::unique_ptr<leg_info> leg = std::make_unique<leg_info>(
2042
60
                                current_leg->leg_ptr, current_leg->leg_len,
2043
60
                                current_leg->array_index, current_leg->type);
2044
60
                        new_path.add_leg_to_leg_vector(std::move(leg));
2045
60
                    }
2046
2047
96
                    if (!build_parents_by_path(json_documents[row_idx]->getValue(), new_path,
2048
96
                                               parents)) {
2049
12
                        continue;
2050
12
                    }
2051
96
                }
2052
2053
184
                leg_info* last_leg =
2054
184
                        legs_count > 0
2055
184
                                ? json_path[path_index].get_leg_from_leg_vector(legs_count - 1)
2056
184
                                : nullptr;
2057
184
                RETURN_IF_ERROR(write_json_value(json_documents[row_idx]->getValue(), parents, 0,
2058
184
                                                 json_value[value_index], replace, last_leg,
2059
184
                                                 writer));
2060
2061
184
                auto* writer_output = writer.getOutput();
2062
184
                if (writer_output->getSize() > tmp_buffer.capacity) {
2063
67
                    tmp_buffer.capacity =
2064
67
                            ((size_t(writer_output->getSize()) + 1024 - 1) / 1024) * 1024;
2065
67
                    tmp_buffer.ptr = make_unique_buffer<char>(tmp_buffer.capacity);
2066
67
                    DCHECK_LE(writer_output->getSize(), tmp_buffer.capacity);
2067
67
                }
2068
2069
184
                memcpy(tmp_buffer.ptr.get(), writer_output->getBuffer(), writer_output->getSize());
2070
184
                tmp_buffer.size = writer_output->getSize();
2071
2072
184
                writer.reset();
2073
2074
184
                RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(
2075
184
                        tmp_buffer.ptr.get(), tmp_buffer.size, &json_documents[row_idx]));
2076
184
            }
2077
2078
141
            if (!null_map[row_idx]) {
2079
108
                const auto* jsonb_document = json_documents[row_idx];
2080
108
                const auto size = jsonb_document->numPackedBytes();
2081
108
                res_chars.insert(reinterpret_cast<const char*>(jsonb_document),
2082
108
                                 reinterpret_cast<const char*>(jsonb_document) + size);
2083
108
            }
2084
2085
141
            res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size());
2086
2087
141
            if (!null_map[row_idx]) {
2088
108
                auto* ptr = res_chars.data() + res_offsets[row_idx - 1];
2089
108
                auto size = res_offsets[row_idx] - res_offsets[row_idx - 1];
2090
108
                const JsonbDocument* doc = nullptr;
2091
108
                THROW_IF_ERROR(JsonbDocument::checkAndCreateDocument(
2092
108
                        reinterpret_cast<const char*>(ptr), size, &doc));
2093
108
            }
2094
141
        }
2095
2096
106
        block.get_by_position(result).column = std::move(result_column);
2097
106
        return Status::OK();
2098
77
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
1847
29
                        uint32_t result, size_t input_rows_count) const override {
1848
29
        if (arguments.size() % 2 != 1 || arguments.size() < 3) {
1849
0
            return Status::InvalidArgument(
1850
0
                    "Function {} must have an odd number of arguments and more than 2 arguments, "
1851
0
                    "but got: {}",
1852
0
                    name, arguments.size());
1853
0
        }
1854
1855
29
        const size_t keys_count = (arguments.size() - 1) / 2;
1856
1857
29
        auto return_data_type = make_nullable(std::make_shared<DataTypeJsonb>());
1858
1859
29
        auto result_column = return_data_type->create_column();
1860
29
        auto& result_nullable_col = assert_cast<ColumnNullable&>(*result_column);
1861
29
        auto& null_map = result_nullable_col.get_null_map_data();
1862
29
        auto& res_string_column =
1863
29
                assert_cast<ColumnString&>(result_nullable_col.get_nested_column());
1864
29
        auto& res_chars = res_string_column.get_chars();
1865
29
        auto& res_offsets = res_string_column.get_offsets();
1866
1867
29
        null_map.resize_fill(input_rows_count, 0);
1868
29
        res_offsets.resize(input_rows_count);
1869
29
        auto&& [json_data_arg_column, json_data_const] =
1870
29
                unpack_if_const(block.get_by_position(arguments[0]).column);
1871
1872
29
        if (json_data_const) {
1873
5
            if (json_data_arg_column->is_null_at(0)) {
1874
0
                return create_all_null_result(return_data_type, block, result, input_rows_count);
1875
0
            }
1876
5
        }
1877
1878
29
        std::vector<const ColumnString*> json_path_columns(keys_count);
1879
29
        std::vector<bool> json_path_constant(keys_count);
1880
29
        std::vector<const NullMap*> json_path_null_maps(keys_count, nullptr);
1881
1882
29
        std::vector<const ColumnString*> json_value_columns(keys_count);
1883
29
        std::vector<bool> json_value_constant(keys_count);
1884
29
        std::vector<const NullMap*> json_value_null_maps(keys_count, nullptr);
1885
1886
29
        const NullMap* json_data_null_map = nullptr;
1887
29
        const ColumnString* json_data_column;
1888
29
        if (const auto* nullable_column =
1889
29
                    check_and_get_column<ColumnNullable>(json_data_arg_column.get())) {
1890
29
            json_data_null_map = &nullable_column->get_null_map_data();
1891
29
            const auto& nested_column = nullable_column->get_nested_column();
1892
29
            json_data_column = assert_cast<const ColumnString*>(&nested_column);
1893
29
        } else {
1894
0
            json_data_column = assert_cast<const ColumnString*>(json_data_arg_column.get());
1895
0
        }
1896
1897
65
        for (size_t i = 1; i < arguments.size(); i += 2) {
1898
36
            auto&& [path_column, path_const] =
1899
36
                    unpack_if_const(block.get_by_position(arguments[i]).column);
1900
36
            auto&& [value_column, value_const] =
1901
36
                    unpack_if_const(block.get_by_position(arguments[i + 1]).column);
1902
1903
36
            if (path_const) {
1904
7
                if (path_column->is_null_at(0)) {
1905
0
                    return create_all_null_result(return_data_type, block, result,
1906
0
                                                  input_rows_count);
1907
0
                }
1908
7
            }
1909
1910
36
            json_path_constant[i / 2] = path_const;
1911
36
            if (const auto* nullable_column =
1912
36
                        check_and_get_column<ColumnNullable>(path_column.get())) {
1913
5
                json_path_null_maps[i / 2] = &nullable_column->get_null_map_data();
1914
5
                const auto& nested_column = nullable_column->get_nested_column();
1915
5
                json_path_columns[i / 2] = assert_cast<const ColumnString*>(&nested_column);
1916
31
            } else {
1917
31
                json_path_columns[i / 2] = assert_cast<const ColumnString*>(path_column.get());
1918
31
            }
1919
1920
36
            json_value_constant[i / 2] = value_const;
1921
36
            if (const auto* nullable_column =
1922
36
                        check_and_get_column<ColumnNullable>(value_column.get())) {
1923
17
                json_value_null_maps[i / 2] = &nullable_column->get_null_map_data();
1924
17
                const auto& nested_column = nullable_column->get_nested_column();
1925
17
                json_value_columns[i / 2] = assert_cast<const ColumnString*>(&nested_column);
1926
19
            } else {
1927
19
                json_value_columns[i / 2] = assert_cast<const ColumnString*>(value_column.get());
1928
19
            }
1929
36
        }
1930
1931
29
        DorisVector<const JsonbDocument*> json_documents(input_rows_count);
1932
29
        if (json_data_const) {
1933
5
            auto json_data_string = json_data_column->get_data_at(0);
1934
5
            const JsonbDocument* doc = nullptr;
1935
5
            RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(json_data_string.data,
1936
5
                                                                  json_data_string.size, &doc));
1937
5
            if (!doc || !doc->getValue()) [[unlikely]] {
1938
0
                return create_all_null_result(return_data_type, block, result, input_rows_count);
1939
0
            }
1940
30
            for (size_t i = 0; i != input_rows_count; ++i) {
1941
25
                json_documents[i] = doc;
1942
25
            }
1943
24
        } else {
1944
50
            for (size_t i = 0; i != input_rows_count; ++i) {
1945
26
                if (json_data_null_map && (*json_data_null_map)[i]) {
1946
0
                    null_map[i] = 1;
1947
0
                    json_documents[i] = nullptr;
1948
0
                    continue;
1949
0
                }
1950
1951
26
                auto json_data_string = json_data_column->get_data_at(i);
1952
26
                const JsonbDocument* doc = nullptr;
1953
26
                RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(json_data_string.data,
1954
26
                                                                      json_data_string.size, &doc));
1955
26
                if (!doc || !doc->getValue()) [[unlikely]] {
1956
0
                    null_map[i] = 1;
1957
0
                    continue;
1958
0
                }
1959
26
                json_documents[i] = doc;
1960
26
            }
1961
24
        }
1962
1963
29
        DorisVector<DorisVector<JsonbPath>> json_paths(keys_count);
1964
29
        DorisVector<DorisVector<const JsonbValue*>> json_values(keys_count);
1965
1966
29
        RETURN_IF_ERROR(parse_paths_and_values(json_paths, json_values, arguments, input_rows_count,
1967
29
                                               json_path_columns, json_path_constant,
1968
29
                                               json_path_null_maps, json_value_columns,
1969
29
                                               json_value_constant, json_value_null_maps));
1970
1971
25
        JsonbWriter writer;
1972
25
        struct DocumentBuffer {
1973
25
            DorisUniqueBufferPtr<char> ptr;
1974
25
            size_t size = 0;
1975
25
            size_t capacity = 0;
1976
25
        };
1977
1978
25
        DocumentBuffer tmp_buffer;
1979
1980
81
        for (size_t row_idx = 0; row_idx != input_rows_count; ++row_idx) {
1981
120
            for (size_t i = 1; i < arguments.size(); i += 2) {
1982
64
                const size_t index = i / 2;
1983
64
                auto& json_path = json_paths[index];
1984
64
                auto& json_value = json_values[index];
1985
1986
64
                const auto path_index = index_check_const(row_idx, json_path_constant[index]);
1987
64
                const auto value_index = index_check_const(row_idx, json_value_constant[index]);
1988
1989
64
                if (null_map[row_idx]) {
1990
0
                    continue;
1991
0
                }
1992
1993
64
                if (json_documents[row_idx] == nullptr) {
1994
0
                    null_map[row_idx] = 1;
1995
0
                    continue;
1996
0
                }
1997
1998
64
                if (json_path_null_maps[index] && (*json_path_null_maps[index])[path_index]) {
1999
2
                    null_map[row_idx] = 1;
2000
2
                    continue;
2001
2
                }
2002
2003
62
                auto find_result =
2004
62
                        json_documents[row_idx]->getValue()->findValue(json_path[path_index]);
2005
2006
62
                if (find_result.is_wildcard) {
2007
0
                    return Status::InvalidArgument(
2008
0
                            " In this situation, path expressions may not contain the * and ** "
2009
0
                            "tokens or an array range, argument index: {}, row index: {}",
2010
0
                            i, row_idx);
2011
0
                }
2012
2013
62
                if constexpr (modify_type == JsonbModifyType::Insert) {
2014
62
                    if (find_result.value) {
2015
18
                        continue;
2016
18
                    }
2017
                } else if constexpr (modify_type == JsonbModifyType::Replace) {
2018
                    if (!find_result.value) {
2019
                        continue;
2020
                    }
2021
                }
2022
2023
44
                std::vector<const JsonbValue*> parents;
2024
2025
62
                bool replace = false;
2026
62
                parents.emplace_back(json_documents[row_idx]->getValue());
2027
62
                const auto legs_count = json_path[path_index].get_leg_vector_size();
2028
62
                if (find_result.value) {
2029
                    // find target path, replace it with the new value.
2030
0
                    replace = true;
2031
0
                    if (!build_parents_by_path(json_documents[row_idx]->getValue(),
2032
0
                                               json_path[path_index], parents)) {
2033
0
                        continue;
2034
0
                    }
2035
62
                } else {
2036
                    // does not find target path, insert the new value.
2037
62
                    JsonbPath new_path;
2038
62
                    DCHECK_GT(legs_count, 0);
2039
103
                    for (size_t j = 0; j + 1 < legs_count; ++j) {
2040
41
                        auto* current_leg = json_path[path_index].get_leg_from_leg_vector(j);
2041
41
                        std::unique_ptr<leg_info> leg = std::make_unique<leg_info>(
2042
41
                                current_leg->leg_ptr, current_leg->leg_len,
2043
41
                                current_leg->array_index, current_leg->type);
2044
41
                        new_path.add_leg_to_leg_vector(std::move(leg));
2045
41
                    }
2046
2047
62
                    if (!build_parents_by_path(json_documents[row_idx]->getValue(), new_path,
2048
62
                                               parents)) {
2049
1
                        continue;
2050
1
                    }
2051
62
                }
2052
2053
61
                leg_info* last_leg =
2054
61
                        legs_count > 0
2055
61
                                ? json_path[path_index].get_leg_from_leg_vector(legs_count - 1)
2056
61
                                : nullptr;
2057
61
                RETURN_IF_ERROR(write_json_value(json_documents[row_idx]->getValue(), parents, 0,
2058
61
                                                 json_value[value_index], replace, last_leg,
2059
61
                                                 writer));
2060
2061
61
                auto* writer_output = writer.getOutput();
2062
61
                if (writer_output->getSize() > tmp_buffer.capacity) {
2063
20
                    tmp_buffer.capacity =
2064
20
                            ((size_t(writer_output->getSize()) + 1024 - 1) / 1024) * 1024;
2065
20
                    tmp_buffer.ptr = make_unique_buffer<char>(tmp_buffer.capacity);
2066
20
                    DCHECK_LE(writer_output->getSize(), tmp_buffer.capacity);
2067
20
                }
2068
2069
61
                memcpy(tmp_buffer.ptr.get(), writer_output->getBuffer(), writer_output->getSize());
2070
61
                tmp_buffer.size = writer_output->getSize();
2071
2072
61
                writer.reset();
2073
2074
61
                RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(
2075
61
                        tmp_buffer.ptr.get(), tmp_buffer.size, &json_documents[row_idx]));
2076
61
            }
2077
2078
56
            if (!null_map[row_idx]) {
2079
36
                const auto* jsonb_document = json_documents[row_idx];
2080
36
                const auto size = jsonb_document->numPackedBytes();
2081
36
                res_chars.insert(reinterpret_cast<const char*>(jsonb_document),
2082
36
                                 reinterpret_cast<const char*>(jsonb_document) + size);
2083
36
            }
2084
2085
56
            res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size());
2086
2087
56
            if (!null_map[row_idx]) {
2088
36
                auto* ptr = res_chars.data() + res_offsets[row_idx - 1];
2089
36
                auto size = res_offsets[row_idx] - res_offsets[row_idx - 1];
2090
36
                const JsonbDocument* doc = nullptr;
2091
36
                THROW_IF_ERROR(JsonbDocument::checkAndCreateDocument(
2092
36
                        reinterpret_cast<const char*>(ptr), size, &doc));
2093
36
            }
2094
56
        }
2095
2096
43
        block.get_by_position(result).column = std::move(result_column);
2097
43
        return Status::OK();
2098
25
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
1847
28
                        uint32_t result, size_t input_rows_count) const override {
1848
28
        if (arguments.size() % 2 != 1 || arguments.size() < 3) {
1849
0
            return Status::InvalidArgument(
1850
0
                    "Function {} must have an odd number of arguments and more than 2 arguments, "
1851
0
                    "but got: {}",
1852
0
                    name, arguments.size());
1853
0
        }
1854
1855
28
        const size_t keys_count = (arguments.size() - 1) / 2;
1856
1857
28
        auto return_data_type = make_nullable(std::make_shared<DataTypeJsonb>());
1858
1859
28
        auto result_column = return_data_type->create_column();
1860
28
        auto& result_nullable_col = assert_cast<ColumnNullable&>(*result_column);
1861
28
        auto& null_map = result_nullable_col.get_null_map_data();
1862
28
        auto& res_string_column =
1863
28
                assert_cast<ColumnString&>(result_nullable_col.get_nested_column());
1864
28
        auto& res_chars = res_string_column.get_chars();
1865
28
        auto& res_offsets = res_string_column.get_offsets();
1866
1867
28
        null_map.resize_fill(input_rows_count, 0);
1868
28
        res_offsets.resize(input_rows_count);
1869
28
        auto&& [json_data_arg_column, json_data_const] =
1870
28
                unpack_if_const(block.get_by_position(arguments[0]).column);
1871
1872
28
        if (json_data_const) {
1873
3
            if (json_data_arg_column->is_null_at(0)) {
1874
0
                return create_all_null_result(return_data_type, block, result, input_rows_count);
1875
0
            }
1876
3
        }
1877
1878
28
        std::vector<const ColumnString*> json_path_columns(keys_count);
1879
28
        std::vector<bool> json_path_constant(keys_count);
1880
28
        std::vector<const NullMap*> json_path_null_maps(keys_count, nullptr);
1881
1882
28
        std::vector<const ColumnString*> json_value_columns(keys_count);
1883
28
        std::vector<bool> json_value_constant(keys_count);
1884
28
        std::vector<const NullMap*> json_value_null_maps(keys_count, nullptr);
1885
1886
28
        const NullMap* json_data_null_map = nullptr;
1887
28
        const ColumnString* json_data_column;
1888
28
        if (const auto* nullable_column =
1889
28
                    check_and_get_column<ColumnNullable>(json_data_arg_column.get())) {
1890
28
            json_data_null_map = &nullable_column->get_null_map_data();
1891
28
            const auto& nested_column = nullable_column->get_nested_column();
1892
28
            json_data_column = assert_cast<const ColumnString*>(&nested_column);
1893
28
        } else {
1894
0
            json_data_column = assert_cast<const ColumnString*>(json_data_arg_column.get());
1895
0
        }
1896
1897
64
        for (size_t i = 1; i < arguments.size(); i += 2) {
1898
36
            auto&& [path_column, path_const] =
1899
36
                    unpack_if_const(block.get_by_position(arguments[i]).column);
1900
36
            auto&& [value_column, value_const] =
1901
36
                    unpack_if_const(block.get_by_position(arguments[i + 1]).column);
1902
1903
36
            if (path_const) {
1904
9
                if (path_column->is_null_at(0)) {
1905
0
                    return create_all_null_result(return_data_type, block, result,
1906
0
                                                  input_rows_count);
1907
0
                }
1908
9
            }
1909
1910
36
            json_path_constant[i / 2] = path_const;
1911
36
            if (const auto* nullable_column =
1912
36
                        check_and_get_column<ColumnNullable>(path_column.get())) {
1913
2
                json_path_null_maps[i / 2] = &nullable_column->get_null_map_data();
1914
2
                const auto& nested_column = nullable_column->get_nested_column();
1915
2
                json_path_columns[i / 2] = assert_cast<const ColumnString*>(&nested_column);
1916
34
            } else {
1917
34
                json_path_columns[i / 2] = assert_cast<const ColumnString*>(path_column.get());
1918
34
            }
1919
1920
36
            json_value_constant[i / 2] = value_const;
1921
36
            if (const auto* nullable_column =
1922
36
                        check_and_get_column<ColumnNullable>(value_column.get())) {
1923
17
                json_value_null_maps[i / 2] = &nullable_column->get_null_map_data();
1924
17
                const auto& nested_column = nullable_column->get_nested_column();
1925
17
                json_value_columns[i / 2] = assert_cast<const ColumnString*>(&nested_column);
1926
19
            } else {
1927
19
                json_value_columns[i / 2] = assert_cast<const ColumnString*>(value_column.get());
1928
19
            }
1929
36
        }
1930
1931
28
        DorisVector<const JsonbDocument*> json_documents(input_rows_count);
1932
28
        if (json_data_const) {
1933
3
            auto json_data_string = json_data_column->get_data_at(0);
1934
3
            const JsonbDocument* doc = nullptr;
1935
3
            RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(json_data_string.data,
1936
3
                                                                  json_data_string.size, &doc));
1937
3
            if (!doc || !doc->getValue()) [[unlikely]] {
1938
0
                return create_all_null_result(return_data_type, block, result, input_rows_count);
1939
0
            }
1940
17
            for (size_t i = 0; i != input_rows_count; ++i) {
1941
14
                json_documents[i] = doc;
1942
14
            }
1943
25
        } else {
1944
52
            for (size_t i = 0; i != input_rows_count; ++i) {
1945
27
                if (json_data_null_map && (*json_data_null_map)[i]) {
1946
0
                    null_map[i] = 1;
1947
0
                    json_documents[i] = nullptr;
1948
0
                    continue;
1949
0
                }
1950
1951
27
                auto json_data_string = json_data_column->get_data_at(i);
1952
27
                const JsonbDocument* doc = nullptr;
1953
27
                RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(json_data_string.data,
1954
27
                                                                      json_data_string.size, &doc));
1955
27
                if (!doc || !doc->getValue()) [[unlikely]] {
1956
0
                    null_map[i] = 1;
1957
0
                    continue;
1958
0
                }
1959
27
                json_documents[i] = doc;
1960
27
            }
1961
25
        }
1962
1963
28
        DorisVector<DorisVector<JsonbPath>> json_paths(keys_count);
1964
28
        DorisVector<DorisVector<const JsonbValue*>> json_values(keys_count);
1965
1966
28
        RETURN_IF_ERROR(parse_paths_and_values(json_paths, json_values, arguments, input_rows_count,
1967
28
                                               json_path_columns, json_path_constant,
1968
28
                                               json_path_null_maps, json_value_columns,
1969
28
                                               json_value_constant, json_value_null_maps));
1970
1971
26
        JsonbWriter writer;
1972
26
        struct DocumentBuffer {
1973
26
            DorisUniqueBufferPtr<char> ptr;
1974
26
            size_t size = 0;
1975
26
            size_t capacity = 0;
1976
26
        };
1977
1978
26
        DocumentBuffer tmp_buffer;
1979
1980
65
        for (size_t row_idx = 0; row_idx != input_rows_count; ++row_idx) {
1981
107
            for (size_t i = 1; i < arguments.size(); i += 2) {
1982
68
                const size_t index = i / 2;
1983
68
                auto& json_path = json_paths[index];
1984
68
                auto& json_value = json_values[index];
1985
1986
68
                const auto path_index = index_check_const(row_idx, json_path_constant[index]);
1987
68
                const auto value_index = index_check_const(row_idx, json_value_constant[index]);
1988
1989
68
                if (null_map[row_idx]) {
1990
0
                    continue;
1991
0
                }
1992
1993
68
                if (json_documents[row_idx] == nullptr) {
1994
0
                    null_map[row_idx] = 1;
1995
0
                    continue;
1996
0
                }
1997
1998
68
                if (json_path_null_maps[index] && (*json_path_null_maps[index])[path_index]) {
1999
1
                    null_map[row_idx] = 1;
2000
1
                    continue;
2001
1
                }
2002
2003
67
                auto find_result =
2004
67
                        json_documents[row_idx]->getValue()->findValue(json_path[path_index]);
2005
2006
67
                if (find_result.is_wildcard) {
2007
0
                    return Status::InvalidArgument(
2008
0
                            " In this situation, path expressions may not contain the * and ** "
2009
0
                            "tokens or an array range, argument index: {}, row index: {}",
2010
0
                            i, row_idx);
2011
0
                }
2012
2013
                if constexpr (modify_type == JsonbModifyType::Insert) {
2014
                    if (find_result.value) {
2015
                        continue;
2016
                    }
2017
67
                } else if constexpr (modify_type == JsonbModifyType::Replace) {
2018
67
                    if (!find_result.value) {
2019
67
                        continue;
2020
67
                    }
2021
67
                }
2022
2023
67
                std::vector<const JsonbValue*> parents;
2024
2025
67
                bool replace = false;
2026
67
                parents.emplace_back(json_documents[row_idx]->getValue());
2027
67
                const auto legs_count = json_path[path_index].get_leg_vector_size();
2028
67
                if (find_result.value) {
2029
                    // find target path, replace it with the new value.
2030
44
                    replace = true;
2031
44
                    if (!build_parents_by_path(json_documents[row_idx]->getValue(),
2032
44
                                               json_path[path_index], parents)) {
2033
0
                        continue;
2034
0
                    }
2035
44
                } else {
2036
                    // does not find target path, insert the new value.
2037
23
                    JsonbPath new_path;
2038
23
                    DCHECK_GT(legs_count, 0);
2039
42
                    for (size_t j = 0; j + 1 < legs_count; ++j) {
2040
19
                        auto* current_leg = json_path[path_index].get_leg_from_leg_vector(j);
2041
19
                        std::unique_ptr<leg_info> leg = std::make_unique<leg_info>(
2042
19
                                current_leg->leg_ptr, current_leg->leg_len,
2043
19
                                current_leg->array_index, current_leg->type);
2044
19
                        new_path.add_leg_to_leg_vector(std::move(leg));
2045
19
                    }
2046
2047
23
                    if (!build_parents_by_path(json_documents[row_idx]->getValue(), new_path,
2048
23
                                               parents)) {
2049
11
                        continue;
2050
11
                    }
2051
23
                }
2052
2053
56
                leg_info* last_leg =
2054
56
                        legs_count > 0
2055
56
                                ? json_path[path_index].get_leg_from_leg_vector(legs_count - 1)
2056
56
                                : nullptr;
2057
56
                RETURN_IF_ERROR(write_json_value(json_documents[row_idx]->getValue(), parents, 0,
2058
56
                                                 json_value[value_index], replace, last_leg,
2059
56
                                                 writer));
2060
2061
56
                auto* writer_output = writer.getOutput();
2062
56
                if (writer_output->getSize() > tmp_buffer.capacity) {
2063
24
                    tmp_buffer.capacity =
2064
24
                            ((size_t(writer_output->getSize()) + 1024 - 1) / 1024) * 1024;
2065
24
                    tmp_buffer.ptr = make_unique_buffer<char>(tmp_buffer.capacity);
2066
24
                    DCHECK_LE(writer_output->getSize(), tmp_buffer.capacity);
2067
24
                }
2068
2069
56
                memcpy(tmp_buffer.ptr.get(), writer_output->getBuffer(), writer_output->getSize());
2070
56
                tmp_buffer.size = writer_output->getSize();
2071
2072
56
                writer.reset();
2073
2074
56
                RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(
2075
56
                        tmp_buffer.ptr.get(), tmp_buffer.size, &json_documents[row_idx]));
2076
56
            }
2077
2078
39
            if (!null_map[row_idx]) {
2079
38
                const auto* jsonb_document = json_documents[row_idx];
2080
38
                const auto size = jsonb_document->numPackedBytes();
2081
38
                res_chars.insert(reinterpret_cast<const char*>(jsonb_document),
2082
38
                                 reinterpret_cast<const char*>(jsonb_document) + size);
2083
38
            }
2084
2085
39
            res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size());
2086
2087
39
            if (!null_map[row_idx]) {
2088
38
                auto* ptr = res_chars.data() + res_offsets[row_idx - 1];
2089
38
                auto size = res_offsets[row_idx] - res_offsets[row_idx - 1];
2090
38
                const JsonbDocument* doc = nullptr;
2091
38
                THROW_IF_ERROR(JsonbDocument::checkAndCreateDocument(
2092
38
                        reinterpret_cast<const char*>(ptr), size, &doc));
2093
38
            }
2094
39
        }
2095
2096
26
        block.get_by_position(result).column = std::move(result_column);
2097
26
        return Status::OK();
2098
26
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
1847
28
                        uint32_t result, size_t input_rows_count) const override {
1848
28
        if (arguments.size() % 2 != 1 || arguments.size() < 3) {
1849
0
            return Status::InvalidArgument(
1850
0
                    "Function {} must have an odd number of arguments and more than 2 arguments, "
1851
0
                    "but got: {}",
1852
0
                    name, arguments.size());
1853
0
        }
1854
1855
28
        const size_t keys_count = (arguments.size() - 1) / 2;
1856
1857
28
        auto return_data_type = make_nullable(std::make_shared<DataTypeJsonb>());
1858
1859
28
        auto result_column = return_data_type->create_column();
1860
28
        auto& result_nullable_col = assert_cast<ColumnNullable&>(*result_column);
1861
28
        auto& null_map = result_nullable_col.get_null_map_data();
1862
28
        auto& res_string_column =
1863
28
                assert_cast<ColumnString&>(result_nullable_col.get_nested_column());
1864
28
        auto& res_chars = res_string_column.get_chars();
1865
28
        auto& res_offsets = res_string_column.get_offsets();
1866
1867
28
        null_map.resize_fill(input_rows_count, 0);
1868
28
        res_offsets.resize(input_rows_count);
1869
28
        auto&& [json_data_arg_column, json_data_const] =
1870
28
                unpack_if_const(block.get_by_position(arguments[0]).column);
1871
1872
28
        if (json_data_const) {
1873
3
            if (json_data_arg_column->is_null_at(0)) {
1874
0
                return create_all_null_result(return_data_type, block, result, input_rows_count);
1875
0
            }
1876
3
        }
1877
1878
28
        std::vector<const ColumnString*> json_path_columns(keys_count);
1879
28
        std::vector<bool> json_path_constant(keys_count);
1880
28
        std::vector<const NullMap*> json_path_null_maps(keys_count, nullptr);
1881
1882
28
        std::vector<const ColumnString*> json_value_columns(keys_count);
1883
28
        std::vector<bool> json_value_constant(keys_count);
1884
28
        std::vector<const NullMap*> json_value_null_maps(keys_count, nullptr);
1885
1886
28
        const NullMap* json_data_null_map = nullptr;
1887
28
        const ColumnString* json_data_column;
1888
28
        if (const auto* nullable_column =
1889
28
                    check_and_get_column<ColumnNullable>(json_data_arg_column.get())) {
1890
28
            json_data_null_map = &nullable_column->get_null_map_data();
1891
28
            const auto& nested_column = nullable_column->get_nested_column();
1892
28
            json_data_column = assert_cast<const ColumnString*>(&nested_column);
1893
28
        } else {
1894
0
            json_data_column = assert_cast<const ColumnString*>(json_data_arg_column.get());
1895
0
        }
1896
1897
66
        for (size_t i = 1; i < arguments.size(); i += 2) {
1898
38
            auto&& [path_column, path_const] =
1899
38
                    unpack_if_const(block.get_by_position(arguments[i]).column);
1900
38
            auto&& [value_column, value_const] =
1901
38
                    unpack_if_const(block.get_by_position(arguments[i + 1]).column);
1902
1903
38
            if (path_const) {
1904
11
                if (path_column->is_null_at(0)) {
1905
0
                    return create_all_null_result(return_data_type, block, result,
1906
0
                                                  input_rows_count);
1907
0
                }
1908
11
            }
1909
1910
38
            json_path_constant[i / 2] = path_const;
1911
38
            if (const auto* nullable_column =
1912
38
                        check_and_get_column<ColumnNullable>(path_column.get())) {
1913
1
                json_path_null_maps[i / 2] = &nullable_column->get_null_map_data();
1914
1
                const auto& nested_column = nullable_column->get_nested_column();
1915
1
                json_path_columns[i / 2] = assert_cast<const ColumnString*>(&nested_column);
1916
37
            } else {
1917
37
                json_path_columns[i / 2] = assert_cast<const ColumnString*>(path_column.get());
1918
37
            }
1919
1920
38
            json_value_constant[i / 2] = value_const;
1921
38
            if (const auto* nullable_column =
1922
38
                        check_and_get_column<ColumnNullable>(value_column.get())) {
1923
19
                json_value_null_maps[i / 2] = &nullable_column->get_null_map_data();
1924
19
                const auto& nested_column = nullable_column->get_nested_column();
1925
19
                json_value_columns[i / 2] = assert_cast<const ColumnString*>(&nested_column);
1926
19
            } else {
1927
19
                json_value_columns[i / 2] = assert_cast<const ColumnString*>(value_column.get());
1928
19
            }
1929
38
        }
1930
1931
28
        DorisVector<const JsonbDocument*> json_documents(input_rows_count);
1932
28
        if (json_data_const) {
1933
3
            auto json_data_string = json_data_column->get_data_at(0);
1934
3
            const JsonbDocument* doc = nullptr;
1935
3
            RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(json_data_string.data,
1936
3
                                                                  json_data_string.size, &doc));
1937
3
            if (!doc || !doc->getValue()) [[unlikely]] {
1938
0
                return create_all_null_result(return_data_type, block, result, input_rows_count);
1939
0
            }
1940
15
            for (size_t i = 0; i != input_rows_count; ++i) {
1941
12
                json_documents[i] = doc;
1942
12
            }
1943
25
        } else {
1944
50
            for (size_t i = 0; i != input_rows_count; ++i) {
1945
25
                if (json_data_null_map && (*json_data_null_map)[i]) {
1946
0
                    null_map[i] = 1;
1947
0
                    json_documents[i] = nullptr;
1948
0
                    continue;
1949
0
                }
1950
1951
25
                auto json_data_string = json_data_column->get_data_at(i);
1952
25
                const JsonbDocument* doc = nullptr;
1953
25
                RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(json_data_string.data,
1954
25
                                                                      json_data_string.size, &doc));
1955
25
                if (!doc || !doc->getValue()) [[unlikely]] {
1956
0
                    null_map[i] = 1;
1957
0
                    continue;
1958
0
                }
1959
25
                json_documents[i] = doc;
1960
25
            }
1961
25
        }
1962
1963
28
        DorisVector<DorisVector<JsonbPath>> json_paths(keys_count);
1964
28
        DorisVector<DorisVector<const JsonbValue*>> json_values(keys_count);
1965
1966
28
        RETURN_IF_ERROR(parse_paths_and_values(json_paths, json_values, arguments, input_rows_count,
1967
28
                                               json_path_columns, json_path_constant,
1968
28
                                               json_path_null_maps, json_value_columns,
1969
28
                                               json_value_constant, json_value_null_maps));
1970
1971
26
        JsonbWriter writer;
1972
26
        struct DocumentBuffer {
1973
26
            DorisUniqueBufferPtr<char> ptr;
1974
26
            size_t size = 0;
1975
26
            size_t capacity = 0;
1976
26
        };
1977
1978
26
        DocumentBuffer tmp_buffer;
1979
1980
72
        for (size_t row_idx = 0; row_idx != input_rows_count; ++row_idx) {
1981
114
            for (size_t i = 1; i < arguments.size(); i += 2) {
1982
68
                const size_t index = i / 2;
1983
68
                auto& json_path = json_paths[index];
1984
68
                auto& json_value = json_values[index];
1985
1986
68
                const auto path_index = index_check_const(row_idx, json_path_constant[index]);
1987
68
                const auto value_index = index_check_const(row_idx, json_value_constant[index]);
1988
1989
68
                if (null_map[row_idx]) {
1990
0
                    continue;
1991
0
                }
1992
1993
68
                if (json_documents[row_idx] == nullptr) {
1994
0
                    null_map[row_idx] = 1;
1995
0
                    continue;
1996
0
                }
1997
1998
68
                if (json_path_null_maps[index] && (*json_path_null_maps[index])[path_index]) {
1999
1
                    null_map[row_idx] = 1;
2000
1
                    continue;
2001
1
                }
2002
2003
67
                auto find_result =
2004
67
                        json_documents[row_idx]->getValue()->findValue(json_path[path_index]);
2005
2006
67
                if (find_result.is_wildcard) {
2007
0
                    return Status::InvalidArgument(
2008
0
                            " In this situation, path expressions may not contain the * and ** "
2009
0
                            "tokens or an array range, argument index: {}, row index: {}",
2010
0
                            i, row_idx);
2011
0
                }
2012
2013
                if constexpr (modify_type == JsonbModifyType::Insert) {
2014
                    if (find_result.value) {
2015
                        continue;
2016
                    }
2017
67
                } else if constexpr (modify_type == JsonbModifyType::Replace) {
2018
67
                    if (!find_result.value) {
2019
11
                        continue;
2020
11
                    }
2021
67
                }
2022
2023
56
                std::vector<const JsonbValue*> parents;
2024
2025
67
                bool replace = false;
2026
67
                parents.emplace_back(json_documents[row_idx]->getValue());
2027
67
                const auto legs_count = json_path[path_index].get_leg_vector_size();
2028
67
                if (find_result.value) {
2029
                    // find target path, replace it with the new value.
2030
56
                    replace = true;
2031
56
                    if (!build_parents_by_path(json_documents[row_idx]->getValue(),
2032
56
                                               json_path[path_index], parents)) {
2033
0
                        continue;
2034
0
                    }
2035
56
                } else {
2036
                    // does not find target path, insert the new value.
2037
11
                    JsonbPath new_path;
2038
11
                    DCHECK_GT(legs_count, 0);
2039
11
                    for (size_t j = 0; j + 1 < legs_count; ++j) {
2040
0
                        auto* current_leg = json_path[path_index].get_leg_from_leg_vector(j);
2041
0
                        std::unique_ptr<leg_info> leg = std::make_unique<leg_info>(
2042
0
                                current_leg->leg_ptr, current_leg->leg_len,
2043
0
                                current_leg->array_index, current_leg->type);
2044
0
                        new_path.add_leg_to_leg_vector(std::move(leg));
2045
0
                    }
2046
2047
11
                    if (!build_parents_by_path(json_documents[row_idx]->getValue(), new_path,
2048
11
                                               parents)) {
2049
0
                        continue;
2050
0
                    }
2051
11
                }
2052
2053
67
                leg_info* last_leg =
2054
67
                        legs_count > 0
2055
67
                                ? json_path[path_index].get_leg_from_leg_vector(legs_count - 1)
2056
67
                                : nullptr;
2057
67
                RETURN_IF_ERROR(write_json_value(json_documents[row_idx]->getValue(), parents, 0,
2058
67
                                                 json_value[value_index], replace, last_leg,
2059
67
                                                 writer));
2060
2061
67
                auto* writer_output = writer.getOutput();
2062
67
                if (writer_output->getSize() > tmp_buffer.capacity) {
2063
23
                    tmp_buffer.capacity =
2064
23
                            ((size_t(writer_output->getSize()) + 1024 - 1) / 1024) * 1024;
2065
23
                    tmp_buffer.ptr = make_unique_buffer<char>(tmp_buffer.capacity);
2066
23
                    DCHECK_LE(writer_output->getSize(), tmp_buffer.capacity);
2067
23
                }
2068
2069
67
                memcpy(tmp_buffer.ptr.get(), writer_output->getBuffer(), writer_output->getSize());
2070
67
                tmp_buffer.size = writer_output->getSize();
2071
2072
67
                writer.reset();
2073
2074
67
                RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(
2075
67
                        tmp_buffer.ptr.get(), tmp_buffer.size, &json_documents[row_idx]));
2076
67
            }
2077
2078
46
            if (!null_map[row_idx]) {
2079
34
                const auto* jsonb_document = json_documents[row_idx];
2080
34
                const auto size = jsonb_document->numPackedBytes();
2081
34
                res_chars.insert(reinterpret_cast<const char*>(jsonb_document),
2082
34
                                 reinterpret_cast<const char*>(jsonb_document) + size);
2083
34
            }
2084
2085
46
            res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size());
2086
2087
46
            if (!null_map[row_idx]) {
2088
34
                auto* ptr = res_chars.data() + res_offsets[row_idx - 1];
2089
34
                auto size = res_offsets[row_idx] - res_offsets[row_idx - 1];
2090
34
                const JsonbDocument* doc = nullptr;
2091
34
                THROW_IF_ERROR(JsonbDocument::checkAndCreateDocument(
2092
34
                        reinterpret_cast<const char*>(ptr), size, &doc));
2093
34
            }
2094
46
        }
2095
2096
37
        block.get_by_position(result).column = std::move(result_column);
2097
37
        return Status::OK();
2098
26
    }
2099
2100
    bool build_parents_by_path(const JsonbValue* root, const JsonbPath& path,
2101
352
                               std::vector<const JsonbValue*>& parents) const {
2102
352
        const size_t index = parents.size() - 1;
2103
352
        if (index == path.get_leg_vector_size()) {
2104
149
            return true;
2105
149
        }
2106
2107
203
        JsonbPath current;
2108
203
        auto* current_leg = path.get_leg_from_leg_vector(index);
2109
203
        std::unique_ptr<leg_info> leg =
2110
203
                std::make_unique<leg_info>(current_leg->leg_ptr, current_leg->leg_len,
2111
203
                                           current_leg->array_index, current_leg->type);
2112
203
        current.add_leg_to_leg_vector(std::move(leg));
2113
2114
203
        auto find_result = root->findValue(current);
2115
203
        if (!find_result.value) {
2116
12
            return false;
2117
191
        } else if (find_result.value == root) {
2118
6
            return true;
2119
185
        } else {
2120
185
            parents.emplace_back(find_result.value);
2121
185
        }
2122
2123
185
        return build_parents_by_path(find_result.value, path, parents);
2124
203
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE21build_parents_by_pathEPKNS_10JsonbValueERKNS_9JsonbPathERSt6vectorIS5_SaIS5_EE
Line
Count
Source
2101
84
                               std::vector<const JsonbValue*>& parents) const {
2102
84
        const size_t index = parents.size() - 1;
2103
84
        if (index == path.get_leg_vector_size()) {
2104
43
            return true;
2105
43
        }
2106
2107
41
        JsonbPath current;
2108
41
        auto* current_leg = path.get_leg_from_leg_vector(index);
2109
41
        std::unique_ptr<leg_info> leg =
2110
41
                std::make_unique<leg_info>(current_leg->leg_ptr, current_leg->leg_len,
2111
41
                                           current_leg->array_index, current_leg->type);
2112
41
        current.add_leg_to_leg_vector(std::move(leg));
2113
2114
41
        auto find_result = root->findValue(current);
2115
41
        if (!find_result.value) {
2116
1
            return false;
2117
40
        } else if (find_result.value == root) {
2118
0
            return true;
2119
40
        } else {
2120
40
            parents.emplace_back(find_result.value);
2121
40
        }
2122
2123
40
        return build_parents_by_path(find_result.value, path, parents);
2124
41
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE21build_parents_by_pathEPKNS_10JsonbValueERKNS_9JsonbPathERSt6vectorIS5_SaIS5_EE
Line
Count
Source
2101
137
                               std::vector<const JsonbValue*>& parents) const {
2102
137
        const size_t index = parents.size() - 1;
2103
137
        if (index == path.get_leg_vector_size()) {
2104
53
            return true;
2105
53
        }
2106
2107
84
        JsonbPath current;
2108
84
        auto* current_leg = path.get_leg_from_leg_vector(index);
2109
84
        std::unique_ptr<leg_info> leg =
2110
84
                std::make_unique<leg_info>(current_leg->leg_ptr, current_leg->leg_len,
2111
84
                                           current_leg->array_index, current_leg->type);
2112
84
        current.add_leg_to_leg_vector(std::move(leg));
2113
2114
84
        auto find_result = root->findValue(current);
2115
84
        if (!find_result.value) {
2116
11
            return false;
2117
73
        } else if (find_result.value == root) {
2118
3
            return true;
2119
70
        } else {
2120
70
            parents.emplace_back(find_result.value);
2121
70
        }
2122
2123
70
        return build_parents_by_path(find_result.value, path, parents);
2124
84
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE21build_parents_by_pathEPKNS_10JsonbValueERKNS_9JsonbPathERSt6vectorIS5_SaIS5_EE
Line
Count
Source
2101
131
                               std::vector<const JsonbValue*>& parents) const {
2102
131
        const size_t index = parents.size() - 1;
2103
131
        if (index == path.get_leg_vector_size()) {
2104
53
            return true;
2105
53
        }
2106
2107
78
        JsonbPath current;
2108
78
        auto* current_leg = path.get_leg_from_leg_vector(index);
2109
78
        std::unique_ptr<leg_info> leg =
2110
78
                std::make_unique<leg_info>(current_leg->leg_ptr, current_leg->leg_len,
2111
78
                                           current_leg->array_index, current_leg->type);
2112
78
        current.add_leg_to_leg_vector(std::move(leg));
2113
2114
78
        auto find_result = root->findValue(current);
2115
78
        if (!find_result.value) {
2116
0
            return false;
2117
78
        } else if (find_result.value == root) {
2118
3
            return true;
2119
75
        } else {
2120
75
            parents.emplace_back(find_result.value);
2121
75
        }
2122
2123
75
        return build_parents_by_path(find_result.value, path, parents);
2124
78
    }
2125
2126
    Status write_json_value(const JsonbValue* root, const std::vector<const JsonbValue*>& parents,
2127
                            const size_t parent_index, const JsonbValue* value, const bool replace,
2128
340
                            const leg_info* last_leg, JsonbWriter& writer) const {
2129
340
        if (parent_index >= parents.size()) {
2130
0
            return Status::InvalidArgument(
2131
0
                    "JsonbModify: parent_index {} is out of bounds for parents size {}",
2132
0
                    parent_index, parents.size());
2133
0
        }
2134
2135
340
        if (parents[parent_index] != root) {
2136
0
            return Status::InvalidArgument(
2137
0
                    "JsonbModify: parent value does not match root value, parent_index: {}, "
2138
0
                    "parents size: {}",
2139
0
                    parent_index, parents.size());
2140
0
        }
2141
2142
340
        if (parent_index == parents.size() - 1 && replace) {
2143
            // We are at the last parent, write the value directly
2144
100
            if (value == nullptr) {
2145
24
                writer.writeNull();
2146
76
            } else {
2147
76
                writer.writeValue(value);
2148
76
            }
2149
100
            return Status::OK();
2150
100
        }
2151
2152
240
        bool value_written = false;
2153
240
        bool is_last_parent = (parent_index == parents.size() - 1);
2154
240
        const auto* next_parent = is_last_parent ? nullptr : parents[parent_index + 1];
2155
240
        if (root->isArray()) {
2156
23
            writer.writeStartArray();
2157
23
            const auto* array_val = root->unpack<ArrayVal>();
2158
67
            for (int i = 0; i != array_val->numElem(); ++i) {
2159
44
                auto* it = array_val->get(i);
2160
2161
44
                if (is_last_parent && last_leg->array_index == i) {
2162
0
                    value_written = true;
2163
0
                    writer.writeValue(value);
2164
44
                } else if (it == next_parent) {
2165
13
                    value_written = true;
2166
13
                    RETURN_IF_ERROR(write_json_value(it, parents, parent_index + 1, value, replace,
2167
13
                                                     last_leg, writer));
2168
31
                } else {
2169
31
                    writer.writeValue(it);
2170
31
                }
2171
44
            }
2172
23
            if (is_last_parent && !value_written) {
2173
10
                value_written = true;
2174
10
                writer.writeValue(value);
2175
10
            }
2176
2177
23
            writer.writeEndArray();
2178
2179
217
        } else {
2180
            /**
2181
                Because even for a non-array object, `$[0]` can still point to that object:
2182
                ```
2183
                select json_extract('{"key": "value"}', '$[0]');
2184
                +------------------------------------------+
2185
                | json_extract('{"key": "value"}', '$[0]') |
2186
                +------------------------------------------+
2187
                | {"key": "value"}                         |
2188
                +------------------------------------------+
2189
                ```
2190
                So when inserting an element into `$[1]`, even if '$' does not represent an array,
2191
                it should be converted to an array before insertion:
2192
                ```
2193
                select json_insert('123','$[1]', null);
2194
                +---------------------------------+
2195
                | json_insert('123','$[1]', null) |
2196
                +---------------------------------+
2197
                | [123, null]                     |
2198
                +---------------------------------+
2199
                ```
2200
             */
2201
217
            if (is_last_parent && last_leg && last_leg->type == ARRAY_CODE) {
2202
8
                writer.writeStartArray();
2203
8
                writer.writeValue(root);
2204
8
                writer.writeValue(value);
2205
8
                writer.writeEndArray();
2206
8
                return Status::OK();
2207
209
            } else if (root->isObject()) {
2208
209
                writer.writeStartObject();
2209
209
                const auto* object_val = root->unpack<ObjectVal>();
2210
403
                for (const auto& it : *object_val) {
2211
403
                    writer.writeKey(it.getKeyStr(), it.klen());
2212
403
                    if (it.value() == next_parent) {
2213
172
                        value_written = true;
2214
172
                        RETURN_IF_ERROR(write_json_value(it.value(), parents, parent_index + 1,
2215
172
                                                         value, replace, last_leg, writer));
2216
231
                    } else {
2217
231
                        writer.writeValue(it.value());
2218
231
                    }
2219
403
                }
2220
2221
209
                if (is_last_parent && !value_written) {
2222
37
                    value_written = true;
2223
37
                    writer.writeStartObject();
2224
37
                    writer.writeKey(last_leg->leg_ptr, static_cast<uint8_t>(last_leg->leg_len));
2225
37
                    writer.writeValue(value);
2226
37
                    writer.writeEndObject();
2227
37
                }
2228
209
                writer.writeEndObject();
2229
2230
209
            } else {
2231
0
                return Status::InvalidArgument("Cannot insert value into this type");
2232
0
            }
2233
217
        }
2234
2235
232
        if (!value_written) {
2236
0
            return Status::InvalidArgument(
2237
0
                    "JsonbModify: value not written, parent_index: {}, parents size: {}",
2238
0
                    parent_index, parents.size());
2239
0
        }
2240
2241
232
        return Status::OK();
2242
232
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE16write_json_valueEPKNS_10JsonbValueERKSt6vectorIS5_SaIS5_EEmS5_bPKNS_8leg_infoERNS_12JsonbWriterTINS_14JsonbOutStreamEEE
Line
Count
Source
2128
83
                            const leg_info* last_leg, JsonbWriter& writer) const {
2129
83
        if (parent_index >= parents.size()) {
2130
0
            return Status::InvalidArgument(
2131
0
                    "JsonbModify: parent_index {} is out of bounds for parents size {}",
2132
0
                    parent_index, parents.size());
2133
0
        }
2134
2135
83
        if (parents[parent_index] != root) {
2136
0
            return Status::InvalidArgument(
2137
0
                    "JsonbModify: parent value does not match root value, parent_index: {}, "
2138
0
                    "parents size: {}",
2139
0
                    parent_index, parents.size());
2140
0
        }
2141
2142
83
        if (parent_index == parents.size() - 1 && replace) {
2143
            // We are at the last parent, write the value directly
2144
0
            if (value == nullptr) {
2145
0
                writer.writeNull();
2146
0
            } else {
2147
0
                writer.writeValue(value);
2148
0
            }
2149
0
            return Status::OK();
2150
0
        }
2151
2152
83
        bool value_written = false;
2153
83
        bool is_last_parent = (parent_index == parents.size() - 1);
2154
83
        const auto* next_parent = is_last_parent ? nullptr : parents[parent_index + 1];
2155
83
        if (root->isArray()) {
2156
5
            writer.writeStartArray();
2157
5
            const auto* array_val = root->unpack<ArrayVal>();
2158
14
            for (int i = 0; i != array_val->numElem(); ++i) {
2159
9
                auto* it = array_val->get(i);
2160
2161
9
                if (is_last_parent && last_leg->array_index == i) {
2162
0
                    value_written = true;
2163
0
                    writer.writeValue(value);
2164
9
                } else if (it == next_parent) {
2165
0
                    value_written = true;
2166
0
                    RETURN_IF_ERROR(write_json_value(it, parents, parent_index + 1, value, replace,
2167
0
                                                     last_leg, writer));
2168
9
                } else {
2169
9
                    writer.writeValue(it);
2170
9
                }
2171
9
            }
2172
5
            if (is_last_parent && !value_written) {
2173
5
                value_written = true;
2174
5
                writer.writeValue(value);
2175
5
            }
2176
2177
5
            writer.writeEndArray();
2178
2179
78
        } else {
2180
            /**
2181
                Because even for a non-array object, `$[0]` can still point to that object:
2182
                ```
2183
                select json_extract('{"key": "value"}', '$[0]');
2184
                +------------------------------------------+
2185
                | json_extract('{"key": "value"}', '$[0]') |
2186
                +------------------------------------------+
2187
                | {"key": "value"}                         |
2188
                +------------------------------------------+
2189
                ```
2190
                So when inserting an element into `$[1]`, even if '$' does not represent an array,
2191
                it should be converted to an array before insertion:
2192
                ```
2193
                select json_insert('123','$[1]', null);
2194
                +---------------------------------+
2195
                | json_insert('123','$[1]', null) |
2196
                +---------------------------------+
2197
                | [123, null]                     |
2198
                +---------------------------------+
2199
                ```
2200
             */
2201
78
            if (is_last_parent && last_leg && last_leg->type == ARRAY_CODE) {
2202
4
                writer.writeStartArray();
2203
4
                writer.writeValue(root);
2204
4
                writer.writeValue(value);
2205
4
                writer.writeEndArray();
2206
4
                return Status::OK();
2207
74
            } else if (root->isObject()) {
2208
74
                writer.writeStartObject();
2209
74
                const auto* object_val = root->unpack<ObjectVal>();
2210
74
                for (const auto& it : *object_val) {
2211
70
                    writer.writeKey(it.getKeyStr(), it.klen());
2212
70
                    if (it.value() == next_parent) {
2213
40
                        value_written = true;
2214
40
                        RETURN_IF_ERROR(write_json_value(it.value(), parents, parent_index + 1,
2215
40
                                                         value, replace, last_leg, writer));
2216
40
                    } else {
2217
30
                        writer.writeValue(it.value());
2218
30
                    }
2219
70
                }
2220
2221
74
                if (is_last_parent && !value_written) {
2222
34
                    value_written = true;
2223
34
                    writer.writeStartObject();
2224
34
                    writer.writeKey(last_leg->leg_ptr, static_cast<uint8_t>(last_leg->leg_len));
2225
34
                    writer.writeValue(value);
2226
34
                    writer.writeEndObject();
2227
34
                }
2228
74
                writer.writeEndObject();
2229
2230
74
            } else {
2231
0
                return Status::InvalidArgument("Cannot insert value into this type");
2232
0
            }
2233
78
        }
2234
2235
79
        if (!value_written) {
2236
0
            return Status::InvalidArgument(
2237
0
                    "JsonbModify: value not written, parent_index: {}, parents size: {}",
2238
0
                    parent_index, parents.size());
2239
0
        }
2240
2241
79
        return Status::OK();
2242
79
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE16write_json_valueEPKNS_10JsonbValueERKSt6vectorIS5_SaIS5_EEmS5_bPKNS_8leg_infoERNS_12JsonbWriterTINS_14JsonbOutStreamEEE
Line
Count
Source
2128
126
                            const leg_info* last_leg, JsonbWriter& writer) const {
2129
126
        if (parent_index >= parents.size()) {
2130
0
            return Status::InvalidArgument(
2131
0
                    "JsonbModify: parent_index {} is out of bounds for parents size {}",
2132
0
                    parent_index, parents.size());
2133
0
        }
2134
2135
126
        if (parents[parent_index] != root) {
2136
0
            return Status::InvalidArgument(
2137
0
                    "JsonbModify: parent value does not match root value, parent_index: {}, "
2138
0
                    "parents size: {}",
2139
0
                    parent_index, parents.size());
2140
0
        }
2141
2142
126
        if (parent_index == parents.size() - 1 && replace) {
2143
            // We are at the last parent, write the value directly
2144
44
            if (value == nullptr) {
2145
10
                writer.writeNull();
2146
34
            } else {
2147
34
                writer.writeValue(value);
2148
34
            }
2149
44
            return Status::OK();
2150
44
        }
2151
2152
82
        bool value_written = false;
2153
82
        bool is_last_parent = (parent_index == parents.size() - 1);
2154
82
        const auto* next_parent = is_last_parent ? nullptr : parents[parent_index + 1];
2155
82
        if (root->isArray()) {
2156
10
            writer.writeStartArray();
2157
10
            const auto* array_val = root->unpack<ArrayVal>();
2158
29
            for (int i = 0; i != array_val->numElem(); ++i) {
2159
19
                auto* it = array_val->get(i);
2160
2161
19
                if (is_last_parent && last_leg->array_index == i) {
2162
0
                    value_written = true;
2163
0
                    writer.writeValue(value);
2164
19
                } else if (it == next_parent) {
2165
5
                    value_written = true;
2166
5
                    RETURN_IF_ERROR(write_json_value(it, parents, parent_index + 1, value, replace,
2167
5
                                                     last_leg, writer));
2168
14
                } else {
2169
14
                    writer.writeValue(it);
2170
14
                }
2171
19
            }
2172
10
            if (is_last_parent && !value_written) {
2173
5
                value_written = true;
2174
5
                writer.writeValue(value);
2175
5
            }
2176
2177
10
            writer.writeEndArray();
2178
2179
72
        } else {
2180
            /**
2181
                Because even for a non-array object, `$[0]` can still point to that object:
2182
                ```
2183
                select json_extract('{"key": "value"}', '$[0]');
2184
                +------------------------------------------+
2185
                | json_extract('{"key": "value"}', '$[0]') |
2186
                +------------------------------------------+
2187
                | {"key": "value"}                         |
2188
                +------------------------------------------+
2189
                ```
2190
                So when inserting an element into `$[1]`, even if '$' does not represent an array,
2191
                it should be converted to an array before insertion:
2192
                ```
2193
                select json_insert('123','$[1]', null);
2194
                +---------------------------------+
2195
                | json_insert('123','$[1]', null) |
2196
                +---------------------------------+
2197
                | [123, null]                     |
2198
                +---------------------------------+
2199
                ```
2200
             */
2201
72
            if (is_last_parent && last_leg && last_leg->type == ARRAY_CODE) {
2202
4
                writer.writeStartArray();
2203
4
                writer.writeValue(root);
2204
4
                writer.writeValue(value);
2205
4
                writer.writeEndArray();
2206
4
                return Status::OK();
2207
68
            } else if (root->isObject()) {
2208
68
                writer.writeStartObject();
2209
68
                const auto* object_val = root->unpack<ObjectVal>();
2210
158
                for (const auto& it : *object_val) {
2211
158
                    writer.writeKey(it.getKeyStr(), it.klen());
2212
158
                    if (it.value() == next_parent) {
2213
65
                        value_written = true;
2214
65
                        RETURN_IF_ERROR(write_json_value(it.value(), parents, parent_index + 1,
2215
65
                                                         value, replace, last_leg, writer));
2216
93
                    } else {
2217
93
                        writer.writeValue(it.value());
2218
93
                    }
2219
158
                }
2220
2221
68
                if (is_last_parent && !value_written) {
2222
3
                    value_written = true;
2223
3
                    writer.writeStartObject();
2224
3
                    writer.writeKey(last_leg->leg_ptr, static_cast<uint8_t>(last_leg->leg_len));
2225
3
                    writer.writeValue(value);
2226
3
                    writer.writeEndObject();
2227
3
                }
2228
68
                writer.writeEndObject();
2229
2230
68
            } else {
2231
0
                return Status::InvalidArgument("Cannot insert value into this type");
2232
0
            }
2233
72
        }
2234
2235
78
        if (!value_written) {
2236
0
            return Status::InvalidArgument(
2237
0
                    "JsonbModify: value not written, parent_index: {}, parents size: {}",
2238
0
                    parent_index, parents.size());
2239
0
        }
2240
2241
78
        return Status::OK();
2242
78
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE16write_json_valueEPKNS_10JsonbValueERKSt6vectorIS5_SaIS5_EEmS5_bPKNS_8leg_infoERNS_12JsonbWriterTINS_14JsonbOutStreamEEE
Line
Count
Source
2128
131
                            const leg_info* last_leg, JsonbWriter& writer) const {
2129
131
        if (parent_index >= parents.size()) {
2130
0
            return Status::InvalidArgument(
2131
0
                    "JsonbModify: parent_index {} is out of bounds for parents size {}",
2132
0
                    parent_index, parents.size());
2133
0
        }
2134
2135
131
        if (parents[parent_index] != root) {
2136
0
            return Status::InvalidArgument(
2137
0
                    "JsonbModify: parent value does not match root value, parent_index: {}, "
2138
0
                    "parents size: {}",
2139
0
                    parent_index, parents.size());
2140
0
        }
2141
2142
131
        if (parent_index == parents.size() - 1 && replace) {
2143
            // We are at the last parent, write the value directly
2144
56
            if (value == nullptr) {
2145
14
                writer.writeNull();
2146
42
            } else {
2147
42
                writer.writeValue(value);
2148
42
            }
2149
56
            return Status::OK();
2150
56
        }
2151
2152
75
        bool value_written = false;
2153
75
        bool is_last_parent = (parent_index == parents.size() - 1);
2154
75
        const auto* next_parent = is_last_parent ? nullptr : parents[parent_index + 1];
2155
75
        if (root->isArray()) {
2156
8
            writer.writeStartArray();
2157
8
            const auto* array_val = root->unpack<ArrayVal>();
2158
24
            for (int i = 0; i != array_val->numElem(); ++i) {
2159
16
                auto* it = array_val->get(i);
2160
2161
16
                if (is_last_parent && last_leg->array_index == i) {
2162
0
                    value_written = true;
2163
0
                    writer.writeValue(value);
2164
16
                } else if (it == next_parent) {
2165
8
                    value_written = true;
2166
8
                    RETURN_IF_ERROR(write_json_value(it, parents, parent_index + 1, value, replace,
2167
8
                                                     last_leg, writer));
2168
8
                } else {
2169
8
                    writer.writeValue(it);
2170
8
                }
2171
16
            }
2172
8
            if (is_last_parent && !value_written) {
2173
0
                value_written = true;
2174
0
                writer.writeValue(value);
2175
0
            }
2176
2177
8
            writer.writeEndArray();
2178
2179
67
        } else {
2180
            /**
2181
                Because even for a non-array object, `$[0]` can still point to that object:
2182
                ```
2183
                select json_extract('{"key": "value"}', '$[0]');
2184
                +------------------------------------------+
2185
                | json_extract('{"key": "value"}', '$[0]') |
2186
                +------------------------------------------+
2187
                | {"key": "value"}                         |
2188
                +------------------------------------------+
2189
                ```
2190
                So when inserting an element into `$[1]`, even if '$' does not represent an array,
2191
                it should be converted to an array before insertion:
2192
                ```
2193
                select json_insert('123','$[1]', null);
2194
                +---------------------------------+
2195
                | json_insert('123','$[1]', null) |
2196
                +---------------------------------+
2197
                | [123, null]                     |
2198
                +---------------------------------+
2199
                ```
2200
             */
2201
67
            if (is_last_parent && last_leg && last_leg->type == ARRAY_CODE) {
2202
0
                writer.writeStartArray();
2203
0
                writer.writeValue(root);
2204
0
                writer.writeValue(value);
2205
0
                writer.writeEndArray();
2206
0
                return Status::OK();
2207
67
            } else if (root->isObject()) {
2208
67
                writer.writeStartObject();
2209
67
                const auto* object_val = root->unpack<ObjectVal>();
2210
175
                for (const auto& it : *object_val) {
2211
175
                    writer.writeKey(it.getKeyStr(), it.klen());
2212
175
                    if (it.value() == next_parent) {
2213
67
                        value_written = true;
2214
67
                        RETURN_IF_ERROR(write_json_value(it.value(), parents, parent_index + 1,
2215
67
                                                         value, replace, last_leg, writer));
2216
108
                    } else {
2217
108
                        writer.writeValue(it.value());
2218
108
                    }
2219
175
                }
2220
2221
67
                if (is_last_parent && !value_written) {
2222
0
                    value_written = true;
2223
0
                    writer.writeStartObject();
2224
0
                    writer.writeKey(last_leg->leg_ptr, static_cast<uint8_t>(last_leg->leg_len));
2225
0
                    writer.writeValue(value);
2226
0
                    writer.writeEndObject();
2227
0
                }
2228
67
                writer.writeEndObject();
2229
2230
67
            } else {
2231
0
                return Status::InvalidArgument("Cannot insert value into this type");
2232
0
            }
2233
67
        }
2234
2235
75
        if (!value_written) {
2236
0
            return Status::InvalidArgument(
2237
0
                    "JsonbModify: value not written, parent_index: {}, parents size: {}",
2238
0
                    parent_index, parents.size());
2239
0
        }
2240
2241
75
        return Status::OK();
2242
75
    }
2243
2244
    Status parse_paths_and_values(DorisVector<DorisVector<JsonbPath>>& json_paths,
2245
                                  DorisVector<DorisVector<const JsonbValue*>>& json_values,
2246
                                  const ColumnNumbers& arguments, const size_t input_rows_count,
2247
                                  const std::vector<const ColumnString*>& json_path_columns,
2248
                                  const std::vector<bool>& json_path_constant,
2249
                                  const std::vector<const NullMap*>& json_path_null_maps,
2250
                                  const std::vector<const ColumnString*>& json_value_columns,
2251
                                  const std::vector<bool>& json_value_constant,
2252
85
                                  const std::vector<const NullMap*>& json_value_null_maps) const {
2253
187
        for (size_t i = 1; i < arguments.size(); i += 2) {
2254
110
            const size_t index = i / 2;
2255
110
            const auto* json_path_column = json_path_columns[index];
2256
110
            const auto* value_column = json_value_columns[index];
2257
2258
110
            json_paths[index].resize(json_path_constant[index] ? 1 : input_rows_count);
2259
110
            json_values[index].resize(json_value_constant[index] ? 1 : input_rows_count, nullptr);
2260
2261
225
            for (size_t row_idx = 0; row_idx != json_paths[index].size(); ++row_idx) {
2262
123
                if (json_path_null_maps[index] && (*json_path_null_maps[index])[row_idx]) {
2263
6
                    continue;
2264
6
                }
2265
2266
117
                auto path_string = json_path_column->get_data_at(row_idx);
2267
117
                if (!json_paths[index][row_idx].seek(path_string.data, path_string.size)) {
2268
3
                    return Status::InvalidArgument(
2269
3
                            "Json path error: Invalid Json Path for value: {}, "
2270
3
                            "argument "
2271
3
                            "index: {}, row index: {}",
2272
3
                            std::string_view(path_string.data, path_string.size), i, row_idx);
2273
3
                }
2274
2275
114
                if (json_paths[index][row_idx].is_wildcard()) {
2276
5
                    return Status::InvalidArgument(
2277
5
                            "In this situation, path expressions may not contain the * and ** "
2278
5
                            "tokens, argument index: {}, row index: {}",
2279
5
                            i, row_idx);
2280
5
                }
2281
114
            }
2282
2283
302
            for (size_t row_idx = 0; row_idx != json_values[index].size(); ++row_idx) {
2284
200
                if (json_value_null_maps[index] && (*json_value_null_maps[index])[row_idx]) {
2285
48
                    continue;
2286
48
                }
2287
2288
152
                auto value_string = value_column->get_data_at(row_idx);
2289
152
                const JsonbDocument* doc = nullptr;
2290
152
                RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(value_string.data,
2291
152
                                                                      value_string.size, &doc));
2292
152
                if (doc) {
2293
152
                    json_values[index][row_idx] = doc->getValue();
2294
152
                }
2295
152
            }
2296
102
        }
2297
2298
77
        return Status::OK();
2299
85
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE0EE22parse_paths_and_valuesERSt6vectorIS3_INS_9JsonbPathENS_18CustomStdAllocatorIS4_NS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEEEEENS5_ISA_S8_EEERS3_IS3_IPKNS_10JsonbValueENS5_ISG_S8_EEENS5_ISI_S8_EEERKS3_IjSaIjEEmRKS3_IPKNS_9ColumnStrIjEESaIST_EERKS3_IbSaIbEERKS3_IPKNS_8PODArrayIhLm4096ES8_Lm16ELm15EEESaIS15_EESX_S11_S19_
Line
Count
Source
2252
29
                                  const std::vector<const NullMap*>& json_value_null_maps) const {
2253
61
        for (size_t i = 1; i < arguments.size(); i += 2) {
2254
36
            const size_t index = i / 2;
2255
36
            const auto* json_path_column = json_path_columns[index];
2256
36
            const auto* value_column = json_value_columns[index];
2257
2258
36
            json_paths[index].resize(json_path_constant[index] ? 1 : input_rows_count);
2259
36
            json_values[index].resize(json_value_constant[index] ? 1 : input_rows_count, nullptr);
2260
2261
79
            for (size_t row_idx = 0; row_idx != json_paths[index].size(); ++row_idx) {
2262
47
                if (json_path_null_maps[index] && (*json_path_null_maps[index])[row_idx]) {
2263
4
                    continue;
2264
4
                }
2265
2266
43
                auto path_string = json_path_column->get_data_at(row_idx);
2267
43
                if (!json_paths[index][row_idx].seek(path_string.data, path_string.size)) {
2268
1
                    return Status::InvalidArgument(
2269
1
                            "Json path error: Invalid Json Path for value: {}, "
2270
1
                            "argument "
2271
1
                            "index: {}, row index: {}",
2272
1
                            std::string_view(path_string.data, path_string.size), i, row_idx);
2273
1
                }
2274
2275
42
                if (json_paths[index][row_idx].is_wildcard()) {
2276
3
                    return Status::InvalidArgument(
2277
3
                            "In this situation, path expressions may not contain the * and ** "
2278
3
                            "tokens, argument index: {}, row index: {}",
2279
3
                            i, row_idx);
2280
3
                }
2281
42
            }
2282
2283
96
            for (size_t row_idx = 0; row_idx != json_values[index].size(); ++row_idx) {
2284
64
                if (json_value_null_maps[index] && (*json_value_null_maps[index])[row_idx]) {
2285
14
                    continue;
2286
14
                }
2287
2288
50
                auto value_string = value_column->get_data_at(row_idx);
2289
50
                const JsonbDocument* doc = nullptr;
2290
50
                RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(value_string.data,
2291
50
                                                                      value_string.size, &doc));
2292
50
                if (doc) {
2293
50
                    json_values[index][row_idx] = doc->getValue();
2294
50
                }
2295
50
            }
2296
32
        }
2297
2298
25
        return Status::OK();
2299
29
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE1EE22parse_paths_and_valuesERSt6vectorIS3_INS_9JsonbPathENS_18CustomStdAllocatorIS4_NS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEEEEENS5_ISA_S8_EEERS3_IS3_IPKNS_10JsonbValueENS5_ISG_S8_EEENS5_ISI_S8_EEERKS3_IjSaIjEEmRKS3_IPKNS_9ColumnStrIjEESaIST_EERKS3_IbSaIbEERKS3_IPKNS_8PODArrayIhLm4096ES8_Lm16ELm15EEESaIS15_EESX_S11_S19_
Line
Count
Source
2252
28
                                  const std::vector<const NullMap*>& json_value_null_maps) const {
2253
62
        for (size_t i = 1; i < arguments.size(); i += 2) {
2254
36
            const size_t index = i / 2;
2255
36
            const auto* json_path_column = json_path_columns[index];
2256
36
            const auto* value_column = json_value_columns[index];
2257
2258
36
            json_paths[index].resize(json_path_constant[index] ? 1 : input_rows_count);
2259
36
            json_values[index].resize(json_value_constant[index] ? 1 : input_rows_count, nullptr);
2260
2261
72
            for (size_t row_idx = 0; row_idx != json_paths[index].size(); ++row_idx) {
2262
38
                if (json_path_null_maps[index] && (*json_path_null_maps[index])[row_idx]) {
2263
1
                    continue;
2264
1
                }
2265
2266
37
                auto path_string = json_path_column->get_data_at(row_idx);
2267
37
                if (!json_paths[index][row_idx].seek(path_string.data, path_string.size)) {
2268
1
                    return Status::InvalidArgument(
2269
1
                            "Json path error: Invalid Json Path for value: {}, "
2270
1
                            "argument "
2271
1
                            "index: {}, row index: {}",
2272
1
                            std::string_view(path_string.data, path_string.size), i, row_idx);
2273
1
                }
2274
2275
36
                if (json_paths[index][row_idx].is_wildcard()) {
2276
1
                    return Status::InvalidArgument(
2277
1
                            "In this situation, path expressions may not contain the * and ** "
2278
1
                            "tokens, argument index: {}, row index: {}",
2279
1
                            i, row_idx);
2280
1
                }
2281
36
            }
2282
2283
102
            for (size_t row_idx = 0; row_idx != json_values[index].size(); ++row_idx) {
2284
68
                if (json_value_null_maps[index] && (*json_value_null_maps[index])[row_idx]) {
2285
16
                    continue;
2286
16
                }
2287
2288
52
                auto value_string = value_column->get_data_at(row_idx);
2289
52
                const JsonbDocument* doc = nullptr;
2290
52
                RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(value_string.data,
2291
52
                                                                      value_string.size, &doc));
2292
52
                if (doc) {
2293
52
                    json_values[index][row_idx] = doc->getValue();
2294
52
                }
2295
52
            }
2296
34
        }
2297
2298
26
        return Status::OK();
2299
28
    }
_ZNK5doris19FunctionJsonbModifyILNS_15JsonbModifyTypeE2EE22parse_paths_and_valuesERSt6vectorIS3_INS_9JsonbPathENS_18CustomStdAllocatorIS4_NS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEEEEENS5_ISA_S8_EEERS3_IS3_IPKNS_10JsonbValueENS5_ISG_S8_EEENS5_ISI_S8_EEERKS3_IjSaIjEEmRKS3_IPKNS_9ColumnStrIjEESaIST_EERKS3_IbSaIbEERKS3_IPKNS_8PODArrayIhLm4096ES8_Lm16ELm15EEESaIS15_EESX_S11_S19_
Line
Count
Source
2252
28
                                  const std::vector<const NullMap*>& json_value_null_maps) const {
2253
64
        for (size_t i = 1; i < arguments.size(); i += 2) {
2254
38
            const size_t index = i / 2;
2255
38
            const auto* json_path_column = json_path_columns[index];
2256
38
            const auto* value_column = json_value_columns[index];
2257
2258
38
            json_paths[index].resize(json_path_constant[index] ? 1 : input_rows_count);
2259
38
            json_values[index].resize(json_value_constant[index] ? 1 : input_rows_count, nullptr);
2260
2261
74
            for (size_t row_idx = 0; row_idx != json_paths[index].size(); ++row_idx) {
2262
38
                if (json_path_null_maps[index] && (*json_path_null_maps[index])[row_idx]) {
2263
1
                    continue;
2264
1
                }
2265
2266
37
                auto path_string = json_path_column->get_data_at(row_idx);
2267
37
                if (!json_paths[index][row_idx].seek(path_string.data, path_string.size)) {
2268
1
                    return Status::InvalidArgument(
2269
1
                            "Json path error: Invalid Json Path for value: {}, "
2270
1
                            "argument "
2271
1
                            "index: {}, row index: {}",
2272
1
                            std::string_view(path_string.data, path_string.size), i, row_idx);
2273
1
                }
2274
2275
36
                if (json_paths[index][row_idx].is_wildcard()) {
2276
1
                    return Status::InvalidArgument(
2277
1
                            "In this situation, path expressions may not contain the * and ** "
2278
1
                            "tokens, argument index: {}, row index: {}",
2279
1
                            i, row_idx);
2280
1
                }
2281
36
            }
2282
2283
104
            for (size_t row_idx = 0; row_idx != json_values[index].size(); ++row_idx) {
2284
68
                if (json_value_null_maps[index] && (*json_value_null_maps[index])[row_idx]) {
2285
18
                    continue;
2286
18
                }
2287
2288
50
                auto value_string = value_column->get_data_at(row_idx);
2289
50
                const JsonbDocument* doc = nullptr;
2290
50
                RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(value_string.data,
2291
50
                                                                      value_string.size, &doc));
2292
50
                if (doc) {
2293
50
                    json_values[index][row_idx] = doc->getValue();
2294
50
                }
2295
50
            }
2296
36
        }
2297
2298
26
        return Status::OK();
2299
28
    }
2300
};
2301
2302
struct JsonbContainsAndPathImpl {
2303
64
    static DataTypes get_variadic_argument_types() {
2304
64
        return {std::make_shared<DataTypeJsonb>(), std::make_shared<DataTypeJsonb>(),
2305
64
                std::make_shared<DataTypeString>()};
2306
64
    }
2307
2308
    static Status execute_impl(FunctionContext* context, Block& block,
2309
                               const ColumnNumbers& arguments, uint32_t result,
2310
137
                               size_t input_rows_count) {
2311
137
        return JsonbContainsUtil::jsonb_contains_execute(context, block, arguments, result,
2312
137
                                                         input_rows_count);
2313
137
    }
2314
};
2315
2316
class FunctionJsonSearch : public IFunction {
2317
private:
2318
    using OneFun = std::function<Status(size_t, bool*)>;
2319
70
    static Status always_one(size_t i, bool* res) {
2320
70
        *res = true;
2321
70
        return Status::OK();
2322
70
    }
2323
48
    static Status always_all(size_t i, bool* res) {
2324
48
        *res = false;
2325
48
        return Status::OK();
2326
48
    }
2327
2328
    using CheckNullFun = std::function<bool(size_t)>;
2329
304
    static bool always_not_null(size_t) { return false; }
2330
2331
    using GetJsonStringRefFun = std::function<StringRef(size_t)>;
2332
2333
356
    Status matched(const std::string_view& str, LikeState* state, unsigned char* res) const {
2334
356
        StringRef pattern; // not used
2335
356
        StringRef value_val(str.data(), str.size());
2336
356
        return (state->scalar_function)(&state->search_state, value_val, pattern, res);
2337
356
    }
2338
2339
    /**
2340
     * Recursive search for matching string, if found, the result will be added to a vector
2341
     * @param element json element
2342
     * @param one_match
2343
     * @param search_str
2344
     * @param cur_path
2345
     * @param matches The path that has already been matched
2346
     * @return true if matched else false
2347
     */
2348
    bool find_matches(const JsonbValue* element, const bool& one_match, LikeState* state,
2349
775
                      JsonbPath* cur_path, std::unordered_set<std::string>* matches) const {
2350
775
        if (element->isString()) {
2351
356
            const auto* json_string = element->unpack<JsonbStringVal>();
2352
356
            const std::string_view element_str(json_string->getBlob(), json_string->length());
2353
356
            unsigned char res;
2354
356
            RETURN_IF_ERROR(matched(element_str, state, &res));
2355
356
            if (res) {
2356
233
                std::string str;
2357
233
                auto valid = cur_path->to_string(&str);
2358
233
                if (!valid) {
2359
0
                    return false;
2360
0
                }
2361
233
                return matches->insert(str).second;
2362
233
            } else {
2363
123
                return false;
2364
123
            }
2365
419
        } else if (element->isObject()) {
2366
212
            const auto* object = element->unpack<ObjectVal>();
2367
212
            bool find = false;
2368
218
            for (const auto& item : *object) {
2369
218
                Slice key(item.getKeyStr(), item.klen());
2370
218
                const auto* child_element = item.value();
2371
                // construct an object member path leg.
2372
218
                auto leg = std::make_unique<leg_info>(key.data, key.size, 0, MEMBER_CODE);
2373
218
                cur_path->add_leg_to_leg_vector(std::move(leg));
2374
218
                find |= find_matches(child_element, one_match, state, cur_path, matches);
2375
218
                cur_path->pop_leg_from_leg_vector();
2376
218
                if (one_match && find) {
2377
8
                    return true;
2378
8
                }
2379
218
            }
2380
204
            return find;
2381
212
        } else if (element->isArray()) {
2382
207
            const auto* array = element->unpack<ArrayVal>();
2383
207
            bool find = false;
2384
546
            for (int i = 0; i < array->numElem(); ++i) {
2385
411
                auto leg = std::make_unique<leg_info>(nullptr, 0, i, ARRAY_CODE);
2386
411
                cur_path->add_leg_to_leg_vector(std::move(leg));
2387
411
                const auto* child_element = array->get(i);
2388
                // construct an array cell path leg.
2389
411
                find |= find_matches(child_element, one_match, state, cur_path, matches);
2390
411
                cur_path->pop_leg_from_leg_vector();
2391
411
                if (one_match && find) {
2392
72
                    return true;
2393
72
                }
2394
411
            }
2395
135
            return find;
2396
207
        } else {
2397
0
            return false;
2398
0
        }
2399
775
    }
2400
2401
    void make_result_str(JsonbWriter& writer, std::unordered_set<std::string>& matches,
2402
132
                         ColumnString* result_col) const {
2403
132
        if (matches.size() == 1) {
2404
97
            for (const auto& str_ref : matches) {
2405
97
                writer.writeStartString();
2406
97
                writer.writeString(str_ref);
2407
97
                writer.writeEndString();
2408
97
            }
2409
97
        } else {
2410
35
            writer.writeStartArray();
2411
136
            for (const auto& str_ref : matches) {
2412
136
                writer.writeStartString();
2413
136
                writer.writeString(str_ref);
2414
136
                writer.writeEndString();
2415
136
            }
2416
35
            writer.writeEndArray();
2417
35
        }
2418
2419
132
        result_col->insert_data(writer.getOutput()->getBuffer(),
2420
132
                                (size_t)writer.getOutput()->getSize());
2421
132
    }
2422
2423
    template <bool search_is_const>
2424
    Status execute_vector(Block& block, size_t input_rows_count, CheckNullFun json_null_check,
2425
                          GetJsonStringRefFun col_json_string, CheckNullFun one_null_check,
2426
                          OneFun one_check, CheckNullFun search_null_check,
2427
                          const ColumnString* col_search_string, FunctionContext* context,
2428
56
                          size_t result) const {
2429
56
        auto result_col = ColumnString::create();
2430
56
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
2431
2432
56
        std::shared_ptr<LikeState> state_ptr;
2433
56
        LikeState* state = nullptr;
2434
56
        if (search_is_const) {
2435
8
            state = reinterpret_cast<LikeState*>(
2436
8
                    context->get_function_state(FunctionContext::THREAD_LOCAL));
2437
8
        }
2438
2439
56
        bool is_one = false;
2440
2441
56
        JsonbWriter writer;
2442
238
        for (size_t i = 0; i < input_rows_count; ++i) {
2443
            // an error occurs if the json_doc argument is not a valid json document.
2444
184
            if (json_null_check(i)) {
2445
16
                null_map->get_data()[i] = 1;
2446
16
                result_col->insert_data("", 0);
2447
16
                continue;
2448
16
            }
2449
168
            const auto& json_doc_str = col_json_string(i);
2450
168
            const JsonbDocument* json_doc = nullptr;
2451
168
            auto st = JsonbDocument::checkAndCreateDocument(json_doc_str.data, json_doc_str.size,
2452
168
                                                            &json_doc);
2453
168
            if (!st.ok()) {
2454
0
                return Status::InvalidArgument(
2455
0
                        "the json_doc argument at row {} is not a valid json document: {}", i,
2456
0
                        st.to_string());
2457
0
            }
2458
2459
168
            if (!one_null_check(i)) {
2460
164
                RETURN_IF_ERROR(one_check(i, &is_one));
2461
164
            }
2462
2463
166
            if (one_null_check(i) || search_null_check(i)) {
2464
20
                null_map->get_data()[i] = 1;
2465
20
                result_col->insert_data("", 0);
2466
20
                continue;
2467
20
            }
2468
2469
            // an error occurs if any path argument is not a valid path expression.
2470
146
            std::string root_path_str = "$";
2471
146
            JsonbPath root_path;
2472
146
            root_path.seek(root_path_str.c_str(), root_path_str.size());
2473
146
            std::vector<JsonbPath*> paths;
2474
146
            paths.push_back(&root_path);
2475
2476
146
            if (!search_is_const) {
2477
114
                state_ptr = std::make_shared<LikeState>();
2478
114
                state_ptr->is_like_pattern = true;
2479
114
                const auto& search_str = col_search_string->get_data_at(i);
2480
114
                RETURN_IF_ERROR(FunctionLike::construct_like_const_state(context, search_str,
2481
114
                                                                         state_ptr, false));
2482
114
                state = state_ptr.get();
2483
114
            }
2484
2485
            // maintain a hashset to deduplicate matches.
2486
146
            std::unordered_set<std::string> matches;
2487
146
            for (const auto& item : paths) {
2488
146
                auto* cur_path = item;
2489
146
                auto find = find_matches(json_doc->getValue(), is_one, state, cur_path, &matches);
2490
146
                if (is_one && find) {
2491
77
                    break;
2492
77
                }
2493
146
            }
2494
146
            if (matches.empty()) {
2495
                // returns NULL if the search_str is not found in the document.
2496
14
                null_map->get_data()[i] = 1;
2497
14
                result_col->insert_data("", 0);
2498
14
                continue;
2499
14
            }
2500
2501
132
            writer.reset();
2502
132
            make_result_str(writer, matches, result_col.get());
2503
132
        }
2504
54
        auto result_col_nullable =
2505
54
                ColumnNullable::create(std::move(result_col), std::move(null_map));
2506
54
        block.replace_by_position(result, std::move(result_col_nullable));
2507
54
        return Status::OK();
2508
56
    }
_ZNK5doris18FunctionJsonSearch14execute_vectorILb1EEENS_6StatusERNS_5BlockEmSt8functionIFbmEES5_IFNS_9StringRefEmEES7_S5_IFS2_mPbEES7_PKNS_9ColumnStrIjEEPNS_15FunctionContextEm
Line
Count
Source
2428
8
                          size_t result) const {
2429
8
        auto result_col = ColumnString::create();
2430
8
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
2431
2432
8
        std::shared_ptr<LikeState> state_ptr;
2433
8
        LikeState* state = nullptr;
2434
8
        if (search_is_const) {
2435
8
            state = reinterpret_cast<LikeState*>(
2436
8
                    context->get_function_state(FunctionContext::THREAD_LOCAL));
2437
8
        }
2438
2439
8
        bool is_one = false;
2440
2441
8
        JsonbWriter writer;
2442
44
        for (size_t i = 0; i < input_rows_count; ++i) {
2443
            // an error occurs if the json_doc argument is not a valid json document.
2444
36
            if (json_null_check(i)) {
2445
4
                null_map->get_data()[i] = 1;
2446
4
                result_col->insert_data("", 0);
2447
4
                continue;
2448
4
            }
2449
32
            const auto& json_doc_str = col_json_string(i);
2450
32
            const JsonbDocument* json_doc = nullptr;
2451
32
            auto st = JsonbDocument::checkAndCreateDocument(json_doc_str.data, json_doc_str.size,
2452
32
                                                            &json_doc);
2453
32
            if (!st.ok()) {
2454
0
                return Status::InvalidArgument(
2455
0
                        "the json_doc argument at row {} is not a valid json document: {}", i,
2456
0
                        st.to_string());
2457
0
            }
2458
2459
32
            if (!one_null_check(i)) {
2460
32
                RETURN_IF_ERROR(one_check(i, &is_one));
2461
32
            }
2462
2463
32
            if (one_null_check(i) || search_null_check(i)) {
2464
0
                null_map->get_data()[i] = 1;
2465
0
                result_col->insert_data("", 0);
2466
0
                continue;
2467
0
            }
2468
2469
            // an error occurs if any path argument is not a valid path expression.
2470
32
            std::string root_path_str = "$";
2471
32
            JsonbPath root_path;
2472
32
            root_path.seek(root_path_str.c_str(), root_path_str.size());
2473
32
            std::vector<JsonbPath*> paths;
2474
32
            paths.push_back(&root_path);
2475
2476
32
            if (!search_is_const) {
2477
0
                state_ptr = std::make_shared<LikeState>();
2478
0
                state_ptr->is_like_pattern = true;
2479
0
                const auto& search_str = col_search_string->get_data_at(i);
2480
0
                RETURN_IF_ERROR(FunctionLike::construct_like_const_state(context, search_str,
2481
0
                                                                         state_ptr, false));
2482
0
                state = state_ptr.get();
2483
0
            }
2484
2485
            // maintain a hashset to deduplicate matches.
2486
32
            std::unordered_set<std::string> matches;
2487
32
            for (const auto& item : paths) {
2488
32
                auto* cur_path = item;
2489
32
                auto find = find_matches(json_doc->getValue(), is_one, state, cur_path, &matches);
2490
32
                if (is_one && find) {
2491
16
                    break;
2492
16
                }
2493
32
            }
2494
32
            if (matches.empty()) {
2495
                // returns NULL if the search_str is not found in the document.
2496
0
                null_map->get_data()[i] = 1;
2497
0
                result_col->insert_data("", 0);
2498
0
                continue;
2499
0
            }
2500
2501
32
            writer.reset();
2502
32
            make_result_str(writer, matches, result_col.get());
2503
32
        }
2504
8
        auto result_col_nullable =
2505
8
                ColumnNullable::create(std::move(result_col), std::move(null_map));
2506
8
        block.replace_by_position(result, std::move(result_col_nullable));
2507
8
        return Status::OK();
2508
8
    }
_ZNK5doris18FunctionJsonSearch14execute_vectorILb0EEENS_6StatusERNS_5BlockEmSt8functionIFbmEES5_IFNS_9StringRefEmEES7_S5_IFS2_mPbEES7_PKNS_9ColumnStrIjEEPNS_15FunctionContextEm
Line
Count
Source
2428
48
                          size_t result) const {
2429
48
        auto result_col = ColumnString::create();
2430
48
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
2431
2432
48
        std::shared_ptr<LikeState> state_ptr;
2433
48
        LikeState* state = nullptr;
2434
48
        if (search_is_const) {
2435
0
            state = reinterpret_cast<LikeState*>(
2436
0
                    context->get_function_state(FunctionContext::THREAD_LOCAL));
2437
0
        }
2438
2439
48
        bool is_one = false;
2440
2441
48
        JsonbWriter writer;
2442
194
        for (size_t i = 0; i < input_rows_count; ++i) {
2443
            // an error occurs if the json_doc argument is not a valid json document.
2444
148
            if (json_null_check(i)) {
2445
12
                null_map->get_data()[i] = 1;
2446
12
                result_col->insert_data("", 0);
2447
12
                continue;
2448
12
            }
2449
136
            const auto& json_doc_str = col_json_string(i);
2450
136
            const JsonbDocument* json_doc = nullptr;
2451
136
            auto st = JsonbDocument::checkAndCreateDocument(json_doc_str.data, json_doc_str.size,
2452
136
                                                            &json_doc);
2453
136
            if (!st.ok()) {
2454
0
                return Status::InvalidArgument(
2455
0
                        "the json_doc argument at row {} is not a valid json document: {}", i,
2456
0
                        st.to_string());
2457
0
            }
2458
2459
136
            if (!one_null_check(i)) {
2460
132
                RETURN_IF_ERROR(one_check(i, &is_one));
2461
132
            }
2462
2463
134
            if (one_null_check(i) || search_null_check(i)) {
2464
20
                null_map->get_data()[i] = 1;
2465
20
                result_col->insert_data("", 0);
2466
20
                continue;
2467
20
            }
2468
2469
            // an error occurs if any path argument is not a valid path expression.
2470
114
            std::string root_path_str = "$";
2471
114
            JsonbPath root_path;
2472
114
            root_path.seek(root_path_str.c_str(), root_path_str.size());
2473
114
            std::vector<JsonbPath*> paths;
2474
114
            paths.push_back(&root_path);
2475
2476
114
            if (!search_is_const) {
2477
114
                state_ptr = std::make_shared<LikeState>();
2478
114
                state_ptr->is_like_pattern = true;
2479
114
                const auto& search_str = col_search_string->get_data_at(i);
2480
114
                RETURN_IF_ERROR(FunctionLike::construct_like_const_state(context, search_str,
2481
114
                                                                         state_ptr, false));
2482
114
                state = state_ptr.get();
2483
114
            }
2484
2485
            // maintain a hashset to deduplicate matches.
2486
114
            std::unordered_set<std::string> matches;
2487
114
            for (const auto& item : paths) {
2488
114
                auto* cur_path = item;
2489
114
                auto find = find_matches(json_doc->getValue(), is_one, state, cur_path, &matches);
2490
114
                if (is_one && find) {
2491
61
                    break;
2492
61
                }
2493
114
            }
2494
114
            if (matches.empty()) {
2495
                // returns NULL if the search_str is not found in the document.
2496
14
                null_map->get_data()[i] = 1;
2497
14
                result_col->insert_data("", 0);
2498
14
                continue;
2499
14
            }
2500
2501
100
            writer.reset();
2502
100
            make_result_str(writer, matches, result_col.get());
2503
100
        }
2504
46
        auto result_col_nullable =
2505
46
                ColumnNullable::create(std::move(result_col), std::move(null_map));
2506
46
        block.replace_by_position(result, std::move(result_col_nullable));
2507
46
        return Status::OK();
2508
48
    }
2509
2510
    static constexpr auto one = "one";
2511
    static constexpr auto all = "all";
2512
2513
public:
2514
    static constexpr auto name = "json_search";
2515
58
    static FunctionPtr create() { return std::make_shared<FunctionJsonSearch>(); }
2516
2517
1
    String get_name() const override { return name; }
2518
50
    bool is_variadic() const override { return false; }
2519
49
    size_t get_number_of_arguments() const override { return 3; }
2520
2521
49
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2522
49
        return make_nullable(std::make_shared<DataTypeJsonb>());
2523
49
    }
2524
2525
116
    bool use_default_implementation_for_nulls() const override { return false; }
2526
2527
206
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
2528
206
        if (scope != FunctionContext::THREAD_LOCAL) {
2529
49
            return Status::OK();
2530
49
        }
2531
157
        if (context->is_col_constant(2)) {
2532
77
            std::shared_ptr<LikeState> state = std::make_shared<LikeState>();
2533
77
            state->is_like_pattern = true;
2534
77
            const auto pattern_col = context->get_constant_col(2)->column_ptr;
2535
77
            const auto& pattern = pattern_col->get_data_at(0);
2536
77
            RETURN_IF_ERROR(
2537
77
                    FunctionLike::construct_like_const_state(context, pattern, state, false));
2538
77
            context->set_function_state(scope, state);
2539
77
        }
2540
157
        return Status::OK();
2541
157
    }
2542
2543
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
2544
67
                        uint32_t result, size_t input_rows_count) const override {
2545
        // the json_doc, one_or_all, and search_str must be given.
2546
        // and we require the positions are static.
2547
67
        if (arguments.size() < 3) {
2548
0
            return Status::InvalidArgument("too few arguments for function {}", name);
2549
0
        }
2550
67
        if (arguments.size() > 3) {
2551
0
            return Status::NotSupported("escape and path params are not support now");
2552
0
        }
2553
2554
67
        CheckNullFun json_null_check = always_not_null;
2555
67
        GetJsonStringRefFun get_json_fun;
2556
        // prepare jsonb data column
2557
67
        auto&& [col_json, json_is_const] =
2558
67
                unpack_if_const(block.get_by_position(arguments[0]).column);
2559
67
        const auto* col_json_string = check_and_get_column<ColumnString>(col_json.get());
2560
67
        if (const auto* nullable = check_and_get_column<ColumnNullable>(col_json.get())) {
2561
67
            col_json_string =
2562
67
                    check_and_get_column<ColumnString>(nullable->get_nested_column_ptr().get());
2563
67
        }
2564
2565
67
        if (!col_json_string) {
2566
0
            return Status::RuntimeError("Illegal arg json {} should be ColumnString",
2567
0
                                        col_json->get_name());
2568
0
        }
2569
2570
67
        auto create_all_null_result = [&]() {
2571
6
            auto res_str = ColumnString::create();
2572
6
            res_str->insert_default();
2573
6
            auto res = ColumnNullable::create(std::move(res_str), ColumnUInt8::create(1, 1));
2574
6
            if (input_rows_count > 1) {
2575
6
                block.get_by_position(result).column =
2576
6
                        ColumnConst::create(std::move(res), input_rows_count);
2577
6
            } else {
2578
0
                block.get_by_position(result).column = std::move(res);
2579
0
            }
2580
6
            return Status::OK();
2581
6
        };
2582
2583
67
        if (json_is_const) {
2584
11
            if (col_json->is_null_at(0)) {
2585
2
                return create_all_null_result();
2586
9
            } else {
2587
9
                const auto& json_str = col_json_string->get_data_at(0);
2588
36
                get_json_fun = [json_str](size_t i) { return json_str; };
2589
9
            }
2590
56
        } else {
2591
148
            json_null_check = [col_json](size_t i) { return col_json->is_null_at(i); };
2592
132
            get_json_fun = [col_json_string](size_t i) { return col_json_string->get_data_at(i); };
2593
56
        }
2594
2595
        // one_or_all
2596
65
        CheckNullFun one_null_check = always_not_null;
2597
65
        OneFun one_check = always_one;
2598
65
        auto&& [col_one, one_is_const] =
2599
65
                unpack_if_const(block.get_by_position(arguments[1]).column);
2600
65
        one_is_const |= input_rows_count == 1;
2601
65
        const auto* col_one_string = check_and_get_column<ColumnString>(col_one.get());
2602
65
        if (const auto* nullable = check_and_get_column<ColumnNullable>(col_one.get())) {
2603
11
            col_one_string = check_and_get_column<ColumnString>(*nullable->get_nested_column_ptr());
2604
11
        }
2605
65
        if (!col_one_string) {
2606
0
            return Status::RuntimeError("Illegal arg one {} should be ColumnString",
2607
0
                                        col_one->get_name());
2608
0
        }
2609
65
        if (one_is_const) {
2610
51
            if (col_one->is_null_at(0)) {
2611
4
                return create_all_null_result();
2612
47
            } else {
2613
47
                const auto& one_or_all = col_one_string->get_data_at(0);
2614
47
                std::string one_or_all_str = one_or_all.to_string();
2615
47
                if (strcasecmp(one_or_all_str.c_str(), all) == 0) {
2616
17
                    one_check = always_all;
2617
30
                } else if (strcasecmp(one_or_all_str.c_str(), one) == 0) {
2618
                    // nothing
2619
25
                } else {
2620
                    // an error occurs if the one_or_all argument is not 'one' nor 'all'.
2621
5
                    return Status::InvalidArgument(
2622
5
                            "the one_or_all argument {} is not 'one' not 'all'", one_or_all_str);
2623
5
                }
2624
47
            }
2625
51
        } else {
2626
98
            one_null_check = [col_one](size_t i) { return col_one->is_null_at(i); };
2627
46
            one_check = [col_one_string](size_t i, bool* is_one) {
2628
46
                const auto& one_or_all = col_one_string->get_data_at(i);
2629
46
                std::string one_or_all_str = one_or_all.to_string();
2630
46
                if (strcasecmp(one_or_all_str.c_str(), all) == 0) {
2631
26
                    *is_one = false;
2632
26
                } else if (strcasecmp(one_or_all_str.c_str(), one) == 0) {
2633
18
                    *is_one = true;
2634
18
                } else {
2635
                    // an error occurs if the one_or_all argument is not 'one' nor 'all'.
2636
2
                    return Status::InvalidArgument(
2637
2
                            "the one_or_all argument {} is not 'one' not 'all'", one_or_all_str);
2638
2
                }
2639
44
                return Status::OK();
2640
46
            };
2641
14
        }
2642
2643
        // search_str
2644
56
        auto&& [col_search, search_is_const] =
2645
56
                unpack_if_const(block.get_by_position(arguments[2]).column);
2646
2647
56
        const auto* col_search_string = check_and_get_column<ColumnString>(col_search.get());
2648
56
        if (const auto* nullable = check_and_get_column<ColumnNullable>(col_search.get())) {
2649
30
            col_search_string =
2650
30
                    check_and_get_column<ColumnString>(*nullable->get_nested_column_ptr());
2651
30
        }
2652
56
        if (!col_search_string) {
2653
0
            return Status::RuntimeError("Illegal arg pattern {} should be ColumnString",
2654
0
                                        col_search->get_name());
2655
0
        }
2656
56
        if (search_is_const) {
2657
8
            CheckNullFun search_null_check = always_not_null;
2658
8
            if (col_search->is_null_at(0)) {
2659
0
                return create_all_null_result();
2660
0
            }
2661
8
            RETURN_IF_ERROR(execute_vector<true>(
2662
8
                    block, input_rows_count, json_null_check, get_json_fun, one_null_check,
2663
8
                    one_check, search_null_check, col_search_string, context, result));
2664
48
        } else {
2665
130
            CheckNullFun search_null_check = [col_search](size_t i) {
2666
130
                return col_search->is_null_at(i);
2667
130
            };
2668
48
            RETURN_IF_ERROR(execute_vector<false>(
2669
48
                    block, input_rows_count, json_null_check, get_json_fun, one_null_check,
2670
48
                    one_check, search_null_check, col_search_string, context, result));
2671
48
        }
2672
54
        return Status::OK();
2673
56
    }
2674
};
2675
2676
// Heap-backed scratch area holding one serialized document.
struct DocumentBuffer {
    // Owning pointer to the backing storage; empty until first allocation.
    std::unique_ptr<char[]> ptr;
    // Number of bytes in `ptr` that currently hold valid data.
    size_t size {0};
    // Number of bytes allocated behind `ptr`.
    size_t capacity {0};
};
2681
2682
class FunctionJsonbRemove : public IFunction {
2683
public:
2684
    static constexpr auto name = "jsonb_remove";
2685
    static constexpr auto alias = "json_remove";
2686
2687
31
    static FunctionPtr create() { return std::make_shared<FunctionJsonbRemove>(); }
2688
2689
0
    String get_name() const override { return name; }
2690
2691
0
    size_t get_number_of_arguments() const override { return 0; }
2692
23
    bool is_variadic() const override { return true; }
2693
2694
44
    bool use_default_implementation_for_nulls() const override { return false; }
2695
2696
22
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2697
22
        return make_nullable(std::make_shared<DataTypeJsonb>());
2698
22
    }
2699
2700
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
2701
0
                        uint32_t result, size_t input_rows_count) const override {
2702
        // Check if arguments count is valid (json_doc + at least one path)
2703
0
        if (arguments.size() < 2) {
2704
0
            return Status::InvalidArgument("json_remove requires at least 2 arguments");
2705
0
        }
2706
2707
0
        auto return_data_type = make_nullable(std::make_shared<DataTypeJsonb>());
2708
0
        auto result_column = return_data_type->create_column();
2709
0
        auto& nullable_column = assert_cast<ColumnNullable&>(*result_column);
2710
0
        auto& res_chars =
2711
0
                assert_cast<ColumnString&>(nullable_column.get_nested_column()).get_chars();
2712
0
        auto& res_offsets =
2713
0
                assert_cast<ColumnString&>(nullable_column.get_nested_column()).get_offsets();
2714
0
        auto& null_map = nullable_column.get_null_map_data();
2715
2716
0
        res_chars.reserve(input_rows_count * 64);
2717
0
        res_offsets.resize(input_rows_count);
2718
0
        null_map.resize_fill(input_rows_count, 0);
2719
2720
        // Get JSON document column
2721
0
        auto [json_column, json_const] =
2722
0
                unpack_if_const(block.get_by_position(arguments[0]).column);
2723
0
        const auto* json_nullable = check_and_get_column<ColumnNullable>(json_column.get());
2724
0
        const ColumnString* json_data_column = nullptr;
2725
0
        const NullMap* json_null_map = nullptr;
2726
2727
0
        if (json_nullable) {
2728
0
            json_null_map = &json_nullable->get_null_map_data();
2729
0
            json_data_column =
2730
0
                    check_and_get_column<ColumnString>(&json_nullable->get_nested_column());
2731
0
        } else {
2732
0
            json_data_column = check_and_get_column<ColumnString>(json_column.get());
2733
0
        }
2734
2735
0
        if (!json_data_column) {
2736
0
            return Status::InvalidArgument("First argument must be a JSON document");
2737
0
        }
2738
2739
        // Parse paths
2740
0
        std::vector<const ColumnString*> path_columns;
2741
0
        std::vector<const NullMap*> path_null_maps;
2742
0
        std::vector<bool> path_constants;
2743
2744
0
        for (size_t i = 1; i < arguments.size(); ++i) {
2745
0
            auto [path_column, path_const] =
2746
0
                    unpack_if_const(block.get_by_position(arguments[i]).column);
2747
0
            const auto* path_nullable = check_and_get_column<ColumnNullable>(path_column.get());
2748
2749
0
            if (path_nullable) {
2750
0
                path_null_maps.push_back(&path_nullable->get_null_map_data());
2751
0
                path_columns.push_back(
2752
0
                        check_and_get_column<ColumnString>(&path_nullable->get_nested_column()));
2753
0
            } else {
2754
0
                path_null_maps.push_back(nullptr);
2755
0
                path_columns.push_back(check_and_get_column<ColumnString>(path_column.get()));
2756
0
            }
2757
2758
0
            if (!path_columns.back()) {
2759
0
                return Status::InvalidArgument(
2760
0
                        fmt::format("Argument {} must be a string path", i + 1));
2761
0
            }
2762
2763
0
            path_constants.push_back(path_const);
2764
0
        }
2765
2766
        // Reusable JsonbWriter for performance
2767
0
        JsonbWriter writer;
2768
2769
0
        for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) {
2770
0
            size_t json_idx = index_check_const(row_idx, json_const);
2771
2772
            // Check if JSON document is null
2773
0
            if (json_null_map && (*json_null_map)[json_idx]) {
2774
0
                null_map[row_idx] = 1;
2775
0
                res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size());
2776
0
                continue;
2777
0
            }
2778
2779
            // Parse JSON document
2780
0
            const auto& json_data = json_data_column->get_data_at(json_idx);
2781
0
            const JsonbDocument* json_doc = nullptr;
2782
0
            Status parse_status = JsonbDocument::checkAndCreateDocument(json_data.data,
2783
0
                                                                        json_data.size, &json_doc);
2784
2785
0
            if (!parse_status.ok() || !json_doc) {
2786
0
                null_map[row_idx] = 1;
2787
0
                res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size());
2788
0
                continue;
2789
0
            }
2790
2791
            // Check if any path is null
2792
0
            bool has_null_path = false;
2793
0
            for (size_t path_idx = 0; path_idx < path_columns.size(); ++path_idx) {
2794
0
                size_t idx = index_check_const(row_idx, path_constants[path_idx]);
2795
0
                if (path_null_maps[path_idx] && (*path_null_maps[path_idx])[idx]) {
2796
0
                    has_null_path = true;
2797
0
                    break;
2798
0
                }
2799
0
            }
2800
2801
0
            if (has_null_path) {
2802
0
                null_map[row_idx] = 1;
2803
0
                res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size());
2804
0
                continue;
2805
0
            }
2806
2807
0
            std::vector<JsonbPath> paths;
2808
2809
0
            for (size_t path_idx = 0; path_idx < path_columns.size(); ++path_idx) {
2810
0
                size_t idx = index_check_const(row_idx, path_constants[path_idx]);
2811
0
                const auto& path_data = path_columns[path_idx]->get_data_at(idx);
2812
2813
0
                JsonbPath path;
2814
0
                if (!path.seek(path_data.data, path_data.size)) {
2815
0
                    return Status::InvalidArgument(
2816
0
                            "Json path error: Invalid Json Path for value: {} at row: {}",
2817
0
                            std::string_view(path_data.data, path_data.size), row_idx);
2818
0
                }
2819
2820
0
                if (path.is_wildcard() || path.is_supper_wildcard()) {
2821
0
                    return Status::InvalidArgument(
2822
0
                            "In this situation, path expressions may not contain the * and ** "
2823
0
                            "tokens or an array range, argument index: {}, row index: {}",
2824
0
                            path_idx + 1, row_idx);
2825
0
                }
2826
2827
0
                paths.push_back(std::move(path));
2828
0
            }
2829
2830
0
            const JsonbValue* current_value = json_doc->getValue();
2831
2832
0
            DocumentBuffer tmp_buffer;
2833
2834
0
            for (size_t path_idx = 0; path_idx < paths.size(); ++path_idx) {
2835
0
                writer.reset();
2836
2837
0
                auto find_result = current_value->findValue(paths[path_idx]);
2838
2839
0
                if (find_result.is_wildcard) {
2840
0
                    continue;
2841
0
                }
2842
2843
0
                if (find_result.value) {
2844
0
                    RETURN_IF_ERROR(clone_without_path(current_value, paths[path_idx], writer));
2845
2846
0
                    auto* writer_output = writer.getOutput();
2847
0
                    if (writer_output->getSize() > tmp_buffer.capacity) {
2848
0
                        tmp_buffer.capacity =
2849
0
                                ((size_t(writer_output->getSize()) + 1024 - 1) / 1024) * 1024;
2850
0
                        tmp_buffer.ptr = std::make_unique<char[]>(tmp_buffer.capacity);
2851
0
                        DCHECK_LE(writer_output->getSize(), tmp_buffer.capacity);
2852
0
                    }
2853
2854
0
                    memcpy(tmp_buffer.ptr.get(), writer_output->getBuffer(),
2855
0
                           writer_output->getSize());
2856
0
                    tmp_buffer.size = writer_output->getSize();
2857
2858
0
                    const JsonbDocument* new_doc = nullptr;
2859
0
                    RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(
2860
0
                            tmp_buffer.ptr.get(), tmp_buffer.size, &new_doc));
2861
2862
0
                    current_value = new_doc->getValue();
2863
0
                }
2864
0
            }
2865
2866
0
            const JsonbDocument* modified_doc = nullptr;
2867
0
            if (current_value != json_doc->getValue()) {
2868
0
                RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(
2869
0
                        tmp_buffer.ptr.get(), tmp_buffer.size, &modified_doc));
2870
0
            } else {
2871
0
                modified_doc = json_doc;
2872
0
            }
2873
2874
            // Write the final result
2875
0
            const auto size = modified_doc->numPackedBytes();
2876
0
            res_chars.insert(reinterpret_cast<const char*>(modified_doc),
2877
0
                             reinterpret_cast<const char*>(modified_doc) + size);
2878
0
            res_offsets[row_idx] = static_cast<uint32_t>(res_chars.size());
2879
0
        }
2880
2881
0
        block.get_by_position(result).column = std::move(result_column);
2882
0
        return Status::OK();
2883
0
    }
2884
2885
private:
2886
    Status clone_without_path(const JsonbValue* root, const JsonbPath& path,
2887
24
                              JsonbWriter& writer) const {
2888
        // Start writing at the root level
2889
24
        if (root->isObject()) {
2890
15
            writer.writeStartObject();
2891
15
            RETURN_IF_ERROR(clone_object_without_path(root, path, 0, writer));
2892
15
            writer.writeEndObject();
2893
15
        } else if (root->isArray()) {
2894
9
            writer.writeStartArray();
2895
9
            RETURN_IF_ERROR(clone_array_without_path(root, path, 0, writer));
2896
9
            writer.writeEndArray();
2897
9
        } else {
2898
            // Primitive value - can't remove anything from it
2899
0
            writer.writeValue(root);
2900
0
        }
2901
24
        return Status::OK();
2902
24
    }
2903
2904
    Status clone_object_without_path(const JsonbValue* obj_value, const JsonbPath& path,
2905
20
                                     size_t depth, JsonbWriter& writer) const {
2906
20
        const auto* obj = obj_value->unpack<ObjectVal>();
2907
2908
40
        for (const auto& kv : *obj) {
2909
40
            std::string key(kv.getKeyStr(), kv.klen());
2910
2911
40
            if (depth < path.get_leg_vector_size()) {
2912
40
                const auto* leg = path.get_leg_from_leg_vector(depth);
2913
40
                if (leg->type == MEMBER_CODE) {
2914
40
                    std::string target_key(leg->leg_ptr, leg->leg_len);
2915
2916
40
                    if (key == target_key) {
2917
20
                        if (depth == path.get_leg_vector_size() - 1) {
2918
12
                            continue;
2919
12
                        } else {
2920
8
                            writer.writeKey(kv.getKeyStr(), kv.klen());
2921
8
                            if (kv.value()->isObject()) {
2922
3
                                writer.writeStartObject();
2923
3
                                RETURN_IF_ERROR(clone_object_without_path(kv.value(), path,
2924
3
                                                                          depth + 1, writer));
2925
3
                                writer.writeEndObject();
2926
5
                            } else if (kv.value()->isArray()) {
2927
5
                                writer.writeStartArray();
2928
5
                                RETURN_IF_ERROR(clone_array_without_path(kv.value(), path,
2929
5
                                                                         depth + 1, writer));
2930
5
                                writer.writeEndArray();
2931
5
                            } else {
2932
0
                                writer.writeValue(kv.value());
2933
0
                            }
2934
8
                        }
2935
20
                    } else {
2936
20
                        writer.writeKey(kv.getKeyStr(), kv.klen());
2937
20
                        writer.writeValue(kv.value());
2938
20
                    }
2939
40
                } else {
2940
0
                    writer.writeKey(kv.getKeyStr(), kv.klen());
2941
0
                    writer.writeValue(kv.value());
2942
0
                }
2943
40
            } else {
2944
0
                writer.writeKey(kv.getKeyStr(), kv.klen());
2945
0
                writer.writeValue(kv.value());
2946
0
            }
2947
40
        }
2948
2949
20
        return Status::OK();
2950
20
    }
2951
2952
    Status clone_array_without_path(const JsonbValue* arr_value, const JsonbPath& path,
2953
17
                                    size_t depth, JsonbWriter& writer) const {
2954
17
        const auto* arr = arr_value->unpack<ArrayVal>();
2955
2956
17
        int index = 0;
2957
52
        for (const auto& element : *arr) {
2958
52
            if (depth < path.get_leg_vector_size()) {
2959
52
                const auto* leg = path.get_leg_from_leg_vector(depth);
2960
52
                if (leg->type == ARRAY_CODE) {
2961
52
                    int target_index = leg->array_index;
2962
2963
52
                    if (index == target_index) {
2964
17
                        if (depth == path.get_leg_vector_size() - 1) {
2965
                            // This is the target element to remove - skip it
2966
12
                        } else {
2967
5
                            if (element.isObject()) {
2968
2
                                writer.writeStartObject();
2969
2
                                RETURN_IF_ERROR(clone_object_without_path(&element, path, depth + 1,
2970
2
                                                                          writer));
2971
2
                                writer.writeEndObject();
2972
3
                            } else if (element.isArray()) {
2973
3
                                writer.writeStartArray();
2974
3
                                RETURN_IF_ERROR(clone_array_without_path(&element, path, depth + 1,
2975
3
                                                                         writer));
2976
3
                                writer.writeEndArray();
2977
3
                            } else {
2978
0
                                writer.writeValue(&element);
2979
0
                            }
2980
5
                        }
2981
35
                    } else {
2982
35
                        writer.writeValue(&element);
2983
35
                    }
2984
52
                } else {
2985
0
                    writer.writeValue(&element);
2986
0
                }
2987
52
            } else {
2988
0
                writer.writeValue(&element);
2989
0
            }
2990
52
            index++;
2991
52
        }
2992
2993
17
        return Status::OK();
2994
17
    }
2995
};
2996
2997
class FunctionStripNullValue : public IFunction {
2998
public:
2999
    static constexpr auto name = "strip_null_value";
3000
24
    static FunctionPtr create() { return std::make_shared<FunctionStripNullValue>(); }
3001
3002
1
    String get_name() const override { return name; }
3003
16
    bool is_variadic() const override { return false; }
3004
15
    size_t get_number_of_arguments() const override { return 1; }
3005
3006
30
    bool use_default_implementation_for_nulls() const override { return false; }
3007
3008
15
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
3009
15
        return make_nullable(std::make_shared<DataTypeJsonb>());
3010
15
    }
3011
3012
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
3013
15
                        uint32_t result, size_t input_rows_count) const override {
3014
15
        const auto arg_column =
3015
15
                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
3016
15
        const ColumnString* json_column = nullptr;
3017
15
        const NullMap* json_null_map = nullptr;
3018
15
        if (const auto* nullable_col = check_and_get_column<ColumnNullable>(arg_column.get())) {
3019
15
            json_column = assert_cast<const ColumnString*>(&nullable_col->get_nested_column());
3020
15
            json_null_map = &nullable_col->get_null_map_data();
3021
15
        } else {
3022
0
            json_column = assert_cast<const ColumnString*>(arg_column.get());
3023
0
        }
3024
3025
15
        auto return_data_type = make_nullable(std::make_shared<DataTypeJsonb>());
3026
15
        auto result_column = return_data_type->create_column();
3027
3028
15
        auto& result_nullmap = assert_cast<ColumnNullable&>(*result_column).get_null_map_data();
3029
15
        auto& result_data_col = assert_cast<ColumnString&>(
3030
15
                assert_cast<ColumnNullable&>(*result_column).get_nested_column());
3031
3032
15
        result_nullmap.resize_fill(input_rows_count, 0);
3033
60
        for (size_t i = 0; i != input_rows_count; ++i) {
3034
45
            if (json_null_map && (*json_null_map)[i]) {
3035
13
                result_nullmap[i] = 1;
3036
13
                result_data_col.insert_default();
3037
13
                continue;
3038
13
            }
3039
32
            const JsonbDocument* json_doc = nullptr;
3040
32
            const auto& json_str = json_column->get_data_at(i);
3041
32
            RETURN_IF_ERROR(
3042
32
                    JsonbDocument::checkAndCreateDocument(json_str.data, json_str.size, &json_doc));
3043
32
            if (json_doc) [[likely]] {
3044
32
                if (json_doc->getValue()->isNull()) {
3045
9
                    result_nullmap[i] = 1;
3046
9
                    result_data_col.insert_default();
3047
23
                } else {
3048
23
                    result_nullmap[i] = 0;
3049
23
                    result_data_col.insert_data(json_str.data, json_str.size);
3050
23
                }
3051
32
            } else {
3052
0
                result_nullmap[i] = 1;
3053
0
                result_data_col.insert_default();
3054
0
            }
3055
32
        }
3056
3057
15
        block.get_by_position(result).column = std::move(result_column);
3058
15
        return Status::OK();
3059
15
    }
3060
};
3061
3062
8
// Registers every JSONB function defined in this file, together with its alias where one
// exists, in `factory`.
void register_function_jsonb(SimpleFunctionFactory& factory) {
    // Short names for the template instantiations registered below.
    using JsonbLength = FunctionJsonbLength<JsonbLengthAndPathImpl>;
    using JsonbContains = FunctionJsonbContains<JsonbContainsAndPathImpl>;
    using JsonbArrayKeepNull = FunctionJsonbArray<false>;
    using JsonbArrayIgnoreNull = FunctionJsonbArray<true>;
    using JsonbInsert = FunctionJsonbModify<JsonbModifyType::Insert>;
    using JsonbSet = FunctionJsonbModify<JsonbModifyType::Set>;
    using JsonbReplace = FunctionJsonbModify<JsonbModifyType::Replace>;

    // Parsing
    factory.register_function<FunctionJsonbParse>(FunctionJsonbParse::name);
    factory.register_alias(FunctionJsonbParse::name, FunctionJsonbParse::alias);
    factory.register_function<FunctionJsonbParseErrorNull>("json_parse_error_to_null");
    factory.register_alias("json_parse_error_to_null", "jsonb_parse_error_to_null");
    factory.register_function<FunctionJsonbParseErrorValue>("json_parse_error_to_value");
    factory.register_alias("json_parse_error_to_value", "jsonb_parse_error_to_value");

    // Inspection
    factory.register_function<FunctionJsonbExists>();
    factory.register_alias(FunctionJsonbExists::name, FunctionJsonbExists::alias);
    factory.register_function<FunctionJsonbType>();
    factory.register_alias(FunctionJsonbType::name, FunctionJsonbType::alias);

    factory.register_function<FunctionJsonbKeys>();
    factory.register_alias(FunctionJsonbKeys::name, FunctionJsonbKeys::alias);

    // Extraction
    factory.register_function<FunctionJsonbExtractIsnull>();
    factory.register_alias(FunctionJsonbExtractIsnull::name, FunctionJsonbExtractIsnull::alias);

    factory.register_function<FunctionJsonbExtractJsonb>();
    factory.register_alias(FunctionJsonbExtractJsonb::name, FunctionJsonbExtractJsonb::alias);
    factory.register_function<FunctionJsonbExtractJsonbNoQuotes>();
    factory.register_alias(FunctionJsonbExtractJsonbNoQuotes::name,
                           FunctionJsonbExtractJsonbNoQuotes::alias);

    // Length, containment and search
    factory.register_function<JsonbLength>();
    factory.register_function<JsonbContains>();

    factory.register_function<FunctionJsonSearch>();

    // Construction
    factory.register_function<JsonbArrayKeepNull>();
    factory.register_alias(JsonbArrayKeepNull::name, JsonbArrayKeepNull::alias);

    factory.register_function<JsonbArrayIgnoreNull>("json_array_ignore_null");
    factory.register_alias("json_array_ignore_null", "jsonb_array_ignore_null");

    factory.register_function<FunctionJsonbObject>();
    factory.register_alias(FunctionJsonbObject::name, FunctionJsonbObject::alias);

    // Modification
    factory.register_function<JsonbInsert>();
    factory.register_alias(JsonbInsert::name, JsonbInsert::alias);
    factory.register_function<JsonbSet>();
    factory.register_alias(JsonbSet::name, JsonbSet::alias);
    factory.register_function<JsonbReplace>();
    factory.register_alias(JsonbReplace::name, JsonbReplace::alias);

    factory.register_function<FunctionJsonbRemove>();
    factory.register_alias(FunctionJsonbRemove::name, FunctionJsonbRemove::alias);

    factory.register_function<FunctionStripNullValue>();
}
3116
3117
} // namespace doris