Coverage Report

Created: 2026-06-03 11:32

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/function_json.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include <glog/logging.h>
19
#include <rapidjson/allocators.h>
20
#include <rapidjson/document.h>
21
#include <rapidjson/rapidjson.h>
22
#include <rapidjson/stringbuffer.h>
23
#include <rapidjson/writer.h>
24
25
#include <memory>
26
#include <string_view>
27
#include <utility>
28
#include <vector>
29
30
#include "common/cast_set.h"
31
#include "common/compiler_util.h" // IWYU pragma: keep
32
#include "common/status.h"
33
#include "core/assert_cast.h"
34
#include "core/block/block.h"
35
#include "core/block/column_numbers.h"
36
#include "core/block/column_with_type_and_name.h"
37
#include "core/column/column.h"
38
#include "core/column/column_nullable.h"
39
#include "core/column/column_string.h"
40
#include "core/column/column_vector.h"
41
#include "core/data_type/data_type.h"
42
#include "core/data_type/data_type_nullable.h"
43
#include "core/data_type/data_type_number.h"
44
#include "core/data_type/data_type_string.h"
45
#include "core/string_ref.h"
46
#include "core/types.h"
47
#include "core/value/jsonb_value.h"
48
#include "exprs/function/function.h"
49
#include "exprs/function/simple_function_factory.h"
50
51
namespace doris {
52
class FunctionContext;
53
} // namespace doris
54
55
namespace doris {
56
struct FunctionJsonQuoteImpl {
57
    static constexpr auto name = "json_quote";
58
59
7
    static DataTypePtr get_return_type_impl(const DataTypes& arguments) {
60
7
        if (!arguments.empty() && arguments[0] && arguments[0]->is_nullable()) {
61
0
            return make_nullable(std::make_shared<DataTypeString>());
62
0
        }
63
7
        return std::make_shared<DataTypeString>();
64
7
    }
65
    static void execute(const std::vector<const ColumnString*>& data_columns,
66
16
                        ColumnString& result_column, size_t input_rows_count) {
67
16
        rapidjson::Document document;
68
16
        rapidjson::Document::AllocatorType& allocator = document.GetAllocator();
69
70
16
        rapidjson::Value value;
71
72
16
        rapidjson::StringBuffer buf;
73
74
42
        for (int i = 0; i < input_rows_count; i++) {
75
26
            StringRef data = data_columns[0]->get_data_at(i);
76
26
            value.SetString(data.data, cast_set<rapidjson::SizeType>(data.size), allocator);
77
78
26
            buf.Clear();
79
26
            rapidjson::Writer<rapidjson::StringBuffer> writer(buf);
80
26
            value.Accept(writer);
81
26
            result_column.insert_data(buf.GetString(), buf.GetSize());
82
26
        }
83
16
    }
84
};
85
86
template <typename Impl>
87
class FunctionJson : public IFunction {
88
public:
89
    static constexpr auto name = Impl::name;
90
91
16
    static FunctionPtr create() { return std::make_shared<FunctionJson<Impl>>(); }
92
93
0
    String get_name() const override { return name; }
94
95
0
    size_t get_number_of_arguments() const override { return 0; }
96
97
8
    bool is_variadic() const override { return true; }
98
99
7
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
100
7
        return Impl::get_return_type_impl(arguments);
101
7
    }
102
103
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
104
16
                        uint32_t result, size_t input_rows_count) const override {
105
16
        auto result_column = ColumnString::create();
106
107
16
        std::vector<ColumnPtr> column_ptrs; // prevent converted column destruct
108
16
        std::vector<const ColumnString*> data_columns;
109
16
        for (unsigned int argument : arguments) {
110
16
            column_ptrs.push_back(
111
16
                    block.get_by_position(argument).column->convert_to_full_column_if_const());
112
16
            data_columns.push_back(assert_cast<const ColumnString*>(column_ptrs.back().get()));
113
16
        }
114
115
16
        Impl::execute(data_columns, *result_column.get(), input_rows_count);
116
16
        block.get_by_position(result).column = std::move(result_column);
117
16
        return Status::OK();
118
16
    }
119
};
120
121
class FunctionJsonValid : public IFunction {
122
public:
123
    static constexpr auto name = "json_valid";
124
25
    static FunctionPtr create() { return std::make_shared<FunctionJsonValid>(); }
125
126
1
    String get_name() const override { return name; }
127
128
16
    size_t get_number_of_arguments() const override { return 1; }
129
130
16
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
131
16
        return make_nullable(std::make_shared<DataTypeInt32>());
132
16
    }
133
134
68
    bool use_default_implementation_for_nulls() const override { return false; }
135
136
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
137
52
                        uint32_t result, size_t input_rows_count) const override {
138
52
        const IColumn& col_from = *(block.get_by_position(arguments[0]).column);
139
140
52
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
141
142
52
        const ColumnUInt8::Container* input_null_map = nullptr;
143
52
        const ColumnString* col_from_string = nullptr;
144
52
        if (const auto* nullable = check_and_get_column<ColumnNullable>(col_from)) {
145
34
            input_null_map = &nullable->get_null_map_data();
146
34
            col_from_string =
147
34
                    check_and_get_column<ColumnString>(*nullable->get_nested_column_ptr());
148
34
        } else {
149
18
            col_from_string = check_and_get_column<ColumnString>(col_from);
150
18
        }
151
152
52
        if (!col_from_string) {
153
0
            return Status::RuntimeError("Illegal column {} should be ColumnString",
154
0
                                        col_from.get_name());
155
0
        }
156
157
52
        auto col_to = ColumnInt32::create();
158
52
        auto& vec_to = col_to->get_data();
159
52
        size_t size = col_from.size();
160
52
        vec_to.resize(size);
161
162
        // parser can be reused for performance
163
164
52
        auto input_type = block.get_by_position(arguments[0]).type->get_primitive_type();
165
166
52
        if (input_type == PrimitiveType::TYPE_VARCHAR || input_type == PrimitiveType::TYPE_CHAR ||
167
52
            input_type == PrimitiveType::TYPE_STRING) {
168
12
            JsonBinaryValue jsonb_value;
169
24
            for (size_t i = 0; i < input_rows_count; ++i) {
170
12
                if (input_null_map && (*input_null_map)[i]) {
171
4
                    null_map->get_data()[i] = 1;
172
4
                    vec_to[i] = 0;
173
4
                    continue;
174
4
                }
175
176
8
                const auto& val = col_from_string->get_data_at(i);
177
8
                if (jsonb_value.from_json_string(val.data, cast_set<unsigned int>(val.size)).ok()) {
178
4
                    vec_to[i] = 1;
179
4
                } else {
180
4
                    vec_to[i] = 0;
181
4
                }
182
8
            }
183
184
40
        } else {
185
40
            DCHECK(input_type == PrimitiveType::TYPE_JSONB);
186
143
            for (size_t i = 0; i < input_rows_count; ++i) {
187
103
                if (input_null_map && (*input_null_map)[i]) {
188
6
                    null_map->get_data()[i] = 1;
189
6
                    vec_to[i] = 0;
190
6
                    continue;
191
6
                }
192
97
                const auto& val = col_from_string->get_data_at(i);
193
97
                if (val.size == 0) {
194
0
                    vec_to[i] = 0;
195
0
                    continue;
196
0
                }
197
97
                const JsonbDocument* doc = nullptr;
198
97
                auto st = JsonbDocument::checkAndCreateDocument(val.data, val.size, &doc);
199
97
                if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
200
0
                    vec_to[i] = 0;
201
0
                    continue;
202
0
                }
203
97
                const JsonbValue* value = doc->getValue();
204
97
                if (UNLIKELY(!value)) {
205
0
                    vec_to[i] = 0;
206
0
                    continue;
207
0
                }
208
97
                vec_to[i] = 1;
209
97
            }
210
40
        }
211
212
52
        block.replace_by_position(result,
213
52
                                  ColumnNullable::create(std::move(col_to), std::move(null_map)));
214
215
52
        return Status::OK();
216
52
    }
217
};
218
class FunctionJsonUnquote : public IFunction {
219
public:
220
    static constexpr auto name = "json_unquote";
221
21
    static FunctionPtr create() { return std::make_shared<FunctionJsonUnquote>(); }
222
223
1
    String get_name() const override { return name; }
224
225
12
    size_t get_number_of_arguments() const override { return 1; }
226
227
12
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
228
12
        return make_nullable(std::make_shared<DataTypeString>());
229
12
    }
230
231
24
    bool use_default_implementation_for_nulls() const override { return false; }
232
233
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
234
12
                        uint32_t result, size_t input_rows_count) const override {
235
12
        const IColumn& col_from = *(block.get_by_position(arguments[0]).column);
236
237
12
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
238
239
12
        const auto* col_from_string = check_and_get_column<ColumnString>(col_from);
240
12
        if (const auto* nullable = check_and_get_column<ColumnNullable>(col_from)) {
241
4
            col_from_string =
242
4
                    check_and_get_column<ColumnString>(*nullable->get_nested_column_ptr());
243
4
        }
244
245
12
        if (!col_from_string) {
246
0
            return Status::RuntimeError("Illegal column {} should be ColumnString",
247
0
                                        col_from.get_name());
248
0
        }
249
250
12
        auto col_to = ColumnString::create();
251
12
        col_to->reserve(input_rows_count);
252
253
        // parser can be reused for performance
254
12
        rapidjson::Document document;
255
24
        for (size_t i = 0; i < input_rows_count; ++i) {
256
12
            if (col_from.is_null_at(i)) {
257
3
                null_map->get_data()[i] = 1;
258
3
                col_to->insert_data(nullptr, 0);
259
3
                continue;
260
3
            }
261
262
9
            const auto& json_str = col_from_string->get_data_at(i);
263
9
            if (json_str.size < 2 || json_str.data[0] != '"' ||
264
9
                json_str.data[json_str.size - 1] != '"') {
265
                // non-quoted string
266
6
                col_to->insert_data(json_str.data, json_str.size);
267
6
            } else {
268
3
                document.Parse(json_str.data, json_str.size);
269
3
                if (document.HasParseError() || !document.IsString()) {
270
0
                    return Status::RuntimeError(
271
0
                            fmt::format("Invalid JSON text in argument 1 to function {}: {}", name,
272
0
                                        std::string_view(json_str.data, json_str.size)));
273
0
                }
274
3
                col_to->insert_data(document.GetString(), document.GetStringLength());
275
3
            }
276
9
        }
277
278
12
        block.replace_by_position(result,
279
12
                                  ColumnNullable::create(std::move(col_to), std::move(null_map)));
280
281
12
        return Status::OK();
282
12
    }
283
};
284
285
8
/// Registers the JSON string helpers defined in this file with `factory`:
/// json_unquote, json_quote and json_valid (in that order).
void register_function_json(SimpleFunctionFactory& factory) {
    // json_unquote
    factory.register_function<FunctionJsonUnquote>();
    // json_quote, via the generic adapter and its quoting policy
    factory.register_function<FunctionJson<FunctionJsonQuoteImpl>>();
    // json_valid
    factory.register_function<FunctionJsonValid>();
}
292
293
} // namespace doris