Coverage Report

Created: 2026-03-14 20:54

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/function_jsonb_transform.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include <algorithm>
#include <vector>
19
20
#include "core/data_type/data_type_jsonb.h"
21
#include "core/data_type/primitive_type.h"
22
#include "exprs/function/simple_function_factory.h"
23
#include "util/jsonb_document.h"
24
#include "util/jsonb_document_cast.h"
25
#include "util/jsonb_writer.h"
26
27
namespace doris {
28
29
// Sort the keys of the JSON object and deduplicate the repeated keys, keeping the first one
30
0
void sort_json_object_keys(JsonbWriter& jsonb_writer, const JsonbValue* jsonb_value) {
31
0
    if (jsonb_value->isObject()) {
32
0
        std::vector<std::pair<StringRef, const JsonbValue*>> kvs;
33
0
        const auto* obj_val = jsonb_value->unpack<ObjectVal>();
34
0
        for (auto it = obj_val->begin(); it != obj_val->end(); ++it) {
35
0
            kvs.emplace_back(StringRef(it->getKeyStr(), it->klen()), it->value());
36
0
        }
37
        // sort by key
38
0
        std::sort(kvs.begin(), kvs.end(),
39
0
                  [](const auto& left, const auto& right) { return left.first < right.first; });
40
        // unique by key
41
0
        kvs.erase(std::unique(kvs.begin(), kvs.end(),
42
0
                              [](const auto& left, const auto& right) {
43
0
                                  return left.first == right.first;
44
0
                              }),
45
0
                  kvs.end());
46
0
        jsonb_writer.writeStartObject();
47
0
        for (const auto& kv : kvs) {
48
0
            jsonb_writer.writeKey(kv.first.data, static_cast<uint8_t>(kv.first.size));
49
0
            sort_json_object_keys(jsonb_writer, kv.second);
50
0
        }
51
0
        jsonb_writer.writeEndObject();
52
0
    } else if (jsonb_value->isArray()) {
53
0
        const auto* array_val = jsonb_value->unpack<ArrayVal>();
54
0
        jsonb_writer.writeStartArray();
55
0
        for (auto it = array_val->begin(); it != array_val->end(); ++it) {
56
0
            sort_json_object_keys(jsonb_writer, &*it);
57
0
        }
58
0
        jsonb_writer.writeEndArray();
59
0
    } else {
60
        // scalar value
61
0
        jsonb_writer.writeValue(jsonb_value);
62
0
    }
63
0
}
64
65
// Convert all numeric types in JSON to double type
66
0
void normalize_json_numbers_to_double(JsonbWriter& jsonb_writer, const JsonbValue* jsonb_value) {
67
0
    if (jsonb_value->isObject()) {
68
0
        jsonb_writer.writeStartObject();
69
0
        const auto* obj_val = jsonb_value->unpack<ObjectVal>();
70
0
        for (auto it = obj_val->begin(); it != obj_val->end(); ++it) {
71
0
            jsonb_writer.writeKey(it->getKeyStr(), it->klen());
72
0
            normalize_json_numbers_to_double(jsonb_writer, it->value());
73
0
        }
74
0
        jsonb_writer.writeEndObject();
75
0
    } else if (jsonb_value->isArray()) {
76
0
        const auto* array_val = jsonb_value->unpack<ArrayVal>();
77
0
        jsonb_writer.writeStartArray();
78
0
        for (auto it = array_val->begin(); it != array_val->end(); ++it) {
79
0
            normalize_json_numbers_to_double(jsonb_writer, &*it);
80
0
        }
81
0
        jsonb_writer.writeEndArray();
82
0
    } else {
83
        // scalar value
84
0
        if (jsonb_value->isInt() || jsonb_value->isFloat() || jsonb_value->isDouble() ||
85
0
            jsonb_value->isDecimal()) {
86
0
            double to;
87
0
            CastParameters params;
88
0
            params.is_strict = false;
89
0
            JsonbCast::cast_from_json_to_float(jsonb_value, to, params);
90
0
            NormalizeFloat(to);
91
0
            jsonb_writer.writeDouble(to);
92
0
        } else {
93
0
            jsonb_writer.writeValue(jsonb_value);
94
0
        }
95
0
    }
96
0
}
97
98
// Input jsonb, output jsonb
99
template <typename Impl>
100
class FunctionJsonbTransform : public IFunction {
101
public:
102
    static constexpr auto name = Impl::name;
103
104
4
    static FunctionPtr create() { return std::make_shared<FunctionJsonbTransform>(); }
_ZN5doris22FunctionJsonbTransformINS_18SortJsonObjectKeysEE6createEv
Line
Count
Source
104
2
    static FunctionPtr create() { return std::make_shared<FunctionJsonbTransform>(); }
_ZN5doris22FunctionJsonbTransformINS_28NormalizeJsonNumbersToDoubleEE6createEv
Line
Count
Source
104
2
    static FunctionPtr create() { return std::make_shared<FunctionJsonbTransform>(); }
105
106
2
    String get_name() const override { return name; }
_ZNK5doris22FunctionJsonbTransformINS_18SortJsonObjectKeysEE8get_nameB5cxx11Ev
Line
Count
Source
106
1
    String get_name() const override { return name; }
_ZNK5doris22FunctionJsonbTransformINS_28NormalizeJsonNumbersToDoubleEE8get_nameB5cxx11Ev
Line
Count
Source
106
1
    String get_name() const override { return name; }
107
108
0
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
109
0
        return std::make_shared<DataTypeJsonb>();
110
0
    }
Unexecuted instantiation: _ZNK5doris22FunctionJsonbTransformINS_18SortJsonObjectKeysEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Unexecuted instantiation: _ZNK5doris22FunctionJsonbTransformINS_28NormalizeJsonNumbersToDoubleEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
111
112
0
    size_t get_number_of_arguments() const override { return 1; }
Unexecuted instantiation: _ZNK5doris22FunctionJsonbTransformINS_18SortJsonObjectKeysEE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris22FunctionJsonbTransformINS_28NormalizeJsonNumbersToDoubleEE23get_number_of_argumentsEv
113
114
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
115
0
                        uint32_t result, size_t size) const override {
116
0
        auto input_column = block.get_by_position(arguments[0]).column;
117
0
        auto to_column = ColumnString::create();
118
119
0
        const auto& input_jsonb_column = assert_cast<const ColumnString&>(*input_column);
120
121
0
        to_column->get_chars().reserve(input_jsonb_column.get_chars().size());
122
0
        to_column->get_offsets().reserve(input_jsonb_column.get_offsets().size());
123
124
0
        JsonbWriter writer;
125
0
        for (size_t i = 0; i < size; ++i) {
126
0
            StringRef val = input_jsonb_column.get_data_at(i);
127
0
            const JsonbDocument* doc = nullptr;
128
0
            auto st = JsonbDocument::checkAndCreateDocument(val.data, val.size, &doc);
129
0
            if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
130
                // mayby be invalid jsonb, just insert default
131
                // invalid jsonb value may be caused by the default null processing
132
                // insert empty string
133
0
                to_column->insert_default();
134
0
                continue;
135
0
            }
136
0
            const JsonbValue* value = doc->getValue();
137
0
            if (UNLIKELY(!value)) {
138
                // mayby be invalid jsonb, just insert default
139
                // invalid jsonb value may be caused by the default null processing
140
                // insert empty string
141
0
                to_column->insert_default();
142
0
                continue;
143
0
            }
144
145
0
            writer.reset();
146
147
0
            Impl::transform(writer, value);
148
149
0
            to_column->insert_data(writer.getOutput()->getBuffer(), writer.getOutput()->getSize());
150
0
        }
151
0
        block.get_by_position(result).column = std::move(to_column);
152
0
        return Status::OK();
153
0
    }
Unexecuted instantiation: _ZNK5doris22FunctionJsonbTransformINS_18SortJsonObjectKeysEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Unexecuted instantiation: _ZNK5doris22FunctionJsonbTransformINS_28NormalizeJsonNumbersToDoubleEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
154
};
155
156
struct SortJsonObjectKeys {
157
    static constexpr auto name = "sort_json_object_keys";
158
0
    static void transform(JsonbWriter& writer, const JsonbValue* value) {
159
0
        sort_json_object_keys(writer, value);
160
0
    }
161
};
162
163
struct NormalizeJsonNumbersToDouble {
164
    static constexpr auto name = "normalize_json_numbers_to_double";
165
0
    static void transform(JsonbWriter& writer, const JsonbValue* value) {
166
0
        normalize_json_numbers_to_double(writer, value);
167
0
    }
168
};
169
170
// Function class whose name and per-value transform come from SortJsonObjectKeys.
using FunctionSortJsonObjectKeys = FunctionJsonbTransform<SortJsonObjectKeys>;
171
// Function class whose name and per-value transform come from NormalizeJsonNumbersToDouble.
using FunctionNormalizeJsonNumbersToDouble = FunctionJsonbTransform<NormalizeJsonNumbersToDouble>;
172
173
1
// Registers both JSONB transform functions with `factory`, then adds a
// "jsonb"-spelled alias for each of them.
void register_function_json_transform(SimpleFunctionFactory& factory) {
    using SortKeysFn = FunctionSortJsonObjectKeys;
    using NumbersToDoubleFn = FunctionNormalizeJsonNumbersToDouble;

    // Primary names (taken from each function class's `name`).
    factory.register_function<SortKeysFn>();
    factory.register_function<NumbersToDoubleFn>();

    // Aliases, registered after the functions they refer to.
    factory.register_alias(SortKeysFn::name, "sort_jsonb_object_keys");
    factory.register_alias(NumbersToDoubleFn::name, "normalize_jsonb_numbers_to_double");
}
181
182
} // namespace doris