Coverage Report

Created: 2026-05-22 20:17

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/util/jsonb_document.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "util/jsonb_document.h"
19
20
#include <memory>
21
#include <string>
22
#include <vector>
23
24
#include "common/status.h"
25
#include "util/jsonb_writer.h"
26
27
namespace doris {
28
29
/// Performs a shallow validation of a serialized JSONB buffer and exposes it as a
/// JsonbDocument without copying the bytes.
///
/// @param pb   Start of the serialized JSONB bytes; may be null.
/// @param size Number of bytes available at `pb`.
/// @param doc  Out-parameter. Reset to nullptr on entry. On success it points either at `pb`
///             itself or, for null/empty input, at a shared serialized JSONB null document.
/// @return Status::OK() on success; Status::InvalidArgument when the buffer is smaller than a
///         header plus one value, the header version is not JSONB_VER, or the root value's
///         type / packed size does not match `size`.
Status JsonbDocument::checkAndCreateDocument(const char* pb, size_t size,
                                             const JsonbDocument** doc) {
    *doc = nullptr;
    if (!pb || size == 0) {
        // Treat empty input as a valid JSONB null document. The serialized null is built once
        // and kept in a function-local static, so the pointer handed out below outlives the call.
        static const std::string buf = []() {
            JsonbWriter writer;
            (void)writer.writeNull();
            auto* out = writer.getOutput();
            return std::string(out->getBuffer(), out->getSize());
        }();
        *doc = reinterpret_cast<const JsonbDocument*>(buf.data());
        return Status::OK();
    }
    // A null `pb` has already been handled above, so only the size needs checking here.
    // The message text is kept unchanged for anything that matches on it.
    if (size < sizeof(JsonbHeader) + sizeof(JsonbValue)) {
        return Status::InvalidArgument("Invalid JSONB document: too small size({}) or null pointer",
                                       size);
    }

    const auto* doc_ptr = reinterpret_cast<const JsonbDocument*>(pb);
    if (doc_ptr->header_.ver_ != JSONB_VER) {
        return Status::InvalidArgument("Invalid JSONB document: invalid version({})",
                                       doc_ptr->header_.ver_);
    }

    const auto* val = reinterpret_cast<const JsonbValue*>(doc_ptr->payload_);
    // Keep this check lightweight. This API is used by JSONB scalar/table functions on every row,
    // so recursively validating object/array payloads here would add an O(document size) scan before
    // the real operation and can regress large JSONB queries. External INSERT/LOAD paths build JSONB
    // through JsonBinaryValue/JsonbWriter before storage; any untrusted raw binary boundary should
    // add explicit deep validation there instead of changing this hot-path helper.
    if (val->type < JsonbType::T_Null || val->type >= JsonbType::NUM_TYPES ||
        size != sizeof(JsonbHeader) + val->numPackedBytes()) {
        return Status::InvalidArgument("Invalid JSONB document: invalid type({}) or size({})",
                                       static_cast<JsonbTypeUnder>(val->type), size);
    }

    *doc = doc_ptr;
    return Status::OK();
}
69
70
96
/// Walks `path` leg by leg starting from this value and reports what it reaches.
///
/// @param path Parsed JSON path; its legs are applied in order to the current candidate set.
/// @return A JsonbFindResult where:
///         - without any wildcard, `value` is set only when exactly one value matched and is
///           otherwise left as default-constructed;
///         - with a wildcard (`*` member/array leg or a super wildcard path), `is_wildcard` is
///           true and `value` is nullptr when nothing matched, or else points at a JSONB array
///           of the matches that is serialized into, and kept alive by, `result.writer`.
/// NOTE(review): THROW_IF_ERROR presumably raises when re-validating the freshly written array
/// fails - confirm against the macro definition.
JsonbFindResult JsonbValue::findValue(JsonbPath& path) const {
    JsonbFindResult result;
    bool is_wildcard = false;

    // `values` holds the candidates consumed by the current leg; `results` collects what that
    // leg produces and becomes the candidate set of the next leg.
    std::vector<const JsonbValue*> values;
    std::vector<const JsonbValue*> results;
    results.emplace_back(this);

    if (path.is_supper_wildcard()) {
        // Super wildcard: every descendant (at any depth) becomes a candidate in addition to
        // this value itself. The lambda receives itself as `self` so it can recurse.
        const auto collect_descendants = [&results](const auto& self,
                                                    const JsonbValue* val) -> void {
            if (val->isObject()) {
                for (const auto& it : *val->unpack<ObjectVal>()) {
                    results.emplace_back(it.value());
                    self(self, it.value());
                }
            } else if (val->isArray()) {
                for (const auto& it : *val->unpack<ArrayVal>()) {
                    results.emplace_back(&it);
                    self(self, &it);
                }
            }
        };
        is_wildcard = true;
        collect_descendants(collect_descendants, this);
    }

    for (size_t i = 0; i < path.get_leg_vector_size(); ++i) {
        // Hand the previous leg's matches over as this leg's candidates. swap + clear leaves
        // `results` in a well-defined empty state; after a move assignment the source vector is
        // only guaranteed to be "valid but unspecified", so appending to it was fragile.
        values.swap(results);
        results.clear();

        // Bound by reference so this works for either a raw or a smart pointer return type.
        // NOTE(review): assumes get_leg_from_leg_vector is a side-effect-free accessor.
        const auto& leg = path.get_leg_from_leg_vector(i);
        // Evaluated lazily so leg_ptr is only dereferenced once leg_len == 1 is established,
        // and only at the points where the leg is actually tested for being a wildcard.
        const auto leg_is_wildcard = [&leg]() {
            return leg->leg_len == 1 && *leg->leg_ptr == WILDCARD;
        };

        for (const auto* pval : values) {
            switch (leg->type) {
            case MEMBER_CODE: {
                // Member legs only apply to objects; any other candidate produces no match.
                if (LIKELY(pval->type == JsonbType::T_Object)) {
                    if (leg_is_wildcard()) {
                        is_wildcard = true;
                        for (const auto& it : *pval->unpack<ObjectVal>()) {
                            results.emplace_back(it.value());
                        }
                        continue;
                    }

                    const JsonbValue* member =
                            pval->unpack<ObjectVal>()->find(leg->leg_ptr, leg->leg_len);
                    if (member) {
                        results.emplace_back(member);
                    }
                }
                continue;
            }
            case ARRAY_CODE: {
                const bool is_array = pval->type == JsonbType::T_Array;

                if (leg_is_wildcard()) {
                    if (LIKELY(is_array)) {
                        is_wildcard = true;
                        for (const auto& it : *pval->unpack<ArrayVal>()) {
                            results.emplace_back(&it);
                        }
                    }
                    continue;
                }

                if (!is_array && leg->array_index == 0) {
                    // Same as mysql and postgres: index 0 on a non-array yields the value itself.
                    results.emplace_back(pval);
                    continue;
                }

                // Plain index lookup needs an array candidate and a leg without any name part.
                if (!is_array || leg->leg_ptr != nullptr || leg->leg_len != 0) {
                    continue;
                }

                const auto* array = pval->unpack<ArrayVal>();
                const JsonbValue* element = nullptr;
                if (leg->array_index >= 0) {
                    element = array->get(leg->array_index);
                } else {
                    // Negative index: offset from the end of the array.
                    element = array->get(array->numElem() + leg->array_index);
                }

                if (element) {
                    results.emplace_back(element);
                }
                continue;
            }
            }
        }
    }

    if (is_wildcard) {
        result.is_wildcard = true;
        if (results.empty()) {
            result.value = nullptr; // No values found
        } else {
            /// if supper wildcard, need distinct results
            /// because supper wildcard will traverse all nodes
            ///
            /// `select json_extract( '[1]', '$**[0]' );`
            /// +---------------------------------+
            /// | json_extract( '[1]', '$**[0]' ) |
            /// +---------------------------------+
            /// | [1,1]                           |
            /// +---------------------------------+
            if (results.size() > 1 && path.is_supper_wildcard()) [[unlikely]] {
                // De-duplicate by pointer identity; the set also orders the matches by address.
                std::set<const JsonbValue*> distinct_results(results.begin(), results.end());
                results.assign(distinct_results.begin(), distinct_results.end());
            }

            // Wrap all matches in a new JSONB array owned by the result's writer.
            result.writer = std::make_unique<JsonbWriter>();
            result.writer->writeStartArray();
            for (const auto* pval : results) {
                result.writer->writeValue(pval);
            }
            result.writer->writeEndArray();

            // Re-open the freshly written bytes as a document to obtain its root value.
            const JsonbDocument* doc = nullptr;
            THROW_IF_ERROR(JsonbDocument::checkAndCreateDocument(
                    result.writer->getOutput()->getBuffer(), result.writer->getOutput()->getSize(),
                    &doc));
            result.value = doc->getValue();
        }
    } else if (results.size() == 1) {
        result.value = results[0];
    }

    return result;
}
205
206
/// Returns this object's members as (key, value) pairs sorted by key, with one entry per
/// distinct key.
///
/// Each key is a StringRef built from the member's key pointer and length; each value is the
/// pointer returned by the member's value().
/// When the same key occurs more than once, the member that comes first in iteration order is
/// the one kept. NOTE(review): "first occurrence wins" is chosen to make the result
/// deterministic; confirm it matches the lookup semantics expected by callers.
std::vector<std::pair<StringRef, const JsonbValue*>> ObjectVal::get_ordered_key_value_pairs()
        const {
    std::vector<std::pair<StringRef, const JsonbValue*>> kvs;
    for (const auto& member : *this) {
        kvs.emplace_back(StringRef(member.getKeyStr(), member.klen()), member.value());
    }
    // sort by key; stable so that entries with equal keys keep their iteration order, which
    // makes the entry kept by the unique step below well-defined
    std::stable_sort(kvs.begin(), kvs.end(),
                     [](const auto& left, const auto& right) { return left.first < right.first; });
    // unique by key (keeps the first entry of every run of equal keys)
    kvs.erase(std::unique(kvs.begin(), kvs.end(),
                          [](const auto& left, const auto& right) {
                              return left.first == right.first;
                          }),
              kvs.end());
    return kvs;
}
224
225
} // namespace doris