Coverage Report

Created: 2026-05-20 18:18

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/util/jsonb_document.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "util/jsonb_document.h"
19
20
#include <memory>
21
#include <string>
22
#include <vector>
23
24
#include "common/status.h"
25
#include "util/jsonb_writer.h"
26
27
namespace doris {
28
29
/// Performs a shallow validity check on a serialized JSONB buffer and, on success, exposes the
/// buffer itself as a JsonbDocument (no copy is made).
///
/// @param pb   Start of the serialized bytes; may be null.
/// @param size Number of bytes available at `pb`.
/// @param doc  Out-parameter. Reset to nullptr on entry; on success points either into `pb` or,
///             for null/empty input, at a function-local static JSONB null document.
/// @return Status::OK() on success; Status::InvalidArgument when the buffer is smaller than a
///         header plus one value, the header version differs from JSONB_VER, the root value
///         type is out of range, or `size` does not equal header size plus the root value's
///         packed size.
Status JsonbDocument::checkAndCreateDocument(const char* pb, size_t size,
                                             const JsonbDocument** doc) {
    *doc = nullptr;

    // Treat empty input as a valid JSONB null document.
    if (!pb || size == 0) {
        // Serialized once, on first use, through JsonbWriter.
        static const std::string null_document_bytes = []() {
            JsonbWriter writer;
            (void)writer.writeNull();
            auto* out = writer.getOutput();
            return std::string(out->getBuffer(), out->getSize());
        }();
        *doc = reinterpret_cast<const JsonbDocument*>(null_document_bytes.data());
        return Status::OK();
    }

    // `pb` is known to be non-null here (the null case returned above), so only the size needs
    // checking. The message text is kept unchanged for compatibility with existing callers.
    if (size < sizeof(JsonbHeader) + sizeof(JsonbValue)) {
        return Status::InvalidArgument("Invalid JSONB document: too small size({}) or null pointer",
                                       size);
    }

    const auto* candidate = reinterpret_cast<const JsonbDocument*>(pb);
    if (candidate->header_.ver_ != JSONB_VER) {
        return Status::InvalidArgument("Invalid JSONB document: invalid version({})",
                                       candidate->header_.ver_);
    }

    const auto* root = reinterpret_cast<const JsonbValue*>(candidate->payload_);
    // Keep this check lightweight. This API is used by JSONB scalar/table functions on every row,
    // so recursively validating object/array payloads here would add an O(document size) scan before
    // the real operation and can regress large JSONB queries. External INSERT/LOAD paths build JSONB
    // through JsonBinaryValue/JsonbWriter before storage; any untrusted raw binary boundary should
    // add explicit deep validation there instead of changing this hot-path helper.
    //
    // The type range is tested first so numPackedBytes() is only called for a known type.
    const bool type_in_range =
            root->type >= JsonbType::T_Null && root->type < JsonbType::NUM_TYPES;
    if (!type_in_range || size != sizeof(JsonbHeader) + root->numPackedBytes()) {
        return Status::InvalidArgument("Invalid JSONB document: invalid type({}) or size({})",
                                       static_cast<JsonbTypeUnder>(root->type), size);
    }

    *doc = candidate;
    return Status::OK();
}
69
70
144k
/// Evaluates `path` against this value, one leg at a time.
///
/// The current match set starts as {this}. Each leg maps every value in the set to zero or more
/// successors (object member lookup, array element lookup, or wildcard expansion), and the
/// successors become the match set for the next leg.
///
/// @param path Parsed path whose legs are applied in order.
/// @return A JsonbFindResult where:
///         - if any wildcard was involved, `is_wildcard` is true and `value` is either nullptr
///           (no match) or the root of a freshly written array holding every match; that array
///           is owned by `result.writer`;
///         - otherwise `value` is the single match, or left at its default when there is not
///           exactly one match.
/// THROW_IF_ERROR is applied to the re-validation of the freshly written wildcard array.
JsonbFindResult JsonbValue::findValue(JsonbPath& path) const {
    JsonbFindResult result;
    bool is_wildcard = false;

    std::vector<const JsonbValue*> values;  // inputs of the leg being processed
    std::vector<const JsonbValue*> results; // outputs of the leg being processed
    results.emplace_back(this);

    if (path.is_supper_wildcard()) {
        // Super wildcard: seed the match set with this value plus every descendant, visiting
        // each child before its own children. A self-passing generic lambda gives recursion
        // without std::function's type erasure.
        auto collect_descendants = [&results](auto&& self, const JsonbValue* val) -> void {
            if (val->isObject()) {
                for (const auto& it : *val->unpack<ObjectVal>()) {
                    results.emplace_back(it.value());
                    self(self, it.value());
                }
            } else if (val->isArray()) {
                for (const auto& it : *val->unpack<ArrayVal>()) {
                    results.emplace_back(&it);
                    self(self, &it);
                }
            }
        };
        is_wildcard = true;
        collect_descendants(collect_descendants, this);
    }

    for (size_t i = 0; i < path.get_leg_vector_size(); ++i) {
        // The leg does not depend on the value being visited, so look it up once per leg.
        const auto& leg = path.get_leg_from_leg_vector(i);

        // Last leg's outputs become this leg's inputs. swap + clear guarantees `results` is
        // empty afterwards (a moved-from vector is only "valid but unspecified") and lets both
        // buffers keep their capacity.
        values.swap(results);
        results.clear();

        for (const auto* pval : values) {
            switch (leg->type) {
            case MEMBER_CODE: {
                // Member legs only apply to objects; any other value yields no match.
                if (LIKELY(pval->type == JsonbType::T_Object)) {
                    if (leg->leg_len == 1 && *leg->leg_ptr == WILDCARD) {
                        // Member wildcard: every member value is a match.
                        is_wildcard = true;
                        for (const auto& it : *pval->unpack<ObjectVal>()) {
                            results.emplace_back(it.value());
                        }
                        continue;
                    }

                    const JsonbValue* member =
                            pval->unpack<ObjectVal>()->find(leg->leg_ptr, leg->leg_len);
                    if (member) {
                        results.emplace_back(member);
                    }
                }
                continue;
            }
            case ARRAY_CODE: {
                if (leg->leg_len == 1 && *leg->leg_ptr == WILDCARD) {
                    // Array wildcard: every element is a match; non-arrays yield nothing.
                    if (LIKELY(pval->type == JsonbType::T_Array)) {
                        is_wildcard = true;
                        for (const auto& it : *pval->unpack<ArrayVal>()) {
                            results.emplace_back(&it);
                        }
                    }
                    continue;
                }

                if (pval->type != JsonbType::T_Array && leg->array_index == 0) {
                    // Same as mysql and postgres: index 0 on a non-array value selects the
                    // value itself.
                    results.emplace_back(pval);
                    continue;
                }

                // Indexed access needs an array and a leg with no name attached.
                if (pval->type != JsonbType::T_Array || leg->leg_ptr != nullptr ||
                    leg->leg_len != 0) {
                    continue;
                }

                const JsonbValue* element = nullptr;
                if (leg->array_index >= 0) {
                    element = pval->unpack<ArrayVal>()->get(leg->array_index);
                } else {
                    // Negative indexes are offset by the element count.
                    element = pval->unpack<ArrayVal>()->get(pval->unpack<ArrayVal>()->numElem() +
                                                            leg->array_index);
                }

                if (element) {
                    results.emplace_back(element);
                }
                continue;
            }
            }
        }
    }

    if (is_wildcard) {
        result.is_wildcard = true;
        if (results.empty()) {
            result.value = nullptr; // No values found
        } else {
            /// if super wildcard, need distinct results
            /// because super wildcard will traverse all nodes
            ///
            /// `select json_extract( '[1]', '$**[0]' );`
            /// +---------------------------------+
            /// | json_extract( '[1]', '$**[0]' ) |
            /// +---------------------------------+
            /// | [1,1]                           |
            /// +---------------------------------+
            if (results.size() > 1 && path.is_supper_wildcard()) [[unlikely]] {
                // De-duplicate by pointer identity; the set also orders matches by address.
                const std::set<const JsonbValue*> distinct_results(results.begin(),
                                                                   results.end());
                results.assign(distinct_results.begin(), distinct_results.end());
            }

            // Pack all matches into a new array owned by the result's writer.
            result.writer = std::make_unique<JsonbWriter>();
            result.writer->writeStartArray();
            for (const auto* pval : results) {
                result.writer->writeValue(pval);
            }
            result.writer->writeEndArray();

            const JsonbDocument* doc = nullptr;
            THROW_IF_ERROR(JsonbDocument::checkAndCreateDocument(
                    result.writer->getOutput()->getBuffer(), result.writer->getOutput()->getSize(),
                    &doc));
            result.value = doc->getValue();
        }
    } else if (results.size() == 1) {
        result.value = results[0];
    }

    return result;
}
205
206
/// Returns this object's members as (key, value) pairs sorted by key, with at most one entry
/// per key.
///
/// The keys are StringRef views built from each member's key pointer and length, so they
/// reference the object's own storage rather than owning a copy. When a key occurs more than
/// once, the occurrence that comes first in iteration order is the one kept: the sort is stable
/// and std::unique keeps the first element of each run of equal keys.
std::vector<std::pair<StringRef, const JsonbValue*>> ObjectVal::get_ordered_key_value_pairs()
        const {
    std::vector<std::pair<StringRef, const JsonbValue*>> kvs;
    for (const auto& member : *this) {
        kvs.emplace_back(StringRef(member.getKeyStr(), member.klen()), member.value());
    }

    // Sort by key. A stable sort keeps duplicate keys in their original relative order, so the
    // de-duplication below is deterministic (std::sort leaves that order unspecified).
    std::stable_sort(kvs.begin(), kvs.end(),
                     [](const auto& left, const auto& right) { return left.first < right.first; });

    // Unique by key: drop every entry whose key equals that of its predecessor.
    const auto same_key = [](const auto& left, const auto& right) {
        return left.first == right.first;
    };
    kvs.erase(std::unique(kvs.begin(), kvs.end(), same_key), kvs.end());
    return kvs;
}
224
225
} // namespace doris