Coverage Report

Created: 2026-03-12 14:02

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/util/jsonb_document.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "util/jsonb_document.h"
19
20
#include <memory>
21
#include <string>
22
#include <vector>
23
24
#include "common/status.h"
25
#include "util/jsonb_writer.h"
26
27
namespace doris {
28
29
/// Validates a serialized JSONB buffer and exposes it as a JsonbDocument.
///
/// @param pb   start of the serialized bytes; may be null.
/// @param size number of bytes available at `pb`.
/// @param doc  out-parameter. It is reset to nullptr on entry and is only
///             pointed at a document when Status::OK() is returned.
/// @return Status::OK() when the buffer is accepted (or is empty, see below),
///         Status::InvalidArgument when the size, version or root type check fails.
Status JsonbDocument::checkAndCreateDocument(const char* pb, size_t size,
                                             const JsonbDocument** doc) {
    *doc = nullptr;

    // Treat empty input as a valid JSONB null document.
    if (pb == nullptr || size == 0) {
        // The serialized null document is built on first use and then shared,
        // so the returned pointer refers to storage with static lifetime.
        static const std::string null_document = []() {
            JsonbWriter null_writer;
            (void)null_writer.writeNull();
            auto* output = null_writer.getOutput();
            return std::string(output->getBuffer(), output->getSize());
        }();
        *doc = reinterpret_cast<const JsonbDocument*>(null_document.data());
        return Status::OK();
    }

    // `pb` is known to be non-null from here on, so only the size needs checking:
    // the buffer must at least hold the header plus one value.
    if (size < sizeof(JsonbHeader) + sizeof(JsonbValue)) {
        return Status::InvalidArgument("Invalid JSONB document: too small size({}) or null pointer",
                                       size);
    }

    const auto* candidate = reinterpret_cast<const JsonbDocument*>(pb);
    if (candidate->header_.ver_ != JSONB_VER) {
        return Status::InvalidArgument("Invalid JSONB document: invalid version({})",
                                       candidate->header_.ver_);
    }

    // The root value must carry a known type tag, and header + packed value
    // must account for exactly `size` bytes. numPackedBytes() is only
    // consulted once the type tag has been found to be in range.
    const auto* root = reinterpret_cast<const JsonbValue*>(candidate->payload_);
    const bool type_in_range =
            root->type >= JsonbType::T_Null && root->type < JsonbType::NUM_TYPES;
    if (!type_in_range || size != sizeof(JsonbHeader) + root->numPackedBytes()) {
        return Status::InvalidArgument("Invalid JSONB document: invalid type({}) or size({})",
                                       static_cast<JsonbTypeUnder>(root->type), size);
    }

    *doc = candidate;
    return Status::OK();
}
64
65
4.60k
/// Evaluates `path` against this value and returns what it selects.
///
/// The path is processed leg by leg. Each leg maps the set of values matched
/// so far to a new set (member lookup, array index, or wildcard expansion).
///
/// @param path parsed path to evaluate; its legs are read through
///             get_leg_vector_size() / get_leg_from_leg_vector().
/// @return A JsonbFindResult where:
///         - for a path without any wildcard, `value` is the single match, or
///           is left at its default when there is not exactly one match;
///         - for a wildcard path, `is_wildcard` is true and `value` is either
///           nullptr (no match) or the root of a freshly written JSONB array
///           holding all matches. That array lives in `result.writer`, which
///           is why the writer is returned together with the value.
/// @note THROW_IF_ERROR is applied to the re-validation of the written array.
JsonbFindResult JsonbValue::findValue(JsonbPath& path) const {
    JsonbFindResult result;
    bool is_wildcard = false;

    // `results` holds the matches produced by the legs handled so far;
    // `values` is the input set of the leg currently being evaluated.
    std::vector<const JsonbValue*> values;
    std::vector<const JsonbValue*> results;
    results.emplace_back(this);

    if (path.is_supper_wildcard()) {
        // Super wildcard (`**`): seed the match set with this value and every
        // nested value below it, visiting containers recursively.
        std::function<void(const JsonbValue*)> foreach_values;
        foreach_values = [&](const JsonbValue* val) {
            if (val->isObject()) {
                for (const auto& it : *val->unpack<ObjectVal>()) {
                    results.emplace_back(it.value());
                    foreach_values(it.value());
                }
            } else if (val->isArray()) {
                for (const auto& it : *val->unpack<ArrayVal>()) {
                    results.emplace_back(&it);
                    foreach_values(&it);
                }
            }
        };
        is_wildcard = true;
        foreach_values(this);
    }

    for (size_t i = 0; i < path.get_leg_vector_size(); ++i) {
        // The leg does not change while iterating over the values, so fetch it once.
        const auto& leg = *path.get_leg_from_leg_vector(i);
        // Evaluated lazily so that leg_ptr is only dereferenced at the points
        // where a wildcard leg is actually being tested for.
        const auto leg_is_wildcard = [&leg]() {
            return leg.leg_len == 1 && *leg.leg_ptr == WILDCARD;
        };

        // Hand the previous matches over as input and start from an empty
        // output set. clear() + swap() leaves `results` empty by construction
        // instead of depending on the state of a moved-from vector.
        values.clear();
        values.swap(results);

        for (const auto* pval : values) {
            switch (leg.type) {
            case MEMBER_CODE: {
                // Member legs only apply to objects; other values yield nothing.
                if (LIKELY(pval->type == JsonbType::T_Object)) {
                    if (leg_is_wildcard()) {
                        // `.*` selects every member value of the object.
                        is_wildcard = true;
                        for (const auto& it : *pval->unpack<ObjectVal>()) {
                            results.emplace_back(it.value());
                        }
                        continue;
                    }

                    pval = pval->unpack<ObjectVal>()->find(leg.leg_ptr, leg.leg_len);

                    if (pval) {
                        results.emplace_back(pval);
                    }
                }
                continue;
            }
            case ARRAY_CODE: {
                if (leg_is_wildcard()) {
                    // `[*]` selects every element; non-arrays yield nothing.
                    if (LIKELY(pval->type == JsonbType::T_Array)) {
                        is_wildcard = true;
                        for (const auto& it : *pval->unpack<ArrayVal>()) {
                            results.emplace_back(&it);
                        }
                    }
                    continue;
                }

                if (pval->type != JsonbType::T_Array && leg.array_index == 0) {
                    // Same as mysql and postgres: index 0 applied to a
                    // non-array selects the value itself.
                    results.emplace_back(pval);
                    continue;
                }

                // A plain index leg carries no name (null leg_ptr, zero
                // leg_len); anything else, or a non-array input, matches nothing.
                if (pval->type != JsonbType::T_Array || leg.leg_ptr != nullptr ||
                    leg.leg_len != 0) {
                    continue;
                }

                const auto* array = pval->unpack<ArrayVal>();
                if (leg.array_index >= 0) {
                    pval = array->get(leg.array_index);
                } else {
                    // Negative indexes are offset by the element count,
                    // i.e. they count from the end of the array.
                    pval = array->get(array->numElem() + leg.array_index);
                }

                if (pval) {
                    results.emplace_back(pval);
                }
                continue;
            }
            }
        }
    }

    if (is_wildcard) {
        result.is_wildcard = true;
        if (results.empty()) {
            result.value = nullptr; // No values found
        } else {
            /// With a super wildcard the results must be de-duplicated,
            /// because the super wildcard traverses all nodes and the same
            /// node can therefore be selected more than once. For example,
            /// in `select json_extract( '[1]', '$**[0]' );` both the array
            /// `[1]` and its element `1` select the element `1`, which
            /// without de-duplication would produce `[1,1]`.
            ///
            /// Note: std::set orders by pointer value, so after this step the
            /// results are in address order.
            if (results.size() > 1 && path.is_supper_wildcard()) [[unlikely]] {
                std::set<const JsonbValue*> distinct_results;
                for (const auto* pval : results) {
                    distinct_results.insert(pval);
                }
                results.assign(distinct_results.begin(), distinct_results.end());
            }

            // Pack all matches into a new JSONB array owned by result.writer.
            result.writer = std::make_unique<JsonbWriter>();
            result.writer->writeStartArray();
            for (const auto* pval : results) {
                result.writer->writeValue(pval);
            }
            result.writer->writeEndArray();

            // Re-validate the written bytes and expose the array's root value.
            const JsonbDocument* doc = nullptr;
            THROW_IF_ERROR(JsonbDocument::checkAndCreateDocument(
                    result.writer->getOutput()->getBuffer(), result.writer->getOutput()->getSize(),
                    &doc));
            result.value = doc->getValue();
        }
    } else if (results.size() == 1) {
        result.value = results[0];
    }

    return result;
}
200
201
/// Returns this object's entries as (key, value) pairs sorted by key, with at
/// most one pair per distinct key.
///
/// The keys are StringRef views built from each entry's key pointer and
/// length, and the values are pointers to the stored values; nothing is copied
/// out of the object.
///
/// When the object contains the same key more than once, the entry that was
/// stored first is the one that is kept.
///
/// @return the sorted, key-unique list of (key, value) pairs.
std::vector<std::pair<StringRef, const JsonbValue*>> ObjectVal::get_ordered_key_value_pairs()
        const {
    std::vector<std::pair<StringRef, const JsonbValue*>> kvs;
    for (const auto& entry : *this) {
        kvs.emplace_back(StringRef(entry.getKeyStr(), entry.klen()), entry.value());
    }
    // Sort by key. A stable sort keeps entries with equal keys in their
    // stored order, which makes the duplicate removal below deterministic
    // (a plain std::sort may reorder equal keys arbitrarily).
    std::stable_sort(kvs.begin(), kvs.end(),
                     [](const auto& left, const auto& right) { return left.first < right.first; });
    // Unique by key: std::unique keeps the first pair of each run of equal keys.
    kvs.erase(std::unique(kvs.begin(), kvs.end(),
                          [](const auto& left, const auto& right) {
                              return left.first == right.first;
                          }),
              kvs.end());
    return kvs;
}
219
220
} // namespace doris