Coverage Report

Created: 2026-03-13 03:47

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/util/jsonb_document.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "util/jsonb_document.h"
19
20
#include <memory>
21
#include <string>
22
#include <vector>
23
24
#include "common/status.h"
25
#include "util/jsonb_writer.h"
26
27
namespace doris {
28
29
Status JsonbDocument::checkAndCreateDocument(const char* pb, size_t size,
30
1.19M
                                             const JsonbDocument** doc) {
31
1.19M
    *doc = nullptr;
32
1.19M
    if (!pb || size == 0) {
33
445
        static const std::string buf = []() {
34
2
            JsonbWriter writer;
35
2
            (void)writer.writeNull();
36
2
            auto* out = writer.getOutput();
37
2
            return std::string(out->getBuffer(), out->getSize());
38
2
        }();
39
        // Treat empty input as a valid JSONB null document.
40
445
        *doc = reinterpret_cast<const JsonbDocument*>(buf.data());
41
445
        return Status::OK();
42
445
    }
43
1.19M
    if (!pb || size < sizeof(JsonbHeader) + sizeof(JsonbValue)) {
44
0
        return Status::InvalidArgument("Invalid JSONB document: too small size({}) or null pointer",
45
0
                                       size);
46
0
    }
47
48
1.19M
    const auto* doc_ptr = (const JsonbDocument*)pb;
49
1.19M
    if (doc_ptr->header_.ver_ != JSONB_VER) {
50
2
        return Status::InvalidArgument("Invalid JSONB document: invalid version({})",
51
2
                                       doc_ptr->header_.ver_);
52
2
    }
53
54
1.19M
    const auto* val = (const JsonbValue*)doc_ptr->payload_;
55
1.19M
    if (val->type < JsonbType::T_Null || val->type >= JsonbType::NUM_TYPES ||
56
1.19M
        size != sizeof(JsonbHeader) + val->numPackedBytes()) {
57
0
        return Status::InvalidArgument("Invalid JSONB document: invalid type({}) or size({})",
58
0
                                       static_cast<JsonbTypeUnder>(val->type), size);
59
0
    }
60
61
1.19M
    *doc = doc_ptr;
62
1.19M
    return Status::OK();
63
1.19M
}
64
65
147k
JsonbFindResult JsonbValue::findValue(JsonbPath& path) const {
66
147k
    JsonbFindResult result;
67
147k
    bool is_wildcard = false;
68
69
147k
    std::vector<const JsonbValue*> values;
70
147k
    std::vector<const JsonbValue*> results;
71
147k
    results.emplace_back(this);
72
73
147k
    if (path.is_supper_wildcard()) {
74
7
        std::function<void(const JsonbValue*)> foreach_values;
75
56
        foreach_values = [&](const JsonbValue* val) {
76
56
            if (val->isObject()) {
77
20
                for (const auto& it : *val->unpack<ObjectVal>()) {
78
20
                    results.emplace_back(it.value());
79
20
                    foreach_values(it.value());
80
20
                }
81
46
            } else if (val->isArray()) {
82
29
                for (const auto& it : *val->unpack<ArrayVal>()) {
83
29
                    results.emplace_back(&it);
84
29
                    foreach_values(&it);
85
29
                }
86
13
            }
87
56
        };
88
7
        is_wildcard = true;
89
7
        foreach_values(this);
90
7
    }
91
92
391k
    for (size_t i = 0; i < path.get_leg_vector_size(); ++i) {
93
244k
        values = std::move(results);
94
244k
        for (const auto* pval : values) {
95
231k
            switch (path.get_leg_from_leg_vector(i)->type) {
96
21.1k
            case MEMBER_CODE: {
97
21.1k
                if (LIKELY(pval->type == JsonbType::T_Object)) {
98
7.35k
                    if (path.get_leg_from_leg_vector(i)->leg_len == 1 &&
99
7.35k
                        *path.get_leg_from_leg_vector(i)->leg_ptr == WILDCARD) {
100
83
                        is_wildcard = true;
101
152
                        for (const auto& it : *pval->unpack<ObjectVal>()) {
102
152
                            results.emplace_back(it.value());
103
152
                        }
104
83
                        continue;
105
83
                    }
106
107
7.27k
                    pval = pval->unpack<ObjectVal>()->find(
108
7.27k
                            path.get_leg_from_leg_vector(i)->leg_ptr,
109
7.27k
                            path.get_leg_from_leg_vector(i)->leg_len);
110
111
7.27k
                    if (pval) {
112
2.83k
                        results.emplace_back(pval);
113
2.83k
                    }
114
7.27k
                }
115
21.0k
                continue;
116
21.1k
            }
117
209k
            case ARRAY_CODE: {
118
209k
                if (path.get_leg_from_leg_vector(i)->leg_len == 1 &&
119
209k
                    *path.get_leg_from_leg_vector(i)->leg_ptr == WILDCARD) {
120
28
                    if (LIKELY(pval->type == JsonbType::T_Array)) {
121
22
                        is_wildcard = true;
122
53
                        for (const auto& it : *pval->unpack<ArrayVal>()) {
123
53
                            results.emplace_back(&it);
124
53
                        }
125
22
                    }
126
28
                    continue;
127
28
                }
128
129
209k
                if (pval->type != JsonbType::T_Array &&
130
209k
                    path.get_leg_from_leg_vector(i)->array_index == 0) {
131
                    // Same as mysql and postgres
132
1.11k
                    results.emplace_back(pval);
133
1.11k
                    continue;
134
1.11k
                }
135
136
208k
                if (pval->type != JsonbType::T_Array ||
137
208k
                    path.get_leg_from_leg_vector(i)->leg_ptr != nullptr ||
138
208k
                    path.get_leg_from_leg_vector(i)->leg_len != 0) {
139
7.29k
                    continue;
140
7.29k
                }
141
142
200k
                if (path.get_leg_from_leg_vector(i)->array_index >= 0) {
143
197k
                    pval = pval->unpack<ArrayVal>()->get(
144
197k
                            path.get_leg_from_leg_vector(i)->array_index);
145
197k
                } else {
146
3.28k
                    pval = pval->unpack<ArrayVal>()->get(
147
3.28k
                            pval->unpack<ArrayVal>()->numElem() +
148
3.28k
                            path.get_leg_from_leg_vector(i)->array_index);
149
3.28k
                }
150
151
200k
                if (pval) {
152
194k
                    results.emplace_back(pval);
153
194k
                }
154
200k
                continue;
155
208k
            }
156
231k
            }
157
231k
        }
158
244k
    }
159
160
146k
    if (is_wildcard) {
161
84
        result.is_wildcard = true;
162
84
        if (results.empty()) {
163
15
            result.value = nullptr; // No values found
164
69
        } else {
165
            /// if supper wildcard, need distinct results
166
            /// because supper wildcard will traverse all nodes
167
            ///
168
            /// `select json_extract( '[1]', '$**[0]' );`
169
            /// +---------------------------------+
170
            /// | json_extract( '[1]', '$**[0]' ) |
171
            /// +---------------------------------+
172
            /// | [1,1]                           |
173
            /// +---------------------------------+
174
69
            if (results.size() > 1 && path.is_supper_wildcard()) [[unlikely]] {
175
4
                std::set<const JsonbValue*> distinct_results;
176
17
                for (const auto* pval : results) {
177
17
                    distinct_results.insert(pval);
178
17
                }
179
4
                results.assign(distinct_results.begin(), distinct_results.end());
180
4
            }
181
69
            result.writer = std::make_unique<JsonbWriter>();
182
69
            result.writer->writeStartArray();
183
176
            for (const auto* pval : results) {
184
176
                result.writer->writeValue(pval);
185
176
            }
186
69
            result.writer->writeEndArray();
187
188
69
            const JsonbDocument* doc = nullptr;
189
69
            THROW_IF_ERROR(JsonbDocument::checkAndCreateDocument(
190
69
                    result.writer->getOutput()->getBuffer(), result.writer->getOutput()->getSize(),
191
69
                    &doc));
192
69
            result.value = doc->getValue();
193
69
        }
194
146k
    } else if (results.size() == 1) {
195
121k
        result.value = results[0];
196
121k
    }
197
198
146k
    return result;
199
146k
}
200
201
std::vector<std::pair<StringRef, const JsonbValue*>> ObjectVal::get_ordered_key_value_pairs()
202
44
        const {
203
44
    std::vector<std::pair<StringRef, const JsonbValue*>> kvs;
204
44
    const auto* obj_val = this;
205
157
    for (auto it = obj_val->begin(); it != obj_val->end(); ++it) {
206
113
        kvs.emplace_back(StringRef(it->getKeyStr(), it->klen()), it->value());
207
113
    }
208
    // sort by key
209
44
    std::sort(kvs.begin(), kvs.end(),
210
117
              [](const auto& left, const auto& right) { return left.first < right.first; });
211
    // unique by key
212
44
    kvs.erase(std::unique(kvs.begin(), kvs.end(),
213
69
                          [](const auto& left, const auto& right) {
214
69
                              return left.first == right.first;
215
69
                          }),
216
44
              kvs.end());
217
44
    return kvs;
218
44
}
219
220
} // namespace doris