Coverage Report

Created: 2025-12-25 18:36

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/be/src/util/jsonb_document.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "jsonb_document.h"
19
20
#include <memory>
21
#include <string>
22
#include <vector>
23
24
#include "common/status.h"
25
#include "util/jsonb_writer.h"
26
27
namespace doris {
28
29
Status JsonbDocument::checkAndCreateDocument(const char* pb, size_t size,
30
27.4k
                                             const JsonbDocument** doc) {
31
27.4k
    *doc = nullptr;
32
27.4k
    if (!pb || size == 0) {
33
2
        static const std::string buf = []() {
34
1
            JsonbWriter writer;
35
1
            (void)writer.writeNull();
36
1
            auto* out = writer.getOutput();
37
1
            return std::string(out->getBuffer(), out->getSize());
38
1
        }();
39
        // Treat empty input as a valid JSONB null document.
40
2
        *doc = reinterpret_cast<const JsonbDocument*>(buf.data());
41
2
        return Status::OK();
42
2
    }
43
27.4k
    if (!pb || size < sizeof(JsonbHeader) + sizeof(JsonbValue)) {
44
0
        return Status::InvalidArgument("Invalid JSONB document: too small size({}) or null pointer",
45
0
                                       size);
46
0
    }
47
48
27.4k
    const auto* doc_ptr = (const JsonbDocument*)pb;
49
27.4k
    if (doc_ptr->header_.ver_ != JSONB_VER) {
50
2
        return Status::InvalidArgument("Invalid JSONB document: invalid version({})",
51
2
                                       doc_ptr->header_.ver_);
52
2
    }
53
54
27.4k
    const auto* val = (const JsonbValue*)doc_ptr->payload_;
55
27.4k
    if (val->type < JsonbType::T_Null || val->type >= JsonbType::NUM_TYPES ||
56
27.4k
        size != sizeof(JsonbHeader) + val->numPackedBytes()) {
57
0
        return Status::InvalidArgument("Invalid JSONB document: invalid type({}) or size({})",
58
0
                                       static_cast<JsonbTypeUnder>(val->type), size);
59
0
    }
60
61
27.4k
    *doc = doc_ptr;
62
27.4k
    return Status::OK();
63
27.4k
}
64
65
95
JsonbFindResult JsonbValue::findValue(JsonbPath& path) const {
66
95
    JsonbFindResult result;
67
95
    bool is_wildcard = false;
68
69
95
    std::vector<const JsonbValue*> values;
70
95
    std::vector<const JsonbValue*> results;
71
95
    results.emplace_back(this);
72
73
95
    if (path.is_supper_wildcard()) {
74
0
        std::function<void(const JsonbValue*)> foreach_values;
75
0
        foreach_values = [&](const JsonbValue* val) {
76
0
            if (val->isObject()) {
77
0
                for (const auto& it : *val->unpack<ObjectVal>()) {
78
0
                    results.emplace_back(it.value());
79
0
                    foreach_values(it.value());
80
0
                }
81
0
            } else if (val->isArray()) {
82
0
                for (const auto& it : *val->unpack<ArrayVal>()) {
83
0
                    results.emplace_back(&it);
84
0
                    foreach_values(&it);
85
0
                }
86
0
            }
87
0
        };
88
0
        is_wildcard = true;
89
0
        foreach_values(this);
90
0
    }
91
92
185
    for (size_t i = 0; i < path.get_leg_vector_size(); ++i) {
93
90
        values = std::move(results);
94
90
        for (const auto* pval : values) {
95
89
            switch (path.get_leg_from_leg_vector(i)->type) {
96
31
            case MEMBER_CODE: {
97
31
                if (LIKELY(pval->type == JsonbType::T_Object)) {
98
5
                    if (path.get_leg_from_leg_vector(i)->leg_len == 1 &&
99
5
                        *path.get_leg_from_leg_vector(i)->leg_ptr == WILDCARD) {
100
0
                        is_wildcard = true;
101
0
                        for (const auto& it : *pval->unpack<ObjectVal>()) {
102
0
                            results.emplace_back(it.value());
103
0
                        }
104
0
                        continue;
105
0
                    }
106
107
5
                    pval = pval->unpack<ObjectVal>()->find(
108
5
                            path.get_leg_from_leg_vector(i)->leg_ptr,
109
5
                            path.get_leg_from_leg_vector(i)->leg_len);
110
111
5
                    if (pval) {
112
3
                        results.emplace_back(pval);
113
3
                    }
114
5
                }
115
31
                continue;
116
31
            }
117
58
            case ARRAY_CODE: {
118
58
                if (path.get_leg_from_leg_vector(i)->leg_len == 1 &&
119
58
                    *path.get_leg_from_leg_vector(i)->leg_ptr == WILDCARD) {
120
0
                    if (LIKELY(pval->type == JsonbType::T_Array)) {
121
0
                        is_wildcard = true;
122
0
                        for (const auto& it : *pval->unpack<ArrayVal>()) {
123
0
                            results.emplace_back(&it);
124
0
                        }
125
0
                    }
126
0
                    continue;
127
0
                }
128
129
58
                if (pval->type != JsonbType::T_Array &&
130
58
                    path.get_leg_from_leg_vector(i)->array_index == 0) {
131
                    // Same as mysql and postgres
132
22
                    results.emplace_back(pval);
133
22
                    continue;
134
22
                }
135
136
36
                if (pval->type != JsonbType::T_Array ||
137
36
                    path.get_leg_from_leg_vector(i)->leg_ptr != nullptr ||
138
36
                    path.get_leg_from_leg_vector(i)->leg_len != 0) {
139
11
                    continue;
140
11
                }
141
142
25
                if (path.get_leg_from_leg_vector(i)->array_index >= 0) {
143
25
                    pval = pval->unpack<ArrayVal>()->get(
144
25
                            path.get_leg_from_leg_vector(i)->array_index);
145
25
                } else {
146
0
                    pval = pval->unpack<ArrayVal>()->get(
147
0
                            pval->unpack<ArrayVal>()->numElem() +
148
0
                            path.get_leg_from_leg_vector(i)->array_index);
149
0
                }
150
151
25
                if (pval) {
152
18
                    results.emplace_back(pval);
153
18
                }
154
25
                continue;
155
36
            }
156
89
            }
157
89
        }
158
90
    }
159
160
95
    if (is_wildcard) {
161
0
        result.is_wildcard = true;
162
0
        if (results.empty()) {
163
0
            result.value = nullptr; // No values found
164
0
        } else {
165
            /// if supper wildcard, need distinct results
166
            /// because supper wildcard will traverse all nodes
167
            ///
168
            /// `select json_extract( '[1]', '$**[0]' );`
169
            /// +---------------------------------+
170
            /// | json_extract( '[1]', '$**[0]' ) |
171
            /// +---------------------------------+
172
            /// | [1,1]                           |
173
            /// +---------------------------------+
174
0
            if (results.size() > 1 && path.is_supper_wildcard()) [[unlikely]] {
175
0
                std::set<const JsonbValue*> distinct_results;
176
0
                for (const auto* pval : results) {
177
0
                    distinct_results.insert(pval);
178
0
                }
179
0
                results.assign(distinct_results.begin(), distinct_results.end());
180
0
            }
181
0
            result.writer = std::make_unique<JsonbWriter>();
182
0
            result.writer->writeStartArray();
183
0
            for (const auto* pval : results) {
184
0
                result.writer->writeValue(pval);
185
0
            }
186
0
            result.writer->writeEndArray();
187
188
0
            const JsonbDocument* doc = nullptr;
189
0
            THROW_IF_ERROR(JsonbDocument::checkAndCreateDocument(
190
0
                    result.writer->getOutput()->getBuffer(), result.writer->getOutput()->getSize(),
191
0
                    &doc));
192
0
            result.value = doc->getValue();
193
0
        }
194
95
    } else if (results.size() == 1) {
195
49
        result.value = results[0];
196
49
    }
197
198
95
    return result;
199
95
}
200
201
std::vector<std::pair<StringRef, const JsonbValue*>> ObjectVal::get_ordered_key_value_pairs()
202
0
        const {
203
0
    std::vector<std::pair<StringRef, const JsonbValue*>> kvs;
204
0
    const auto* obj_val = this;
205
0
    for (auto it = obj_val->begin(); it != obj_val->end(); ++it) {
206
0
        kvs.emplace_back(StringRef(it->getKeyStr(), it->klen()), it->value());
207
0
    }
208
    // sort by key
209
0
    std::sort(kvs.begin(), kvs.end(),
210
0
              [](const auto& left, const auto& right) { return left.first < right.first; });
211
    // unique by key
212
0
    kvs.erase(std::unique(kvs.begin(), kvs.end(),
213
0
                          [](const auto& left, const auto& right) {
214
0
                              return left.first == right.first;
215
0
                          }),
216
0
              kvs.end());
217
0
    return kvs;
218
0
}
219
220
} // namespace doris