be/src/util/jsonb_document.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "util/jsonb_document.h" |
19 | | |
20 | | #include <memory> |
21 | | #include <string> |
22 | | #include <vector> |
23 | | |
24 | | #include "common/status.h" |
25 | | #include "util/jsonb_writer.h" |
26 | | |
27 | | namespace doris { |
28 | | |
29 | | Status JsonbDocument::checkAndCreateDocument(const char* pb, size_t size, |
30 | 1.06M | const JsonbDocument** doc) { |
31 | 1.06M | *doc = nullptr; |
32 | 1.06M | if (!pb || size == 0) { |
33 | 54 | static const std::string buf = []() { |
34 | 2 | JsonbWriter writer; |
35 | 2 | (void)writer.writeNull(); |
36 | 2 | auto* out = writer.getOutput(); |
37 | 2 | return std::string(out->getBuffer(), out->getSize()); |
38 | 2 | }(); |
39 | | // Treat empty input as a valid JSONB null document. |
40 | 54 | *doc = reinterpret_cast<const JsonbDocument*>(buf.data()); |
41 | 54 | return Status::OK(); |
42 | 54 | } |
43 | 1.06M | if (!pb || size < sizeof(JsonbHeader) + sizeof(JsonbValue)) { |
44 | 0 | return Status::InvalidArgument("Invalid JSONB document: too small size({}) or null pointer", |
45 | 0 | size); |
46 | 0 | } |
47 | | |
48 | 1.06M | const auto* doc_ptr = (const JsonbDocument*)pb; |
49 | 1.06M | if (doc_ptr->header_.ver_ != JSONB_VER) { |
50 | 5 | return Status::InvalidArgument("Invalid JSONB document: invalid version({})", |
51 | 5 | doc_ptr->header_.ver_); |
52 | 5 | } |
53 | | |
54 | 1.06M | const auto* val = (const JsonbValue*)doc_ptr->payload_; |
55 | | // Keep this check lightweight. This API is used by JSONB scalar/table functions on every row, |
56 | | // so recursively validating object/array payloads here would add an O(document size) scan before |
57 | | // the real operation and can regress large JSONB queries. External INSERT/LOAD paths build JSONB |
58 | | // through JsonBinaryValue/JsonbWriter before storage; any untrusted raw binary boundary should |
59 | | // add explicit deep validation there instead of changing this hot-path helper. |
60 | 1.06M | if (val->type < JsonbType::T_Null || val->type >= JsonbType::NUM_TYPES || |
61 | 1.06M | size != sizeof(JsonbHeader) + val->numPackedBytes()) { |
62 | 0 | return Status::InvalidArgument("Invalid JSONB document: invalid type({}) or size({})", |
63 | 0 | static_cast<JsonbTypeUnder>(val->type), size); |
64 | 0 | } |
65 | | |
66 | 1.06M | *doc = doc_ptr; |
67 | 1.06M | return Status::OK(); |
68 | 1.06M | } |
69 | | |
70 | 144k | JsonbFindResult JsonbValue::findValue(JsonbPath& path) const { |
71 | 144k | JsonbFindResult result; |
72 | 144k | bool is_wildcard = false; |
73 | | |
74 | 144k | std::vector<const JsonbValue*> values; |
75 | 144k | std::vector<const JsonbValue*> results; |
76 | 144k | results.emplace_back(this); |
77 | | |
78 | 144k | if (path.is_supper_wildcard()) { |
79 | 7 | std::function<void(const JsonbValue*)> foreach_values; |
80 | 56 | foreach_values = [&](const JsonbValue* val) { |
81 | 56 | if (val->isObject()) { |
82 | 20 | for (const auto& it : *val->unpack<ObjectVal>()) { |
83 | 20 | results.emplace_back(it.value()); |
84 | 20 | foreach_values(it.value()); |
85 | 20 | } |
86 | 46 | } else if (val->isArray()) { |
87 | 29 | for (const auto& it : *val->unpack<ArrayVal>()) { |
88 | 29 | results.emplace_back(&it); |
89 | 29 | foreach_values(&it); |
90 | 29 | } |
91 | 13 | } |
92 | 56 | }; |
93 | 7 | is_wildcard = true; |
94 | 7 | foreach_values(this); |
95 | 7 | } |
96 | | |
97 | 381k | for (size_t i = 0; i < path.get_leg_vector_size(); ++i) { |
98 | 239k | values = std::move(results); |
99 | 239k | for (const auto* pval : values) { |
100 | 226k | switch (path.get_leg_from_leg_vector(i)->type) { |
101 | 20.9k | case MEMBER_CODE: { |
102 | 20.9k | if (LIKELY(pval->type == JsonbType::T_Object)) { |
103 | 7.13k | if (path.get_leg_from_leg_vector(i)->leg_len == 1 && |
104 | 7.13k | *path.get_leg_from_leg_vector(i)->leg_ptr == WILDCARD) { |
105 | 83 | is_wildcard = true; |
106 | 152 | for (const auto& it : *pval->unpack<ObjectVal>()) { |
107 | 152 | results.emplace_back(it.value()); |
108 | 152 | } |
109 | 83 | continue; |
110 | 83 | } |
111 | | |
112 | 7.05k | pval = pval->unpack<ObjectVal>()->find( |
113 | 7.05k | path.get_leg_from_leg_vector(i)->leg_ptr, |
114 | 7.05k | path.get_leg_from_leg_vector(i)->leg_len); |
115 | | |
116 | 7.05k | if (pval) { |
117 | 2.61k | results.emplace_back(pval); |
118 | 2.61k | } |
119 | 7.05k | } |
120 | 20.8k | continue; |
121 | 20.9k | } |
122 | 204k | case ARRAY_CODE: { |
123 | 204k | if (path.get_leg_from_leg_vector(i)->leg_len == 1 && |
124 | 204k | *path.get_leg_from_leg_vector(i)->leg_ptr == WILDCARD) { |
125 | 28 | if (LIKELY(pval->type == JsonbType::T_Array)) { |
126 | 22 | is_wildcard = true; |
127 | 53 | for (const auto& it : *pval->unpack<ArrayVal>()) { |
128 | 53 | results.emplace_back(&it); |
129 | 53 | } |
130 | 22 | } |
131 | 28 | continue; |
132 | 28 | } |
133 | | |
134 | 204k | if (pval->type != JsonbType::T_Array && |
135 | 204k | path.get_leg_from_leg_vector(i)->array_index == 0) { |
136 | | // Same as mysql and postgres |
137 | 1.11k | results.emplace_back(pval); |
138 | 1.11k | continue; |
139 | 1.11k | } |
140 | | |
141 | 203k | if (pval->type != JsonbType::T_Array || |
142 | 203k | path.get_leg_from_leg_vector(i)->leg_ptr != nullptr || |
143 | 203k | path.get_leg_from_leg_vector(i)->leg_len != 0) { |
144 | 7.29k | continue; |
145 | 7.29k | } |
146 | | |
147 | 195k | if (path.get_leg_from_leg_vector(i)->array_index >= 0) { |
148 | 189k | pval = pval->unpack<ArrayVal>()->get( |
149 | 189k | path.get_leg_from_leg_vector(i)->array_index); |
150 | 189k | } else { |
151 | 6.17k | pval = pval->unpack<ArrayVal>()->get( |
152 | 6.17k | pval->unpack<ArrayVal>()->numElem() + |
153 | 6.17k | path.get_leg_from_leg_vector(i)->array_index); |
154 | 6.17k | } |
155 | | |
156 | 195k | if (pval) { |
157 | 186k | results.emplace_back(pval); |
158 | 186k | } |
159 | 195k | continue; |
160 | 203k | } |
161 | 226k | } |
162 | 226k | } |
163 | 239k | } |
164 | | |
165 | 142k | if (is_wildcard) { |
166 | 84 | result.is_wildcard = true; |
167 | 84 | if (results.empty()) { |
168 | 15 | result.value = nullptr; // No values found |
169 | 69 | } else { |
170 | | /// if supper wildcard, need distinct results |
171 | | /// because supper wildcard will traverse all nodes |
172 | | /// |
173 | | /// `select json_extract( '[1]', '$**[0]' );` |
174 | | /// +---------------------------------+ |
175 | | /// | json_extract( '[1]', '$**[0]' ) | |
176 | | /// +---------------------------------+ |
177 | | /// | [1,1] | |
178 | | /// +---------------------------------+ |
179 | 69 | if (results.size() > 1 && path.is_supper_wildcard()) [[unlikely]] { |
180 | 4 | std::set<const JsonbValue*> distinct_results; |
181 | 17 | for (const auto* pval : results) { |
182 | 17 | distinct_results.insert(pval); |
183 | 17 | } |
184 | 4 | results.assign(distinct_results.begin(), distinct_results.end()); |
185 | 4 | } |
186 | 69 | result.writer = std::make_unique<JsonbWriter>(); |
187 | 69 | result.writer->writeStartArray(); |
188 | 176 | for (const auto* pval : results) { |
189 | 176 | result.writer->writeValue(pval); |
190 | 176 | } |
191 | 69 | result.writer->writeEndArray(); |
192 | | |
193 | 69 | const JsonbDocument* doc = nullptr; |
194 | 69 | THROW_IF_ERROR(JsonbDocument::checkAndCreateDocument( |
195 | 69 | result.writer->getOutput()->getBuffer(), result.writer->getOutput()->getSize(), |
196 | 69 | &doc)); |
197 | 69 | result.value = doc->getValue(); |
198 | 69 | } |
199 | 142k | } else if (results.size() == 1) { |
200 | 120k | result.value = results[0]; |
201 | 120k | } |
202 | | |
203 | 142k | return result; |
204 | 142k | } |
205 | | |
206 | | std::vector<std::pair<StringRef, const JsonbValue*>> ObjectVal::get_ordered_key_value_pairs() |
207 | 44 | const { |
208 | 44 | std::vector<std::pair<StringRef, const JsonbValue*>> kvs; |
209 | 44 | const auto* obj_val = this; |
210 | 157 | for (auto it = obj_val->begin(); it != obj_val->end(); ++it) { |
211 | 113 | kvs.emplace_back(StringRef(it->getKeyStr(), it->klen()), it->value()); |
212 | 113 | } |
213 | | // sort by key |
214 | 44 | std::sort(kvs.begin(), kvs.end(), |
215 | 117 | [](const auto& left, const auto& right) { return left.first < right.first; }); |
216 | | // unique by key |
217 | 44 | kvs.erase(std::unique(kvs.begin(), kvs.end(), |
218 | 69 | [](const auto& left, const auto& right) { |
219 | 69 | return left.first == right.first; |
220 | 69 | }), |
221 | 44 | kvs.end()); |
222 | 44 | return kvs; |
223 | 44 | } |
224 | | |
225 | | } // namespace doris |