be/src/util/jsonb_document.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "util/jsonb_document.h" |
19 | | |
20 | | #include <memory> |
21 | | #include <string> |
22 | | #include <vector> |
23 | | |
24 | | #include "common/status.h" |
25 | | #include "util/jsonb_writer.h" |
26 | | |
27 | | namespace doris { |
28 | | |
29 | | Status JsonbDocument::checkAndCreateDocument(const char* pb, size_t size, |
30 | 1.19M | const JsonbDocument** doc) { |
31 | 1.19M | *doc = nullptr; |
32 | 1.19M | if (!pb || size == 0) { |
33 | 445 | static const std::string buf = []() { |
34 | 2 | JsonbWriter writer; |
35 | 2 | (void)writer.writeNull(); |
36 | 2 | auto* out = writer.getOutput(); |
37 | 2 | return std::string(out->getBuffer(), out->getSize()); |
38 | 2 | }(); |
39 | | // Treat empty input as a valid JSONB null document. |
40 | 445 | *doc = reinterpret_cast<const JsonbDocument*>(buf.data()); |
41 | 445 | return Status::OK(); |
42 | 445 | } |
43 | 1.19M | if (!pb || size < sizeof(JsonbHeader) + sizeof(JsonbValue)) { |
44 | 0 | return Status::InvalidArgument("Invalid JSONB document: too small size({}) or null pointer", |
45 | 0 | size); |
46 | 0 | } |
47 | | |
48 | 1.19M | const auto* doc_ptr = (const JsonbDocument*)pb; |
49 | 1.19M | if (doc_ptr->header_.ver_ != JSONB_VER) { |
50 | 2 | return Status::InvalidArgument("Invalid JSONB document: invalid version({})", |
51 | 2 | doc_ptr->header_.ver_); |
52 | 2 | } |
53 | | |
54 | 1.19M | const auto* val = (const JsonbValue*)doc_ptr->payload_; |
55 | 1.19M | if (val->type < JsonbType::T_Null || val->type >= JsonbType::NUM_TYPES || |
56 | 1.19M | size != sizeof(JsonbHeader) + val->numPackedBytes()) { |
57 | 0 | return Status::InvalidArgument("Invalid JSONB document: invalid type({}) or size({})", |
58 | 0 | static_cast<JsonbTypeUnder>(val->type), size); |
59 | 0 | } |
60 | | |
61 | 1.19M | *doc = doc_ptr; |
62 | 1.19M | return Status::OK(); |
63 | 1.19M | } |
64 | | |
65 | 147k | JsonbFindResult JsonbValue::findValue(JsonbPath& path) const { |
66 | 147k | JsonbFindResult result; |
67 | 147k | bool is_wildcard = false; |
68 | | |
69 | 147k | std::vector<const JsonbValue*> values; |
70 | 147k | std::vector<const JsonbValue*> results; |
71 | 147k | results.emplace_back(this); |
72 | | |
73 | 147k | if (path.is_supper_wildcard()) { |
74 | 7 | std::function<void(const JsonbValue*)> foreach_values; |
75 | 56 | foreach_values = [&](const JsonbValue* val) { |
76 | 56 | if (val->isObject()) { |
77 | 20 | for (const auto& it : *val->unpack<ObjectVal>()) { |
78 | 20 | results.emplace_back(it.value()); |
79 | 20 | foreach_values(it.value()); |
80 | 20 | } |
81 | 46 | } else if (val->isArray()) { |
82 | 29 | for (const auto& it : *val->unpack<ArrayVal>()) { |
83 | 29 | results.emplace_back(&it); |
84 | 29 | foreach_values(&it); |
85 | 29 | } |
86 | 13 | } |
87 | 56 | }; |
88 | 7 | is_wildcard = true; |
89 | 7 | foreach_values(this); |
90 | 7 | } |
91 | | |
92 | 391k | for (size_t i = 0; i < path.get_leg_vector_size(); ++i) { |
93 | 244k | values = std::move(results); |
94 | 244k | for (const auto* pval : values) { |
95 | 231k | switch (path.get_leg_from_leg_vector(i)->type) { |
96 | 21.1k | case MEMBER_CODE: { |
97 | 21.1k | if (LIKELY(pval->type == JsonbType::T_Object)) { |
98 | 7.35k | if (path.get_leg_from_leg_vector(i)->leg_len == 1 && |
99 | 7.35k | *path.get_leg_from_leg_vector(i)->leg_ptr == WILDCARD) { |
100 | 83 | is_wildcard = true; |
101 | 152 | for (const auto& it : *pval->unpack<ObjectVal>()) { |
102 | 152 | results.emplace_back(it.value()); |
103 | 152 | } |
104 | 83 | continue; |
105 | 83 | } |
106 | | |
107 | 7.27k | pval = pval->unpack<ObjectVal>()->find( |
108 | 7.27k | path.get_leg_from_leg_vector(i)->leg_ptr, |
109 | 7.27k | path.get_leg_from_leg_vector(i)->leg_len); |
110 | | |
111 | 7.27k | if (pval) { |
112 | 2.83k | results.emplace_back(pval); |
113 | 2.83k | } |
114 | 7.27k | } |
115 | 21.0k | continue; |
116 | 21.1k | } |
117 | 209k | case ARRAY_CODE: { |
118 | 209k | if (path.get_leg_from_leg_vector(i)->leg_len == 1 && |
119 | 209k | *path.get_leg_from_leg_vector(i)->leg_ptr == WILDCARD) { |
120 | 28 | if (LIKELY(pval->type == JsonbType::T_Array)) { |
121 | 22 | is_wildcard = true; |
122 | 53 | for (const auto& it : *pval->unpack<ArrayVal>()) { |
123 | 53 | results.emplace_back(&it); |
124 | 53 | } |
125 | 22 | } |
126 | 28 | continue; |
127 | 28 | } |
128 | | |
129 | 209k | if (pval->type != JsonbType::T_Array && |
130 | 209k | path.get_leg_from_leg_vector(i)->array_index == 0) { |
131 | | // Same as mysql and postgres |
132 | 1.11k | results.emplace_back(pval); |
133 | 1.11k | continue; |
134 | 1.11k | } |
135 | | |
136 | 208k | if (pval->type != JsonbType::T_Array || |
137 | 208k | path.get_leg_from_leg_vector(i)->leg_ptr != nullptr || |
138 | 208k | path.get_leg_from_leg_vector(i)->leg_len != 0) { |
139 | 7.29k | continue; |
140 | 7.29k | } |
141 | | |
142 | 200k | if (path.get_leg_from_leg_vector(i)->array_index >= 0) { |
143 | 197k | pval = pval->unpack<ArrayVal>()->get( |
144 | 197k | path.get_leg_from_leg_vector(i)->array_index); |
145 | 197k | } else { |
146 | 3.28k | pval = pval->unpack<ArrayVal>()->get( |
147 | 3.28k | pval->unpack<ArrayVal>()->numElem() + |
148 | 3.28k | path.get_leg_from_leg_vector(i)->array_index); |
149 | 3.28k | } |
150 | | |
151 | 200k | if (pval) { |
152 | 194k | results.emplace_back(pval); |
153 | 194k | } |
154 | 200k | continue; |
155 | 208k | } |
156 | 231k | } |
157 | 231k | } |
158 | 244k | } |
159 | | |
160 | 146k | if (is_wildcard) { |
161 | 84 | result.is_wildcard = true; |
162 | 84 | if (results.empty()) { |
163 | 15 | result.value = nullptr; // No values found |
164 | 69 | } else { |
165 | | /// if supper wildcard, need distinct results |
166 | | /// because supper wildcard will traverse all nodes |
167 | | /// |
168 | | /// `select json_extract( '[1]', '$**[0]' );` |
169 | | /// +---------------------------------+ |
170 | | /// | json_extract( '[1]', '$**[0]' ) | |
171 | | /// +---------------------------------+ |
172 | | /// | [1,1] | |
173 | | /// +---------------------------------+ |
174 | 69 | if (results.size() > 1 && path.is_supper_wildcard()) [[unlikely]] { |
175 | 4 | std::set<const JsonbValue*> distinct_results; |
176 | 17 | for (const auto* pval : results) { |
177 | 17 | distinct_results.insert(pval); |
178 | 17 | } |
179 | 4 | results.assign(distinct_results.begin(), distinct_results.end()); |
180 | 4 | } |
181 | 69 | result.writer = std::make_unique<JsonbWriter>(); |
182 | 69 | result.writer->writeStartArray(); |
183 | 176 | for (const auto* pval : results) { |
184 | 176 | result.writer->writeValue(pval); |
185 | 176 | } |
186 | 69 | result.writer->writeEndArray(); |
187 | | |
188 | 69 | const JsonbDocument* doc = nullptr; |
189 | 69 | THROW_IF_ERROR(JsonbDocument::checkAndCreateDocument( |
190 | 69 | result.writer->getOutput()->getBuffer(), result.writer->getOutput()->getSize(), |
191 | 69 | &doc)); |
192 | 69 | result.value = doc->getValue(); |
193 | 69 | } |
194 | 146k | } else if (results.size() == 1) { |
195 | 121k | result.value = results[0]; |
196 | 121k | } |
197 | | |
198 | 146k | return result; |
199 | 146k | } |
200 | | |
201 | | std::vector<std::pair<StringRef, const JsonbValue*>> ObjectVal::get_ordered_key_value_pairs() |
202 | 44 | const { |
203 | 44 | std::vector<std::pair<StringRef, const JsonbValue*>> kvs; |
204 | 44 | const auto* obj_val = this; |
205 | 157 | for (auto it = obj_val->begin(); it != obj_val->end(); ++it) { |
206 | 113 | kvs.emplace_back(StringRef(it->getKeyStr(), it->klen()), it->value()); |
207 | 113 | } |
208 | | // sort by key |
209 | 44 | std::sort(kvs.begin(), kvs.end(), |
210 | 117 | [](const auto& left, const auto& right) { return left.first < right.first; }); |
211 | | // unique by key |
212 | 44 | kvs.erase(std::unique(kvs.begin(), kvs.end(), |
213 | 69 | [](const auto& left, const auto& right) { |
214 | 69 | return left.first == right.first; |
215 | 69 | }), |
216 | 44 | kvs.end()); |
217 | 44 | return kvs; |
218 | 44 | } |
219 | | |
220 | | } // namespace doris |