/root/doris/be/src/util/jsonb_document.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
#include "jsonb_document.h"

#include <algorithm>
#include <functional>
#include <memory>
#include <set>
#include <string>
#include <utility>
#include <vector>

#include "common/status.h"
#include "util/jsonb_writer.h"
26 | | |
27 | | namespace doris { |
28 | | |
29 | | Status JsonbDocument::checkAndCreateDocument(const char* pb, size_t size, |
30 | 27.4k | const JsonbDocument** doc) { |
31 | 27.4k | *doc = nullptr; |
32 | 27.4k | if (!pb || size == 0) { |
33 | 2 | static const std::string buf = []() { |
34 | 1 | JsonbWriter writer; |
35 | 1 | (void)writer.writeNull(); |
36 | 1 | auto* out = writer.getOutput(); |
37 | 1 | return std::string(out->getBuffer(), out->getSize()); |
38 | 1 | }(); |
39 | | // Treat empty input as a valid JSONB null document. |
40 | 2 | *doc = reinterpret_cast<const JsonbDocument*>(buf.data()); |
41 | 2 | return Status::OK(); |
42 | 2 | } |
43 | 27.4k | if (!pb || size < sizeof(JsonbHeader) + sizeof(JsonbValue)) { |
44 | 0 | return Status::InvalidArgument("Invalid JSONB document: too small size({}) or null pointer", |
45 | 0 | size); |
46 | 0 | } |
47 | | |
48 | 27.4k | const auto* doc_ptr = (const JsonbDocument*)pb; |
49 | 27.4k | if (doc_ptr->header_.ver_ != JSONB_VER) { |
50 | 2 | return Status::InvalidArgument("Invalid JSONB document: invalid version({})", |
51 | 2 | doc_ptr->header_.ver_); |
52 | 2 | } |
53 | | |
54 | 27.4k | const auto* val = (const JsonbValue*)doc_ptr->payload_; |
55 | 27.4k | if (val->type < JsonbType::T_Null || val->type >= JsonbType::NUM_TYPES || |
56 | 27.4k | size != sizeof(JsonbHeader) + val->numPackedBytes()) { |
57 | 0 | return Status::InvalidArgument("Invalid JSONB document: invalid type({}) or size({})", |
58 | 0 | static_cast<JsonbTypeUnder>(val->type), size); |
59 | 0 | } |
60 | | |
61 | 27.4k | *doc = doc_ptr; |
62 | 27.4k | return Status::OK(); |
63 | 27.4k | } |
64 | | |
65 | 95 | JsonbFindResult JsonbValue::findValue(JsonbPath& path) const { |
66 | 95 | JsonbFindResult result; |
67 | 95 | bool is_wildcard = false; |
68 | | |
69 | 95 | std::vector<const JsonbValue*> values; |
70 | 95 | std::vector<const JsonbValue*> results; |
71 | 95 | results.emplace_back(this); |
72 | | |
73 | 95 | if (path.is_supper_wildcard()) { |
74 | 0 | std::function<void(const JsonbValue*)> foreach_values; |
75 | 0 | foreach_values = [&](const JsonbValue* val) { |
76 | 0 | if (val->isObject()) { |
77 | 0 | for (const auto& it : *val->unpack<ObjectVal>()) { |
78 | 0 | results.emplace_back(it.value()); |
79 | 0 | foreach_values(it.value()); |
80 | 0 | } |
81 | 0 | } else if (val->isArray()) { |
82 | 0 | for (const auto& it : *val->unpack<ArrayVal>()) { |
83 | 0 | results.emplace_back(&it); |
84 | 0 | foreach_values(&it); |
85 | 0 | } |
86 | 0 | } |
87 | 0 | }; |
88 | 0 | is_wildcard = true; |
89 | 0 | foreach_values(this); |
90 | 0 | } |
91 | | |
92 | 185 | for (size_t i = 0; i < path.get_leg_vector_size(); ++i) { |
93 | 90 | values = std::move(results); |
94 | 90 | for (const auto* pval : values) { |
95 | 89 | switch (path.get_leg_from_leg_vector(i)->type) { |
96 | 31 | case MEMBER_CODE: { |
97 | 31 | if (LIKELY(pval->type == JsonbType::T_Object)) { |
98 | 5 | if (path.get_leg_from_leg_vector(i)->leg_len == 1 && |
99 | 5 | *path.get_leg_from_leg_vector(i)->leg_ptr == WILDCARD) { |
100 | 0 | is_wildcard = true; |
101 | 0 | for (const auto& it : *pval->unpack<ObjectVal>()) { |
102 | 0 | results.emplace_back(it.value()); |
103 | 0 | } |
104 | 0 | continue; |
105 | 0 | } |
106 | | |
107 | 5 | pval = pval->unpack<ObjectVal>()->find( |
108 | 5 | path.get_leg_from_leg_vector(i)->leg_ptr, |
109 | 5 | path.get_leg_from_leg_vector(i)->leg_len); |
110 | | |
111 | 5 | if (pval) { |
112 | 3 | results.emplace_back(pval); |
113 | 3 | } |
114 | 5 | } |
115 | 31 | continue; |
116 | 31 | } |
117 | 58 | case ARRAY_CODE: { |
118 | 58 | if (path.get_leg_from_leg_vector(i)->leg_len == 1 && |
119 | 58 | *path.get_leg_from_leg_vector(i)->leg_ptr == WILDCARD) { |
120 | 0 | if (LIKELY(pval->type == JsonbType::T_Array)) { |
121 | 0 | is_wildcard = true; |
122 | 0 | for (const auto& it : *pval->unpack<ArrayVal>()) { |
123 | 0 | results.emplace_back(&it); |
124 | 0 | } |
125 | 0 | } |
126 | 0 | continue; |
127 | 0 | } |
128 | | |
129 | 58 | if (pval->type != JsonbType::T_Array && |
130 | 58 | path.get_leg_from_leg_vector(i)->array_index == 0) { |
131 | | // Same as mysql and postgres |
132 | 22 | results.emplace_back(pval); |
133 | 22 | continue; |
134 | 22 | } |
135 | | |
136 | 36 | if (pval->type != JsonbType::T_Array || |
137 | 36 | path.get_leg_from_leg_vector(i)->leg_ptr != nullptr || |
138 | 36 | path.get_leg_from_leg_vector(i)->leg_len != 0) { |
139 | 11 | continue; |
140 | 11 | } |
141 | | |
142 | 25 | if (path.get_leg_from_leg_vector(i)->array_index >= 0) { |
143 | 25 | pval = pval->unpack<ArrayVal>()->get( |
144 | 25 | path.get_leg_from_leg_vector(i)->array_index); |
145 | 25 | } else { |
146 | 0 | pval = pval->unpack<ArrayVal>()->get( |
147 | 0 | pval->unpack<ArrayVal>()->numElem() + |
148 | 0 | path.get_leg_from_leg_vector(i)->array_index); |
149 | 0 | } |
150 | | |
151 | 25 | if (pval) { |
152 | 18 | results.emplace_back(pval); |
153 | 18 | } |
154 | 25 | continue; |
155 | 36 | } |
156 | 89 | } |
157 | 89 | } |
158 | 90 | } |
159 | | |
160 | 95 | if (is_wildcard) { |
161 | 0 | result.is_wildcard = true; |
162 | 0 | if (results.empty()) { |
163 | 0 | result.value = nullptr; // No values found |
164 | 0 | } else { |
165 | | /// if supper wildcard, need distinct results |
166 | | /// because supper wildcard will traverse all nodes |
167 | | /// |
168 | | /// `select json_extract( '[1]', '$**[0]' );` |
169 | | /// +---------------------------------+ |
170 | | /// | json_extract( '[1]', '$**[0]' ) | |
171 | | /// +---------------------------------+ |
172 | | /// | [1,1] | |
173 | | /// +---------------------------------+ |
174 | 0 | if (results.size() > 1 && path.is_supper_wildcard()) [[unlikely]] { |
175 | 0 | std::set<const JsonbValue*> distinct_results; |
176 | 0 | for (const auto* pval : results) { |
177 | 0 | distinct_results.insert(pval); |
178 | 0 | } |
179 | 0 | results.assign(distinct_results.begin(), distinct_results.end()); |
180 | 0 | } |
181 | 0 | result.writer = std::make_unique<JsonbWriter>(); |
182 | 0 | result.writer->writeStartArray(); |
183 | 0 | for (const auto* pval : results) { |
184 | 0 | result.writer->writeValue(pval); |
185 | 0 | } |
186 | 0 | result.writer->writeEndArray(); |
187 | |
|
188 | 0 | const JsonbDocument* doc = nullptr; |
189 | 0 | THROW_IF_ERROR(JsonbDocument::checkAndCreateDocument( |
190 | 0 | result.writer->getOutput()->getBuffer(), result.writer->getOutput()->getSize(), |
191 | 0 | &doc)); |
192 | 0 | result.value = doc->getValue(); |
193 | 0 | } |
194 | 95 | } else if (results.size() == 1) { |
195 | 49 | result.value = results[0]; |
196 | 49 | } |
197 | | |
198 | 95 | return result; |
199 | 95 | } |
200 | | |
201 | | std::vector<std::pair<StringRef, const JsonbValue*>> ObjectVal::get_ordered_key_value_pairs() |
202 | 0 | const { |
203 | 0 | std::vector<std::pair<StringRef, const JsonbValue*>> kvs; |
204 | 0 | const auto* obj_val = this; |
205 | 0 | for (auto it = obj_val->begin(); it != obj_val->end(); ++it) { |
206 | 0 | kvs.emplace_back(StringRef(it->getKeyStr(), it->klen()), it->value()); |
207 | 0 | } |
208 | | // sort by key |
209 | 0 | std::sort(kvs.begin(), kvs.end(), |
210 | 0 | [](const auto& left, const auto& right) { return left.first < right.first; }); |
211 | | // unique by key |
212 | 0 | kvs.erase(std::unique(kvs.begin(), kvs.end(), |
213 | 0 | [](const auto& left, const auto& right) { |
214 | 0 | return left.first == right.first; |
215 | 0 | }), |
216 | 0 | kvs.end()); |
217 | 0 | return kvs; |
218 | 0 | } |
219 | | |
220 | | } // namespace doris |