/root/doris/be/src/util/bitmap_intersect.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | #pragma once |
18 | | #include <parallel_hashmap/phmap.h> |
19 | | |
20 | | #include "common/cast_set.h" |
21 | | #include "util/bitmap_value.h" |
22 | | #include "vec/common/string_ref.h" |
23 | | |
24 | | namespace doris { |
25 | | #include "common/compile_check_begin.h" |
26 | | |
27 | | namespace detail { |
28 | | class Helper { |
29 | | public: |
30 | | static const int DATETIME_PACKED_TIME_BYTE_SIZE = 8; |
31 | | static const int DATETIME_TYPE_BYTE_SIZE = 4; |
32 | | static const int DECIMAL_BYTE_SIZE = 16; |
33 | | |
34 | | // serialize_size start |
35 | | template <typename T> |
36 | 0 | static int32_t serialize_size(const T& v) { |
37 | 0 | return sizeof(T); |
38 | 0 | } Unexecuted instantiation: _ZN5doris6detail6Helper14serialize_sizeIaEEiRKT_ Unexecuted instantiation: _ZN5doris6detail6Helper14serialize_sizeIsEEiRKT_ Unexecuted instantiation: _ZN5doris6detail6Helper14serialize_sizeIiEEiRKT_ Unexecuted instantiation: _ZN5doris6detail6Helper14serialize_sizeIlEEiRKT_ Unexecuted instantiation: _ZN5doris6detail6Helper14serialize_sizeInEEiRKT_ |
39 | | |
40 | | // write_to start |
41 | | template <typename T> |
42 | 0 | static char* write_to(const T& v, char* dest) { |
43 | 0 | size_t type_size = sizeof(T); |
44 | 0 | memcpy(dest, &v, type_size); |
45 | 0 | dest += type_size; |
46 | 0 | return dest; |
47 | 0 | } Unexecuted instantiation: _ZN5doris6detail6Helper8write_toIaEEPcRKT_S3_ Unexecuted instantiation: _ZN5doris6detail6Helper8write_toIsEEPcRKT_S3_ Unexecuted instantiation: _ZN5doris6detail6Helper8write_toIiEEPcRKT_S3_ Unexecuted instantiation: _ZN5doris6detail6Helper8write_toIlEEPcRKT_S3_ Unexecuted instantiation: _ZN5doris6detail6Helper8write_toInEEPcRKT_S3_ |
48 | | |
49 | | // read_from start |
50 | | template <typename T> |
51 | 0 | static void read_from(const char** src, T* result) { |
52 | 0 | size_t type_size = sizeof(T); |
53 | 0 | memcpy(result, *src, type_size); |
54 | 0 | *src += type_size; |
55 | 0 | } Unexecuted instantiation: _ZN5doris6detail6Helper9read_fromIaEEvPPKcPT_ Unexecuted instantiation: _ZN5doris6detail6Helper9read_fromIsEEvPPKcPT_ Unexecuted instantiation: _ZN5doris6detail6Helper9read_fromIiEEvPPKcPT_ Unexecuted instantiation: _ZN5doris6detail6Helper9read_fromIlEEvPPKcPT_ Unexecuted instantiation: _ZN5doris6detail6Helper9read_fromInEEvPPKcPT_ |
56 | | }; |
57 | | |
58 | | template <> |
59 | 0 | char* Helper::write_to<VecDateTimeValue>(const VecDateTimeValue& v, char* dest) { |
60 | 0 | *(int64_t*)dest = v.to_int64_datetime_packed(); |
61 | 0 | dest += DATETIME_PACKED_TIME_BYTE_SIZE; |
62 | 0 | *(int*)dest = v.type(); |
63 | 0 | dest += DATETIME_TYPE_BYTE_SIZE; |
64 | 0 | return dest; |
65 | 0 | } |
66 | | |
67 | | template <> |
68 | 0 | char* Helper::write_to<DecimalV2Value>(const DecimalV2Value& v, char* dest) { |
69 | 0 | __int128 value = v.value(); |
70 | 0 | memcpy(dest, &value, DECIMAL_BYTE_SIZE); |
71 | 0 | dest += DECIMAL_BYTE_SIZE; |
72 | 0 | return dest; |
73 | 0 | } |
74 | | |
75 | | template <> |
76 | 0 | char* Helper::write_to<StringRef>(const StringRef& v, char* dest) { |
77 | 0 | *(int32_t*)dest = cast_set<int32_t>(v.size); |
78 | 0 | dest += 4; |
79 | 0 | memcpy(dest, v.data, v.size); |
80 | 0 | dest += v.size; |
81 | 0 | return dest; |
82 | 0 | } |
83 | | |
84 | | template <> |
85 | 0 | char* Helper::write_to<std::string>(const std::string& v, char* dest) { |
86 | 0 | *(uint32_t*)dest = cast_set<uint32_t>(v.size()); |
87 | 0 | dest += 4; |
88 | 0 | memcpy(dest, v.c_str(), v.size()); |
89 | 0 | dest += v.size(); |
90 | 0 | return dest; |
91 | 0 | } |
92 | | // write_to end |
93 | | |
94 | | template <> |
95 | 0 | int32_t Helper::serialize_size<VecDateTimeValue>(const VecDateTimeValue& v) { |
96 | 0 | return Helper::DATETIME_PACKED_TIME_BYTE_SIZE + Helper::DATETIME_TYPE_BYTE_SIZE; |
97 | 0 | } |
98 | | |
99 | | template <> |
100 | 0 | int32_t Helper::serialize_size<DecimalV2Value>(const DecimalV2Value& v) { |
101 | 0 | return Helper::DECIMAL_BYTE_SIZE; |
102 | 0 | } |
103 | | |
104 | | template <> |
105 | 0 | int32_t Helper::serialize_size<StringRef>(const StringRef& v) { |
106 | 0 | return cast_set<int32_t>(v.size + 4); |
107 | 0 | } |
108 | | |
109 | | template <> |
110 | 0 | int32_t Helper::serialize_size<std::string>(const std::string& v) { |
111 | 0 | return cast_set<int32_t>(v.size() + 4); |
112 | 0 | } |
113 | | // serialize_size end |
114 | | |
115 | | template <> |
116 | 0 | void Helper::read_from<VecDateTimeValue>(const char** src, VecDateTimeValue* result) { |
117 | 0 | result->from_packed_time(*(int64_t*)(*src)); |
118 | 0 | *src += DATETIME_PACKED_TIME_BYTE_SIZE; |
119 | 0 | if (*(int*)(*src) == TIME_DATE) { |
120 | 0 | result->cast_to_date(); |
121 | 0 | } |
122 | 0 | *src += DATETIME_TYPE_BYTE_SIZE; |
123 | 0 | } |
124 | | |
125 | | template <> |
126 | 0 | void Helper::read_from<DecimalV2Value>(const char** src, DecimalV2Value* result) { |
127 | 0 | __int128 v = 0; |
128 | 0 | memcpy(&v, *src, DECIMAL_BYTE_SIZE); |
129 | 0 | *src += DECIMAL_BYTE_SIZE; |
130 | 0 | *result = DecimalV2Value(v); |
131 | 0 | } |
132 | | |
133 | | template <> |
134 | 0 | void Helper::read_from<StringRef>(const char** src, StringRef* result) { |
135 | 0 | int32_t length = *(int32_t*)(*src); |
136 | 0 | *src += 4; |
137 | 0 | *result = StringRef((char*)*src, length); |
138 | 0 | *src += length; |
139 | 0 | } |
140 | | |
141 | | template <> |
142 | 0 | void Helper::read_from<std::string>(const char** src, std::string* result) { |
143 | 0 | int32_t length = *(int32_t*)(*src); |
144 | 0 | *src += 4; |
145 | 0 | *result = std::string((char*)*src, length); |
146 | 0 | *src += length; |
147 | 0 | } |
148 | | // read_from end |
149 | | } // namespace detail |
150 | | |
151 | | // Calculate the intersection of two or more bitmaps |
152 | | // Usage: intersect_count(bitmap_column_to_count, filter_column, filter_values ...) |
153 | | // Example: intersect_count(user_id, event, 'A', 'B', 'C'), meaning find the intersect count of user_id in all A/B/C 3 bitmaps |
154 | | // Todo(kks) Use Array type instead of variable arguments |
155 | | template <typename T> |
156 | | struct BitmapIntersect { |
157 | | public: |
158 | 0 | BitmapIntersect() = default; Unexecuted instantiation: _ZN5doris15BitmapIntersectIaEC2Ev Unexecuted instantiation: _ZN5doris15BitmapIntersectIsEC2Ev Unexecuted instantiation: _ZN5doris15BitmapIntersectIiEC2Ev Unexecuted instantiation: _ZN5doris15BitmapIntersectIlEC2Ev Unexecuted instantiation: _ZN5doris15BitmapIntersectInEC2Ev Unexecuted instantiation: _ZN5doris15BitmapIntersectINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEC2Ev |
159 | | |
160 | | explicit BitmapIntersect(const char* src) { deserialize(src); } |
161 | | |
162 | 0 | void add_key(const T key) { |
163 | 0 | BitmapValue empty_bitmap; |
164 | 0 | _bitmaps[key] = empty_bitmap; |
165 | 0 | } Unexecuted instantiation: _ZN5doris15BitmapIntersectIaE7add_keyEa Unexecuted instantiation: _ZN5doris15BitmapIntersectIsE7add_keyEs Unexecuted instantiation: _ZN5doris15BitmapIntersectIiE7add_keyEi Unexecuted instantiation: _ZN5doris15BitmapIntersectIlE7add_keyEl Unexecuted instantiation: _ZN5doris15BitmapIntersectInE7add_keyEn Unexecuted instantiation: _ZN5doris15BitmapIntersectINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEE7add_keyES6_ |
166 | | |
167 | 0 | void update(const T& key, const BitmapValue& bitmap) { |
168 | 0 | if (_bitmaps.find(key) != _bitmaps.end()) { |
169 | 0 | _bitmaps[key] |= bitmap; |
170 | 0 | } |
171 | 0 | } Unexecuted instantiation: _ZN5doris15BitmapIntersectIaE6updateERKaRKNS_11BitmapValueE Unexecuted instantiation: _ZN5doris15BitmapIntersectIsE6updateERKsRKNS_11BitmapValueE Unexecuted instantiation: _ZN5doris15BitmapIntersectIiE6updateERKiRKNS_11BitmapValueE Unexecuted instantiation: _ZN5doris15BitmapIntersectIlE6updateERKlRKNS_11BitmapValueE Unexecuted instantiation: _ZN5doris15BitmapIntersectInE6updateERKnRKNS_11BitmapValueE Unexecuted instantiation: _ZN5doris15BitmapIntersectINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEE6updateERKS6_RKNS_11BitmapValueE |
172 | | |
173 | 0 | void merge(const BitmapIntersect& other) { |
174 | 0 | for (auto& kv : other._bitmaps) { |
175 | 0 | if (_bitmaps.find(kv.first) != _bitmaps.end()) { |
176 | 0 | _bitmaps[kv.first] |= kv.second; |
177 | 0 | } else { |
178 | 0 | _bitmaps[kv.first] = kv.second; |
179 | 0 | } |
180 | 0 | } |
181 | 0 | } Unexecuted instantiation: _ZN5doris15BitmapIntersectIaE5mergeERKS1_ Unexecuted instantiation: _ZN5doris15BitmapIntersectIsE5mergeERKS1_ Unexecuted instantiation: _ZN5doris15BitmapIntersectIiE5mergeERKS1_ Unexecuted instantiation: _ZN5doris15BitmapIntersectIlE5mergeERKS1_ Unexecuted instantiation: _ZN5doris15BitmapIntersectInE5mergeERKS1_ |
182 | | |
183 | | // intersection |
184 | 0 | BitmapValue intersect() const { |
185 | 0 | BitmapValue result; |
186 | 0 | if (_bitmaps.empty()) { |
187 | 0 | return result; |
188 | 0 | } |
189 | 0 | auto it = _bitmaps.begin(); |
190 | 0 | result |= it->second; |
191 | 0 | it++; |
192 | 0 | for (; it != _bitmaps.end(); it++) { |
193 | 0 | result &= it->second; |
194 | 0 | } |
195 | 0 | return result; |
196 | 0 | } Unexecuted instantiation: _ZNK5doris15BitmapIntersectIaE9intersectEv Unexecuted instantiation: _ZNK5doris15BitmapIntersectIsE9intersectEv Unexecuted instantiation: _ZNK5doris15BitmapIntersectIiE9intersectEv Unexecuted instantiation: _ZNK5doris15BitmapIntersectIlE9intersectEv Unexecuted instantiation: _ZNK5doris15BitmapIntersectInE9intersectEv |
197 | | |
198 | | // calculate the intersection for _bitmaps's bitmap values |
199 | 0 | int64_t intersect_count() const { |
200 | 0 | if (_bitmaps.empty()) { |
201 | 0 | return 0; |
202 | 0 | } |
203 | 0 | return intersect().cardinality(); |
204 | 0 | } Unexecuted instantiation: _ZNK5doris15BitmapIntersectIaE15intersect_countEv Unexecuted instantiation: _ZNK5doris15BitmapIntersectIsE15intersect_countEv Unexecuted instantiation: _ZNK5doris15BitmapIntersectIiE15intersect_countEv Unexecuted instantiation: _ZNK5doris15BitmapIntersectIlE15intersect_countEv Unexecuted instantiation: _ZNK5doris15BitmapIntersectInE15intersect_countEv |
205 | | |
206 | | // the serialize size |
207 | 0 | size_t size() { |
208 | 0 | size_t size = 4; |
209 | 0 | for (auto& kv : _bitmaps) { |
210 | 0 | size += detail::Helper::serialize_size(kv.first); |
211 | 0 | size += kv.second.getSizeInBytes(); |
212 | 0 | } |
213 | 0 | return size; |
214 | 0 | } Unexecuted instantiation: _ZN5doris15BitmapIntersectIaE4sizeEv Unexecuted instantiation: _ZN5doris15BitmapIntersectIsE4sizeEv Unexecuted instantiation: _ZN5doris15BitmapIntersectIiE4sizeEv Unexecuted instantiation: _ZN5doris15BitmapIntersectIlE4sizeEv Unexecuted instantiation: _ZN5doris15BitmapIntersectInE4sizeEv |
215 | | |
216 | | //must call size() first |
217 | 0 | void serialize(char* dest) { |
218 | 0 | char* writer = dest; |
219 | 0 | *(int32_t*)writer = cast_set<int32_t>(_bitmaps.size()); |
220 | 0 | writer += 4; |
221 | 0 | for (auto& kv : _bitmaps) { |
222 | 0 | writer = detail::Helper::write_to(kv.first, writer); |
223 | 0 | kv.second.write_to(writer); |
224 | 0 | writer += kv.second.getSizeInBytes(); |
225 | 0 | } |
226 | 0 | } Unexecuted instantiation: _ZN5doris15BitmapIntersectIaE9serializeEPc Unexecuted instantiation: _ZN5doris15BitmapIntersectIsE9serializeEPc Unexecuted instantiation: _ZN5doris15BitmapIntersectIiE9serializeEPc Unexecuted instantiation: _ZN5doris15BitmapIntersectIlE9serializeEPc Unexecuted instantiation: _ZN5doris15BitmapIntersectInE9serializeEPc |
227 | | |
228 | 0 | void deserialize(const char* src) { |
229 | 0 | const char* reader = src; |
230 | 0 | int32_t bitmaps_size = *(int32_t*)reader; |
231 | 0 | reader += 4; |
232 | 0 | for (int32_t i = 0; i < bitmaps_size; i++) { |
233 | 0 | T key; |
234 | 0 | detail::Helper::read_from(&reader, &key); |
235 | 0 | BitmapValue bitmap(reader); |
236 | 0 | reader += bitmap.getSizeInBytes(); |
237 | 0 | _bitmaps[key] = bitmap; |
238 | 0 | } |
239 | 0 | } Unexecuted instantiation: _ZN5doris15BitmapIntersectIaE11deserializeEPKc Unexecuted instantiation: _ZN5doris15BitmapIntersectIsE11deserializeEPKc Unexecuted instantiation: _ZN5doris15BitmapIntersectIiE11deserializeEPKc Unexecuted instantiation: _ZN5doris15BitmapIntersectIlE11deserializeEPKc Unexecuted instantiation: _ZN5doris15BitmapIntersectInE11deserializeEPKc Unexecuted instantiation: _ZN5doris15BitmapIntersectINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEE11deserializeEPKc |
240 | | |
241 | | protected: |
242 | | std::map<T, BitmapValue> _bitmaps; |
243 | | }; |
244 | | |
245 | | template <> |
246 | | struct BitmapIntersect<std::string_view> { |
247 | | public: |
248 | 0 | BitmapIntersect() = default; |
249 | | |
250 | 0 | explicit BitmapIntersect(const char* src) { deserialize(src); } |
251 | | |
252 | 0 | void add_key(const std::string_view key) { |
253 | 0 | BitmapValue empty_bitmap; |
254 | 0 | _bitmaps[key] = empty_bitmap; |
255 | 0 | } |
256 | | |
257 | 0 | void update(const std::string_view& key, const BitmapValue& bitmap) { |
258 | 0 | if (_bitmaps.find(key) != _bitmaps.end()) { |
259 | 0 | _bitmaps[key] |= bitmap; |
260 | 0 | } |
261 | 0 | } |
262 | | |
263 | 0 | void merge(const BitmapIntersect& other) { |
264 | 0 | for (auto& kv : other._bitmaps) { |
265 | 0 | if (_bitmaps.find(kv.first) != _bitmaps.end()) { |
266 | 0 | _bitmaps[kv.first] |= kv.second; |
267 | 0 | } else { |
268 | 0 | _bitmaps[kv.first] = kv.second; |
269 | 0 | } |
270 | 0 | } |
271 | 0 | } |
272 | | |
273 | | // intersection |
274 | 0 | BitmapValue intersect() const { |
275 | 0 | BitmapValue result; |
276 | 0 | auto it = _bitmaps.begin(); |
277 | 0 | result |= it->second; |
278 | 0 | it++; |
279 | 0 | for (; it != _bitmaps.end(); it++) { |
280 | 0 | result &= it->second; |
281 | 0 | } |
282 | 0 | return result; |
283 | 0 | } |
284 | | |
285 | | // calculate the intersection for _bitmaps's bitmap values |
286 | 0 | int64_t intersect_count() const { |
287 | 0 | if (_bitmaps.empty()) { |
288 | 0 | return 0; |
289 | 0 | } |
290 | 0 | return intersect().cardinality(); |
291 | 0 | } |
292 | | |
293 | | // the serialize size |
294 | 0 | size_t size() { |
295 | 0 | size_t size = 4; |
296 | 0 | for (auto& kv : _bitmaps) { |
297 | 0 | size += detail::Helper::serialize_size(kv.first); |
298 | 0 | size += kv.second.getSizeInBytes(); |
299 | 0 | } |
300 | 0 | return size; |
301 | 0 | } |
302 | | |
303 | | //must call size() first |
304 | 0 | void serialize(char* dest) { |
305 | 0 | char* writer = dest; |
306 | 0 | *(int32_t*)writer = cast_set<int32_t>(_bitmaps.size()); |
307 | 0 | writer += 4; |
308 | 0 | for (auto& kv : _bitmaps) { |
309 | 0 | writer = detail::Helper::write_to(kv.first, writer); |
310 | 0 | kv.second.write_to(writer); |
311 | 0 | writer += kv.second.getSizeInBytes(); |
312 | 0 | } |
313 | 0 | } |
314 | | |
315 | 0 | void deserialize(const char* src) { |
316 | 0 | const char* reader = src; |
317 | 0 | int32_t bitmaps_size = *(int32_t*)reader; |
318 | 0 | reader += 4; |
319 | 0 | for (int32_t i = 0; i < bitmaps_size; i++) { |
320 | 0 | std::string key; |
321 | 0 | detail::Helper::read_from(&reader, &key); |
322 | 0 | BitmapValue bitmap(reader); |
323 | 0 | reader += bitmap.getSizeInBytes(); |
324 | 0 | _bitmaps[key] = bitmap; |
325 | 0 | } |
326 | 0 | } |
327 | | |
328 | | protected: |
329 | | phmap::flat_hash_map<std::string, BitmapValue> _bitmaps; |
330 | | }; |
331 | | #include "common/compile_check_end.h" |
332 | | } // namespace doris |