be/src/util/bitmap_intersect.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | #pragma once |
18 | | #include <parallel_hashmap/phmap.h> |
19 | | |
20 | | #include "common/cast_set.h" |
21 | | #include "core/string_ref.h" |
22 | | #include "core/value/bitmap_value.h" |
23 | | |
24 | | namespace doris { |
25 | | |
26 | | namespace detail { |
27 | | class Helper { |
28 | | public: |
29 | | static const int DATETIME_PACKED_TIME_BYTE_SIZE = 8; |
30 | | static const int DATETIME_TYPE_BYTE_SIZE = 4; |
31 | | static const int DECIMAL_BYTE_SIZE = 16; |
32 | | |
33 | | // serialize_size start |
34 | | template <typename T> |
35 | 0 | static int32_t serialize_size(const T& v) { |
36 | 0 | return sizeof(T); |
37 | 0 | } Unexecuted instantiation: _ZN5doris6detail6Helper14serialize_sizeIaEEiRKT_ Unexecuted instantiation: _ZN5doris6detail6Helper14serialize_sizeIsEEiRKT_ Unexecuted instantiation: _ZN5doris6detail6Helper14serialize_sizeIiEEiRKT_ Unexecuted instantiation: _ZN5doris6detail6Helper14serialize_sizeIlEEiRKT_ Unexecuted instantiation: _ZN5doris6detail6Helper14serialize_sizeInEEiRKT_ |
38 | | |
39 | | // write_to start |
40 | | template <typename T> |
41 | 0 | static char* write_to(const T& v, char* dest) { |
42 | 0 | size_t type_size = sizeof(T); |
43 | 0 | memcpy(dest, &v, type_size); |
44 | 0 | dest += type_size; |
45 | 0 | return dest; |
46 | 0 | } Unexecuted instantiation: _ZN5doris6detail6Helper8write_toIaEEPcRKT_S3_ Unexecuted instantiation: _ZN5doris6detail6Helper8write_toIsEEPcRKT_S3_ Unexecuted instantiation: _ZN5doris6detail6Helper8write_toIiEEPcRKT_S3_ Unexecuted instantiation: _ZN5doris6detail6Helper8write_toIlEEPcRKT_S3_ Unexecuted instantiation: _ZN5doris6detail6Helper8write_toInEEPcRKT_S3_ |
47 | | |
48 | | // read_from start |
49 | | template <typename T> |
50 | 0 | static void read_from(const char** src, T* result) { |
51 | 0 | size_t type_size = sizeof(T); |
52 | 0 | memcpy(result, *src, type_size); |
53 | 0 | *src += type_size; |
54 | 0 | } Unexecuted instantiation: _ZN5doris6detail6Helper9read_fromIaEEvPPKcPT_ Unexecuted instantiation: _ZN5doris6detail6Helper9read_fromIsEEvPPKcPT_ Unexecuted instantiation: _ZN5doris6detail6Helper9read_fromIiEEvPPKcPT_ Unexecuted instantiation: _ZN5doris6detail6Helper9read_fromIlEEvPPKcPT_ Unexecuted instantiation: _ZN5doris6detail6Helper9read_fromInEEvPPKcPT_ |
55 | | }; |
56 | | |
57 | | template <> |
58 | 0 | inline char* Helper::write_to<VecDateTimeValue>(const VecDateTimeValue& v, char* dest) { |
59 | 0 | *(int64_t*)dest = v.to_int64_datetime_packed(); |
60 | 0 | dest += DATETIME_PACKED_TIME_BYTE_SIZE; |
61 | 0 | *(int*)dest = v.type(); |
62 | 0 | dest += DATETIME_TYPE_BYTE_SIZE; |
63 | 0 | return dest; |
64 | 0 | } |
65 | | |
66 | | template <> |
67 | 0 | inline char* Helper::write_to<DecimalV2Value>(const DecimalV2Value& v, char* dest) { |
68 | 0 | __int128 value = v.value(); |
69 | 0 | memcpy(dest, &value, DECIMAL_BYTE_SIZE); |
70 | 0 | dest += DECIMAL_BYTE_SIZE; |
71 | 0 | return dest; |
72 | 0 | } |
73 | | |
74 | | template <> |
75 | 0 | inline char* Helper::write_to<StringRef>(const StringRef& v, char* dest) { |
76 | 0 | *(int32_t*)dest = cast_set<int32_t>(v.size); |
77 | 0 | dest += 4; |
78 | 0 | memcpy(dest, v.data, v.size); |
79 | 0 | dest += v.size; |
80 | 0 | return dest; |
81 | 0 | } |
82 | | |
83 | | template <> |
84 | 0 | inline char* Helper::write_to<std::string>(const std::string& v, char* dest) { |
85 | 0 | *(uint32_t*)dest = cast_set<uint32_t>(v.size()); |
86 | 0 | dest += 4; |
87 | 0 | memcpy(dest, v.c_str(), v.size()); |
88 | 0 | dest += v.size(); |
89 | 0 | return dest; |
90 | 0 | } |
91 | | // write_to end |
92 | | |
93 | | template <> |
94 | 0 | inline int32_t Helper::serialize_size<VecDateTimeValue>(const VecDateTimeValue& v) { |
95 | 0 | return Helper::DATETIME_PACKED_TIME_BYTE_SIZE + Helper::DATETIME_TYPE_BYTE_SIZE; |
96 | 0 | } |
97 | | |
98 | | template <> |
99 | 0 | inline int32_t Helper::serialize_size<DecimalV2Value>(const DecimalV2Value& v) { |
100 | 0 | return Helper::DECIMAL_BYTE_SIZE; |
101 | 0 | } |
102 | | |
103 | | template <> |
104 | 0 | inline int32_t Helper::serialize_size<StringRef>(const StringRef& v) { |
105 | 0 | return cast_set<int32_t>(v.size + 4); |
106 | 0 | } |
107 | | |
108 | | template <> |
109 | 0 | inline int32_t Helper::serialize_size<std::string>(const std::string& v) { |
110 | 0 | return cast_set<int32_t>(v.size() + 4); |
111 | 0 | } |
112 | | // serialize_size end |
113 | | |
114 | | template <> |
115 | 0 | inline void Helper::read_from<VecDateTimeValue>(const char** src, VecDateTimeValue* result) { |
116 | 0 | result->from_packed_time(*(int64_t*)(*src)); |
117 | 0 | *src += DATETIME_PACKED_TIME_BYTE_SIZE; |
118 | 0 | if (*(int*)(*src) == TIME_DATE) { |
119 | 0 | result->cast_to_date(); |
120 | 0 | } |
121 | 0 | *src += DATETIME_TYPE_BYTE_SIZE; |
122 | 0 | } |
123 | | |
124 | | template <> |
125 | 0 | inline void Helper::read_from<DecimalV2Value>(const char** src, DecimalV2Value* result) { |
126 | 0 | __int128 v = 0; |
127 | 0 | memcpy(&v, *src, DECIMAL_BYTE_SIZE); |
128 | 0 | *src += DECIMAL_BYTE_SIZE; |
129 | 0 | *result = DecimalV2Value(v); |
130 | 0 | } |
131 | | |
132 | | template <> |
133 | 0 | inline void Helper::read_from<StringRef>(const char** src, StringRef* result) { |
134 | 0 | int32_t length = *(int32_t*)(*src); |
135 | 0 | *src += 4; |
136 | 0 | *result = StringRef((char*)*src, length); |
137 | 0 | *src += length; |
138 | 0 | } |
139 | | |
140 | | template <> |
141 | 0 | inline void Helper::read_from<std::string>(const char** src, std::string* result) { |
142 | 0 | int32_t length = *(int32_t*)(*src); |
143 | 0 | *src += 4; |
144 | 0 | *result = std::string((char*)*src, length); |
145 | 0 | *src += length; |
146 | 0 | } |
147 | | // read_from end |
148 | | } // namespace detail |
149 | | |
150 | | // Calculate the intersection of two or more bitmaps |
151 | | // Usage: intersect_count(bitmap_column_to_count, filter_column, filter_values ...) |
152 | | // Example: intersect_count(user_id, event, 'A', 'B', 'C'), meaning find the intersect count of user_id in all A/B/C 3 bitmaps |
153 | | // Todo(kks) Use Array type instead of variable arguments |
154 | | template <typename T> |
155 | | struct BitmapIntersect { |
156 | | public: |
157 | 0 | BitmapIntersect() = default; Unexecuted instantiation: _ZN5doris15BitmapIntersectIaEC2Ev Unexecuted instantiation: _ZN5doris15BitmapIntersectIsEC2Ev Unexecuted instantiation: _ZN5doris15BitmapIntersectIiEC2Ev Unexecuted instantiation: _ZN5doris15BitmapIntersectIlEC2Ev Unexecuted instantiation: _ZN5doris15BitmapIntersectInEC2Ev Unexecuted instantiation: _ZN5doris15BitmapIntersectINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEC2Ev |
158 | | |
159 | | explicit BitmapIntersect(const char* src) { deserialize(src); } |
160 | | |
161 | 0 | void add_key(const T key) { |
162 | 0 | BitmapValue empty_bitmap; |
163 | 0 | _bitmaps[key] = empty_bitmap; |
164 | 0 | } Unexecuted instantiation: _ZN5doris15BitmapIntersectIaE7add_keyEa Unexecuted instantiation: _ZN5doris15BitmapIntersectIsE7add_keyEs Unexecuted instantiation: _ZN5doris15BitmapIntersectIiE7add_keyEi Unexecuted instantiation: _ZN5doris15BitmapIntersectIlE7add_keyEl Unexecuted instantiation: _ZN5doris15BitmapIntersectInE7add_keyEn Unexecuted instantiation: _ZN5doris15BitmapIntersectINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEE7add_keyES6_ |
165 | | |
166 | 0 | void update(const T& key, const BitmapValue& bitmap) { |
167 | 0 | if (_bitmaps.find(key) != _bitmaps.end()) { |
168 | 0 | _bitmaps[key] |= bitmap; |
169 | 0 | } |
170 | 0 | } Unexecuted instantiation: _ZN5doris15BitmapIntersectIaE6updateERKaRKNS_11BitmapValueE Unexecuted instantiation: _ZN5doris15BitmapIntersectIsE6updateERKsRKNS_11BitmapValueE Unexecuted instantiation: _ZN5doris15BitmapIntersectIiE6updateERKiRKNS_11BitmapValueE Unexecuted instantiation: _ZN5doris15BitmapIntersectIlE6updateERKlRKNS_11BitmapValueE Unexecuted instantiation: _ZN5doris15BitmapIntersectInE6updateERKnRKNS_11BitmapValueE Unexecuted instantiation: _ZN5doris15BitmapIntersectINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEE6updateERKS6_RKNS_11BitmapValueE |
171 | | |
172 | 0 | void merge(const BitmapIntersect& other) { |
173 | 0 | for (auto& kv : other._bitmaps) { |
174 | 0 | if (_bitmaps.find(kv.first) != _bitmaps.end()) { |
175 | 0 | _bitmaps[kv.first] |= kv.second; |
176 | 0 | } else { |
177 | 0 | _bitmaps[kv.first] = kv.second; |
178 | 0 | } |
179 | 0 | } |
180 | 0 | } Unexecuted instantiation: _ZN5doris15BitmapIntersectIaE5mergeERKS1_ Unexecuted instantiation: _ZN5doris15BitmapIntersectIsE5mergeERKS1_ Unexecuted instantiation: _ZN5doris15BitmapIntersectIiE5mergeERKS1_ Unexecuted instantiation: _ZN5doris15BitmapIntersectIlE5mergeERKS1_ Unexecuted instantiation: _ZN5doris15BitmapIntersectInE5mergeERKS1_ |
181 | | |
182 | | // intersection |
183 | 0 | BitmapValue intersect() const { |
184 | 0 | BitmapValue result; |
185 | 0 | if (_bitmaps.empty()) { |
186 | 0 | return result; |
187 | 0 | } |
188 | 0 | auto it = _bitmaps.begin(); |
189 | 0 | result |= it->second; |
190 | 0 | it++; |
191 | 0 | for (; it != _bitmaps.end(); it++) { |
192 | 0 | result &= it->second; |
193 | 0 | } |
194 | 0 | return result; |
195 | 0 | } Unexecuted instantiation: _ZNK5doris15BitmapIntersectIaE9intersectEv Unexecuted instantiation: _ZNK5doris15BitmapIntersectIsE9intersectEv Unexecuted instantiation: _ZNK5doris15BitmapIntersectIiE9intersectEv Unexecuted instantiation: _ZNK5doris15BitmapIntersectIlE9intersectEv Unexecuted instantiation: _ZNK5doris15BitmapIntersectInE9intersectEv |
196 | | |
197 | | // calculate the intersection for _bitmaps's bitmap values |
198 | 0 | int64_t intersect_count() const { |
199 | 0 | if (_bitmaps.empty()) { |
200 | 0 | return 0; |
201 | 0 | } |
202 | 0 | return intersect().cardinality(); |
203 | 0 | } Unexecuted instantiation: _ZNK5doris15BitmapIntersectIaE15intersect_countEv Unexecuted instantiation: _ZNK5doris15BitmapIntersectIsE15intersect_countEv Unexecuted instantiation: _ZNK5doris15BitmapIntersectIiE15intersect_countEv Unexecuted instantiation: _ZNK5doris15BitmapIntersectIlE15intersect_countEv Unexecuted instantiation: _ZNK5doris15BitmapIntersectInE15intersect_countEv |
204 | | |
205 | | // the serialize size |
206 | 0 | size_t size() { |
207 | 0 | size_t size = 4; |
208 | 0 | for (auto& kv : _bitmaps) { |
209 | 0 | size += detail::Helper::serialize_size(kv.first); |
210 | 0 | size += kv.second.getSizeInBytes(); |
211 | 0 | } |
212 | 0 | return size; |
213 | 0 | } Unexecuted instantiation: _ZN5doris15BitmapIntersectIaE4sizeEv Unexecuted instantiation: _ZN5doris15BitmapIntersectIsE4sizeEv Unexecuted instantiation: _ZN5doris15BitmapIntersectIiE4sizeEv Unexecuted instantiation: _ZN5doris15BitmapIntersectIlE4sizeEv Unexecuted instantiation: _ZN5doris15BitmapIntersectInE4sizeEv |
214 | | |
215 | | //must call size() first |
216 | 0 | void serialize(char* dest) { |
217 | 0 | char* writer = dest; |
218 | 0 | *(int32_t*)writer = cast_set<int32_t>(_bitmaps.size()); |
219 | 0 | writer += 4; |
220 | 0 | for (auto& kv : _bitmaps) { |
221 | 0 | writer = detail::Helper::write_to(kv.first, writer); |
222 | 0 | kv.second.write_to(writer); |
223 | 0 | writer += kv.second.getSizeInBytes(); |
224 | 0 | } |
225 | 0 | } Unexecuted instantiation: _ZN5doris15BitmapIntersectIaE9serializeEPc Unexecuted instantiation: _ZN5doris15BitmapIntersectIsE9serializeEPc Unexecuted instantiation: _ZN5doris15BitmapIntersectIiE9serializeEPc Unexecuted instantiation: _ZN5doris15BitmapIntersectIlE9serializeEPc Unexecuted instantiation: _ZN5doris15BitmapIntersectInE9serializeEPc |
226 | | |
227 | 0 | void deserialize(const char* src) { |
228 | 0 | const char* reader = src; |
229 | 0 | int32_t bitmaps_size = *(int32_t*)reader; |
230 | 0 | reader += 4; |
231 | 0 | for (int32_t i = 0; i < bitmaps_size; i++) { |
232 | 0 | T key; |
233 | 0 | detail::Helper::read_from(&reader, &key); |
234 | 0 | BitmapValue bitmap(reader); |
235 | 0 | reader += bitmap.getSizeInBytes(); |
236 | 0 | _bitmaps[key] = bitmap; |
237 | 0 | } |
238 | 0 | } Unexecuted instantiation: _ZN5doris15BitmapIntersectINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEE11deserializeEPKc Unexecuted instantiation: _ZN5doris15BitmapIntersectIaE11deserializeEPKc Unexecuted instantiation: _ZN5doris15BitmapIntersectIsE11deserializeEPKc Unexecuted instantiation: _ZN5doris15BitmapIntersectIiE11deserializeEPKc Unexecuted instantiation: _ZN5doris15BitmapIntersectIlE11deserializeEPKc Unexecuted instantiation: _ZN5doris15BitmapIntersectInE11deserializeEPKc |
239 | | |
240 | | protected: |
241 | | std::map<T, BitmapValue> _bitmaps; |
242 | | }; |
243 | | |
244 | | template <> |
245 | | struct BitmapIntersect<std::string_view> { |
246 | | public: |
247 | 0 | BitmapIntersect() = default; |
248 | | |
249 | 0 | explicit BitmapIntersect(const char* src) { deserialize(src); } |
250 | | |
251 | 0 | void add_key(const std::string_view key) { |
252 | 0 | BitmapValue empty_bitmap; |
253 | 0 | _bitmaps[key] = empty_bitmap; |
254 | 0 | } |
255 | | |
256 | 0 | void update(const std::string_view& key, const BitmapValue& bitmap) { |
257 | 0 | if (_bitmaps.find(key) != _bitmaps.end()) { |
258 | 0 | _bitmaps[key] |= bitmap; |
259 | 0 | } |
260 | 0 | } |
261 | | |
262 | 0 | void merge(const BitmapIntersect& other) { |
263 | 0 | for (auto& kv : other._bitmaps) { |
264 | 0 | if (_bitmaps.find(kv.first) != _bitmaps.end()) { |
265 | 0 | _bitmaps[kv.first] |= kv.second; |
266 | 0 | } else { |
267 | 0 | _bitmaps[kv.first] = kv.second; |
268 | 0 | } |
269 | 0 | } |
270 | 0 | } |
271 | | |
272 | | // intersection |
273 | 0 | BitmapValue intersect() const { |
274 | 0 | BitmapValue result; |
275 | 0 | auto it = _bitmaps.begin(); |
276 | 0 | result |= it->second; |
277 | 0 | it++; |
278 | 0 | for (; it != _bitmaps.end(); it++) { |
279 | 0 | result &= it->second; |
280 | 0 | } |
281 | 0 | return result; |
282 | 0 | } |
283 | | |
284 | | // calculate the intersection for _bitmaps's bitmap values |
285 | 0 | int64_t intersect_count() const { |
286 | 0 | if (_bitmaps.empty()) { |
287 | 0 | return 0; |
288 | 0 | } |
289 | 0 | return intersect().cardinality(); |
290 | 0 | } |
291 | | |
292 | | // the serialize size |
293 | 0 | size_t size() { |
294 | 0 | size_t size = 4; |
295 | 0 | for (auto& kv : _bitmaps) { |
296 | 0 | size += detail::Helper::serialize_size(kv.first); |
297 | 0 | size += kv.second.getSizeInBytes(); |
298 | 0 | } |
299 | 0 | return size; |
300 | 0 | } |
301 | | |
302 | | //must call size() first |
303 | 0 | void serialize(char* dest) { |
304 | 0 | char* writer = dest; |
305 | 0 | *(int32_t*)writer = cast_set<int32_t>(_bitmaps.size()); |
306 | 0 | writer += 4; |
307 | 0 | for (auto& kv : _bitmaps) { |
308 | 0 | writer = detail::Helper::write_to(kv.first, writer); |
309 | 0 | kv.second.write_to(writer); |
310 | 0 | writer += kv.second.getSizeInBytes(); |
311 | 0 | } |
312 | 0 | } |
313 | | |
314 | 0 | void deserialize(const char* src) { |
315 | 0 | const char* reader = src; |
316 | 0 | int32_t bitmaps_size = *(int32_t*)reader; |
317 | 0 | reader += 4; |
318 | 0 | for (int32_t i = 0; i < bitmaps_size; i++) { |
319 | 0 | std::string key; |
320 | 0 | detail::Helper::read_from(&reader, &key); |
321 | 0 | BitmapValue bitmap(reader); |
322 | 0 | reader += bitmap.getSizeInBytes(); |
323 | 0 | _bitmaps[key] = bitmap; |
324 | 0 | } |
325 | 0 | } |
326 | | |
327 | | protected: |
328 | | phmap::flat_hash_map<std::string, BitmapValue> _bitmaps; |
329 | | }; |
330 | | } // namespace doris |