be/src/util/bitmap_expr_calculation.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | #pragma once |
18 | | #include <stack> |
19 | | #include <string> |
20 | | |
21 | | #include "util/bitmap_intersect.h" |
22 | | |
23 | | namespace doris { |
24 | | |
25 | | // Compute the intersection union difference set of two or more bitmaps |
26 | | // Usage: orthogonal_bitmap_parse_calculate(bitmap_column, filter_column, input_string) |
27 | | // Example: orthogonal_bitmap_expr_calculate(user_id, event, '(A|B)&(C-D)'), meaning find the intersection union difference set of user_id in all A/B/C/D 4 bitmaps |
28 | | // Operation symbol: |
29 | | // the operator '|' stands for union, the operator '&' stands for intersection, the operator '-' indicates the difference set, the operator '^' stands for xor |
30 | | class BitmapExprCalculation : public BitmapIntersect<std::string> { |
31 | | public: |
32 | 18 | BitmapExprCalculation() = default; |
33 | | |
34 | 0 | explicit BitmapExprCalculation(const char* src) { deserialize(src); } |
35 | | |
36 | 2 | void bitmap_calculation_init(std::string& input_str) { |
37 | 2 | _polish = reverse_polish(input_str); |
38 | 2 | std::string bitmap_key; |
39 | 20 | for (int i = 0; i < _polish.length(); i++) { |
40 | 18 | char c = _polish.at(i); |
41 | 18 | if (c != '&' && c != '|' && c != '^' && c != '-' && c != ' ' && c != '\\') { |
42 | 12 | bitmap_key += c; |
43 | 12 | } else if (i != 0 && _polish.at(i - 1) == '\\') { |
44 | 0 | bitmap_key += c; |
45 | 6 | } else if (c == '\\') { |
46 | 0 | continue; |
47 | 6 | } else { |
48 | 6 | if (bitmap_key.length() > 0) { |
49 | 4 | add_key(bitmap_key); |
50 | 4 | bitmap_key.clear(); |
51 | 4 | } |
52 | 6 | } |
53 | 18 | } |
54 | 2 | if (bitmap_key.length() > 0) { |
55 | 0 | add_key(bitmap_key); |
56 | 0 | bitmap_key.clear(); |
57 | 0 | } |
58 | 2 | } |
59 | | |
60 | 2 | BitmapValue bitmap_calculate() const { |
61 | 2 | std::stack<BitmapValue> values; |
62 | 2 | std::string bitmap_key; |
63 | 20 | for (int i = 0; i < _polish.length(); i++) { |
64 | 18 | char c = _polish.at(i); |
65 | 18 | if (c == ' ') { |
66 | 4 | if (bitmap_key.length() > 0) { |
67 | 2 | values.push(_bitmaps.at(bitmap_key)); |
68 | 2 | bitmap_key.clear(); |
69 | 2 | } |
70 | 14 | } else if (c != '&' && c != '|' && c != '^' && c != '-' && c != '\\') { |
71 | 12 | bitmap_key += c; |
72 | 12 | } else if (i != 0 && _polish.at(i - 1) == '\\') { |
73 | 0 | bitmap_key += c; |
74 | 2 | } else if (c == '\\') { |
75 | 0 | continue; |
76 | 2 | } else { |
77 | 2 | if (bitmap_key.length() > 0) { |
78 | 2 | values.push(_bitmaps.at(bitmap_key)); |
79 | 2 | bitmap_key.clear(); |
80 | 2 | } |
81 | 2 | if (values.size() >= 2) { |
82 | 2 | BitmapValue op_a = values.top(); |
83 | 2 | values.pop(); |
84 | 2 | BitmapValue op_b = values.top(); |
85 | 2 | values.pop(); |
86 | 2 | BitmapValue cal_result; |
87 | 2 | bitmap_calculate(op_a, op_b, c, cal_result); |
88 | 2 | values.push(cal_result); |
89 | 2 | } |
90 | 2 | } |
91 | 18 | } |
92 | 2 | BitmapValue result; |
93 | 2 | if (bitmap_key.length() > 0) { |
94 | 0 | result |= _bitmaps.at(bitmap_key); |
95 | 2 | } else if (!values.empty()) { |
96 | 2 | result |= values.top(); |
97 | 2 | } |
98 | 2 | return result; |
99 | 2 | } |
100 | | |
101 | | // calculate the bitmap value by expr bitmap calculate |
102 | 1 | int64_t bitmap_calculate_count() const { |
103 | 1 | if (_bitmaps.empty()) { |
104 | 0 | return 0; |
105 | 0 | } |
106 | 1 | return bitmap_calculate().cardinality(); |
107 | 1 | } |
108 | | |
109 | | private: |
110 | 0 | constexpr int priority(char c) { |
111 | 0 | switch (c) { |
112 | 0 | case '&': |
113 | 0 | return 1; |
114 | 0 | case '|': |
115 | 0 | return 1; |
116 | 0 | case '^': |
117 | 0 | return 1; |
118 | 0 | case '-': |
119 | 0 | return 1; |
120 | 0 | default: |
121 | 0 | return 0; |
122 | 0 | } |
123 | 0 | } |
124 | | |
125 | | template <class T> |
126 | 2 | std::string print_stack(std::stack<T>& stack) { |
127 | 2 | std::string result; |
128 | 20 | while (!stack.empty()) { |
129 | 18 | result = stack.top() + result; |
130 | 18 | stack.pop(); |
131 | 18 | } |
132 | 2 | return result; |
133 | 2 | } |
134 | | |
135 | 2 | std::string reverse_polish(const std::string& input_str) { |
136 | 2 | std::stack<char> polish; |
137 | 2 | std::stack<char> op_stack; |
138 | 2 | bool last_is_char = false; |
139 | 20 | for (int i = 0; i < input_str.length(); i++) { |
140 | 18 | char cur_char = input_str.at(i); |
141 | 18 | if (cur_char != '&' && cur_char != '|' && cur_char != '^' && cur_char != '-' && |
142 | 18 | cur_char != '(' && cur_char != ')' && cur_char != ' ' && cur_char != '\t') { |
143 | 12 | if (!last_is_char) { |
144 | 4 | polish.push(' '); |
145 | 4 | } |
146 | 12 | polish.push(cur_char); |
147 | 12 | last_is_char = true; |
148 | 12 | continue; |
149 | 12 | } else if (i != 0 && input_str.at(i - 1) == '\\') { |
150 | 0 | polish.push(cur_char); |
151 | 0 | last_is_char = true; |
152 | 0 | continue; |
153 | 6 | } else if (cur_char == ' ' || cur_char == '\t') { |
154 | 0 | last_is_char = false; |
155 | 0 | continue; |
156 | 6 | } else if (cur_char == '(') { |
157 | 2 | op_stack.push(cur_char); |
158 | 4 | } else if (!op_stack.empty() && cur_char == ')') { |
159 | 4 | while (!op_stack.empty() && op_stack.top() != '(') { |
160 | 2 | polish.push(op_stack.top()); |
161 | 2 | op_stack.pop(); |
162 | 2 | } |
163 | 2 | op_stack.pop(); |
164 | 2 | } else { |
165 | 2 | if (!op_stack.empty() && op_stack.top() == '(') { |
166 | 2 | op_stack.push(cur_char); |
167 | 2 | } else { |
168 | 0 | if (!op_stack.empty() && priority(cur_char) > priority(op_stack.top())) { |
169 | 0 | op_stack.push(cur_char); |
170 | 0 | } else { |
171 | 0 | while (!op_stack.empty()) { |
172 | 0 | if (op_stack.top() == '(') { |
173 | 0 | break; |
174 | 0 | } |
175 | 0 | if (priority(cur_char) <= priority(op_stack.top())) { |
176 | 0 | polish.push(op_stack.top()); |
177 | 0 | op_stack.pop(); |
178 | 0 | } else { |
179 | 0 | break; |
180 | 0 | } |
181 | 0 | } |
182 | 0 | op_stack.push(cur_char); |
183 | 0 | } |
184 | 0 | } |
185 | 2 | } |
186 | 6 | last_is_char = false; |
187 | 6 | } |
188 | | |
189 | 2 | while (!op_stack.empty()) { |
190 | 0 | polish.push(op_stack.top()); |
191 | 0 | op_stack.pop(); |
192 | 0 | } |
193 | 2 | return print_stack(polish); |
194 | 2 | } |
195 | | |
196 | | void bitmap_calculate(BitmapValue& op_a, BitmapValue& op_b, char op, |
197 | 2 | BitmapValue& result) const { |
198 | 2 | result |= op_b; |
199 | 2 | switch (op) { |
200 | 2 | case '&': |
201 | 2 | result &= op_a; |
202 | 2 | break; |
203 | 0 | case '|': |
204 | 0 | result |= op_a; |
205 | 0 | break; |
206 | 0 | case '-': |
207 | 0 | result -= op_a; |
208 | 0 | break; |
209 | 0 | case '^': |
210 | 0 | result ^= op_a; |
211 | 0 | break; |
212 | 2 | } |
213 | 2 | } |
214 | | |
215 | | std::string _polish; |
216 | | }; |
217 | | } // namespace doris |