be/src/storage/segment/row_ranges.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <roaring/roaring.hh> |
21 | | #include <string> |
22 | | #include <vector> |
23 | | |
24 | | #include "absl/strings/substitute.h" |
25 | | #include "common/cast_set.h" |
26 | | #include "common/logging.h" |
27 | | #include "storage/segment/common.h" |
28 | | |
29 | | namespace doris { |
30 | | #include "common/compile_check_begin.h" |
31 | | namespace segment_v2 { |
32 | | |
33 | | // RowRange stands for range[From, To), From is inclusive, |
34 | | // To is exclusive. It is used for row id range calculation. |
35 | | class RowRange { |
36 | | public: |
37 | | // Returns true if two ranges are overlapped or false. |
38 | | // The union range will be returned through range. |
39 | 47 | static bool range_union(const RowRange& left, const RowRange& right, RowRange* range) { |
40 | 47 | if (left._from <= right._from) { |
41 | 47 | if (left._to >= right._from) { |
42 | 3 | range->_from = left._from; |
43 | 3 | range->_to = std::max(left._to, right._to); |
44 | 3 | return true; |
45 | 3 | } |
46 | 47 | } else if (right._to >= left._from) { |
47 | 0 | range->_from = right._from; |
48 | 0 | range->_to = std::max(left._to, right._to); |
49 | 0 | return true; |
50 | 0 | } |
51 | | // return a invalid range |
52 | 44 | range->_from = 0; |
53 | 44 | range->_to = 0; |
54 | 44 | return false; |
55 | 47 | } |
56 | | |
57 | | // Returns true if the two ranges are intersected or false. |
58 | | // The intersection of the two ranges is returned through range. |
59 | 1.55k | static bool range_intersection(const RowRange& left, const RowRange& right, RowRange* range) { |
60 | 1.55k | if (left._from <= right._from) { |
61 | 1.53k | if (left._to > right._from) { |
62 | 1.53k | range->_from = right._from; |
63 | 1.53k | range->_to = std::min(left._to, right._to); |
64 | 1.53k | return true; |
65 | 1.53k | } |
66 | 1.53k | } else if (right._to > left._from) { |
67 | 22 | range->_from = left._from; |
68 | 22 | range->_to = std::min(left._to, right._to); |
69 | 22 | return true; |
70 | 22 | } |
71 | | // return a invalid range |
72 | 2 | range->_from = 0; |
73 | 2 | range->_to = 0; |
74 | 2 | return false; |
75 | 1.55k | } |
76 | | |
77 | 1.60k | RowRange() : _from(0), _to(0) {} |
78 | | |
79 | | // Creates a range of [from, to) (from inclusive and to exclusive; empty ranges are invalid) |
80 | 1.64k | RowRange(int64_t from, int64_t to) : _from(from), _to(to) {} |
81 | | |
82 | 6 | bool is_valid() const { return _from < _to; } |
83 | | |
84 | 6.39k | size_t count() const { return _to - _from; } |
85 | | |
86 | 1.65k | bool is_before(const RowRange& other) const { return _to <= other._from; } |
87 | | |
88 | 1.69k | bool is_after(const RowRange& other) const { return _from >= other._to; } |
89 | | |
90 | 831 | int64_t from() const { return _from; } |
91 | | |
92 | 687 | int64_t to() const { return _to; } |
93 | | |
94 | 0 | std::string to_string() const { return absl::Substitute("[$0-$1)", _from, _to); } |
95 | | |
96 | 0 | uint64_t get_digest(uint64_t seed) const { |
97 | 0 | uint64_t hash = seed; |
98 | 0 | hash = hash * 31 + _from; |
99 | 0 | hash = hash * 31 + _to; |
100 | 0 | return hash; |
101 | 0 | } |
102 | | |
103 | | private: |
104 | | int64_t _from; |
105 | | int64_t _to; |
106 | | }; |
107 | | |
108 | | class RowRanges { |
109 | | public: |
110 | 10.9k | RowRanges() : _count(0) {} |
111 | | |
112 | 5.62k | void clear() { |
113 | 5.62k | _ranges.clear(); |
114 | 5.62k | _count = 0; |
115 | 5.62k | } |
116 | | |
117 | | // Creates a new RowRanges object with the single range [0, row_count). |
118 | 1.40k | static RowRanges create_single(uint64_t row_count) { |
119 | 1.40k | RowRanges ranges; |
120 | 1.40k | ranges.add(RowRange(0, row_count)); |
121 | 1.40k | return ranges; |
122 | 1.40k | } |
123 | | |
124 | | // Creates a new RowRanges object with the single range [from, to). |
125 | 8 | static RowRanges create_single(int64_t from, int64_t to) { |
126 | 8 | DCHECK(from <= to); |
127 | 8 | RowRanges ranges; |
128 | 8 | ranges.add(RowRange(from, to)); |
129 | 8 | return ranges; |
130 | 8 | } |
131 | | |
132 | | // Calculates the union of the two specified RowRanges object. The union of two range is calculated if there are |
133 | | // elements between them. Otherwise, the two disjunct ranges are stored separately. |
134 | | // For example: |
135 | | // [113, 241) ∪ [221, 340) = [113, 340) |
136 | | // [113, 230) ∪ [230, 340) = [113, 340] |
137 | | // while |
138 | | // [113, 230) ∪ [231, 340) = [113, 230), [231, 340) |
139 | 3 | static void ranges_union(const RowRanges& left, const RowRanges& right, RowRanges* result) { |
140 | 3 | RowRanges tmp_range; |
141 | 3 | auto it1 = left._ranges.begin(); |
142 | 3 | auto it2 = right._ranges.begin(); |
143 | | // merge and add |
144 | 6 | while (it1 != left._ranges.end() && it2 != right._ranges.end()) { |
145 | 3 | if (it1->is_after(*it2)) { |
146 | 0 | tmp_range.add(*it2); |
147 | 0 | ++it2; |
148 | 3 | } else { |
149 | 3 | tmp_range.add(*it1); |
150 | 3 | ++it1; |
151 | 3 | } |
152 | 3 | } |
153 | 3 | while (it1 != left._ranges.end()) { |
154 | 0 | tmp_range.add(*it1); |
155 | 0 | ++it1; |
156 | 0 | } |
157 | 6 | while (it2 != right._ranges.end()) { |
158 | 3 | tmp_range.add(*it2); |
159 | 3 | ++it2; |
160 | 3 | } |
161 | 3 | *result = std::move(tmp_range); |
162 | 3 | } |
163 | | |
164 | | // Calculates the intersection of the two specified RowRanges object. Two ranges intersect if they have common |
165 | | // elements otherwise the result is empty. |
166 | | // For example: |
167 | | // [113, 241) ∩ [221, 340) = [221, 241) |
168 | | // while |
169 | | // [113, 230) ∩ [230, 340) = <EMPTY> |
170 | | // |
171 | | // The result RowRanges object will contain all the row indexes there were contained in both of the specified objects |
172 | | static void ranges_intersection(const RowRanges& left, const RowRanges& right, |
173 | 1.58k | RowRanges* result) { |
174 | 1.58k | RowRanges tmp_range; |
175 | 1.58k | int right_index = 0; |
176 | 3.17k | for (auto it1 = left._ranges.begin(); it1 != left._ranges.end(); ++it1) { |
177 | 1.58k | const RowRange& range1 = *it1; |
178 | 3.23k | for (int i = right_index; i < right._ranges.size(); ++i) { |
179 | 1.64k | const RowRange& range2 = right._ranges[i]; |
180 | 1.64k | if (range1.is_before(range2)) { |
181 | 3 | break; |
182 | 1.64k | } else if (range1.is_after(range2)) { |
183 | 90 | right_index = i + 1; |
184 | 90 | continue; |
185 | 90 | } |
186 | 1.55k | RowRange merge_range; |
187 | 1.55k | bool ret = RowRange::range_intersection(range1, range2, &merge_range); |
188 | 1.55k | DCHECK(ret); |
189 | 1.55k | tmp_range.add(merge_range); |
190 | 1.55k | } |
191 | 1.58k | } |
192 | 1.58k | *result = std::move(tmp_range); |
193 | 1.58k | } |
194 | | |
195 | 472 | static roaring::Roaring ranges_to_roaring(const RowRanges& ranges) { |
196 | 472 | roaring::Roaring result; |
197 | 943 | for (auto it = ranges._ranges.begin(); it != ranges._ranges.end(); ++it) { |
198 | 471 | result.addRange(it->from(), it->to()); |
199 | 471 | } |
200 | 472 | return result; |
201 | 472 | } |
202 | | |
203 | 3.14k | size_t count() { return _count; } |
204 | | |
205 | 5.63k | bool is_empty() { return _count == 0; } |
206 | | |
207 | 2 | bool contain(rowid_t from, rowid_t to) { |
208 | | // binary search |
209 | 2 | RowRange tmp_range = RowRange(from, to); |
210 | 2 | size_t start = 0; |
211 | 2 | size_t end = _ranges.size(); |
212 | 3 | while (start <= end) { |
213 | 3 | size_t mid = (start + end) / 2; |
214 | 3 | if (_ranges[mid].is_before(tmp_range)) { |
215 | 0 | start = mid; |
216 | 3 | } else if (_ranges[mid].is_after(tmp_range)) { |
217 | 1 | end = mid - 1; |
218 | 2 | } else { |
219 | 2 | return true; |
220 | 2 | } |
221 | 3 | } |
222 | 0 | return false; |
223 | 2 | } |
224 | | |
225 | 2 | int64_t from() { |
226 | 2 | DCHECK(!is_empty()); |
227 | 2 | return _ranges[0].from(); |
228 | 2 | } |
229 | | |
230 | 2 | int64_t to() { |
231 | 2 | DCHECK(!is_empty()); |
232 | 2 | return _ranges[_ranges.size() - 1].to(); |
233 | 2 | } |
234 | | |
235 | 285 | size_t range_size() const { return _ranges.size(); } |
236 | | |
237 | 166 | RowRange get_range(size_t index) const { return _ranges[index]; } |
238 | | |
239 | 10 | int64_t get_range_from(size_t range_index) const { return _ranges[range_index].from(); } |
240 | | |
241 | 18 | int64_t get_range_to(size_t range_index) const { return _ranges[range_index].to(); } |
242 | | |
243 | 2 | size_t get_range_count(size_t range_index) const { return _ranges[range_index].count(); } |
244 | | |
245 | 0 | std::string to_string() { |
246 | 0 | std::string result; |
247 | 0 | for (auto range : _ranges) { |
248 | 0 | result += range.to_string() + " "; |
249 | 0 | } |
250 | 0 | return result; |
251 | 0 | } |
252 | | |
253 | | // Adds a range to the end of the list of ranges. It maintains the disjunct ascending order(*) of the ranges by |
254 | | // trying to union the specified range to the last ranges in the list. The specified range shall be larger(*) than |
255 | | // the last one or might be overlapped with some of the last ones. |
256 | 3.19k | void add(const RowRange& range) { |
257 | 3.19k | if (range.count() == 0) { |
258 | 0 | return; |
259 | 0 | } |
260 | 3.19k | RowRange range_to_add = range; |
261 | 3.19k | for (int i = cast_set<int>(_ranges.size()) - 1; i >= 0; --i) { |
262 | 45 | const RowRange last = _ranges[i]; |
263 | 45 | DCHECK(!last.is_after(range)); |
264 | 45 | RowRange u; |
265 | 45 | bool ret = RowRange::range_union(last, range_to_add, &u); |
266 | 45 | if (!ret) { |
267 | | // range do not intersect with the last |
268 | 43 | break; |
269 | 43 | } |
270 | 2 | range_to_add = u; |
271 | 2 | _ranges.erase(_ranges.begin() + i); |
272 | 2 | _count -= last.count(); |
273 | 2 | } |
274 | 3.19k | _ranges.emplace_back(range_to_add); |
275 | 3.19k | _count += range_to_add.count(); |
276 | 3.19k | } |
277 | | |
278 | 0 | uint64_t get_digest(uint64_t seed) const { |
279 | 0 | for (auto range : _ranges) { |
280 | 0 | seed = range.get_digest(seed); |
281 | 0 | } |
282 | 0 | return seed; |
283 | 0 | } |
284 | | |
285 | | private: |
286 | | std::vector<RowRange> _ranges; |
287 | | size_t _count; |
288 | | }; |
289 | | |
290 | | } // namespace segment_v2 |
291 | | #include "common/compile_check_end.h" |
292 | | } // namespace doris |