Coverage Report

Created: 2026-03-17 16:40

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/segment/row_ranges.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <roaring/roaring.hh>
21
#include <string>
22
#include <vector>
23
24
#include "absl/strings/substitute.h"
25
#include "common/cast_set.h"
26
#include "common/logging.h"
27
#include "storage/segment/common.h"
28
29
namespace doris {
30
#include "common/compile_check_begin.h"
31
namespace segment_v2 {
32
33
// RowRange stands for range[From, To), From is inclusive,
34
// To is exclusive. It is used for row id range calculation.
35
class RowRange {
36
public:
37
    // Returns true if two ranges are overlapped or false.
38
    // The union range will be returned through range.
39
123k
    static bool range_union(const RowRange& left, const RowRange& right, RowRange* range) {
40
123k
        if (left._from <= right._from) {
41
123k
            if (left._to >= right._from) {
42
114k
                range->_from = left._from;
43
114k
                range->_to = std::max(left._to, right._to);
44
114k
                return true;
45
114k
            }
46
123k
        } else if (right._to >= left._from) {
47
0
            range->_from = right._from;
48
0
            range->_to = std::max(left._to, right._to);
49
0
            return true;
50
0
        }
51
        // return a invalid range
52
9.61k
        range->_from = 0;
53
9.61k
        range->_to = 0;
54
9.61k
        return false;
55
123k
    }
56
57
    // Returns true if the two ranges are intersected or false.
58
    // The intersection of the two ranges is returned through range.
59
1.42M
    static bool range_intersection(const RowRange& left, const RowRange& right, RowRange* range) {
60
1.42M
        if (left._from <= right._from) {
61
507k
            if (left._to > right._from) {
62
507k
                range->_from = right._from;
63
507k
                range->_to = std::min(left._to, right._to);
64
507k
                return true;
65
507k
            }
66
918k
        } else if (right._to > left._from) {
67
918k
            range->_from = left._from;
68
918k
            range->_to = std::min(left._to, right._to);
69
918k
            return true;
70
918k
        }
71
        // return a invalid range
72
18.4E
        range->_from = 0;
73
18.4E
        range->_to = 0;
74
18.4E
        return false;
75
1.42M
    }
76
77
1.57M
    RowRange() : _from(0), _to(0) {}
78
79
    // Creates a range of [from, to) (from inclusive and to exclusive; empty ranges are invalid)
80
6.22M
    RowRange(int64_t from, int64_t to) : _from(from), _to(to) {}
81
82
    bool is_valid() const { return _from < _to; }
83
84
37.8M
    size_t count() const { return _to - _from; }
85
86
6.61M
    bool is_before(const RowRange& other) const { return _to <= other._from; }
87
88
6.47M
    bool is_after(const RowRange& other) const { return _from >= other._to; }
89
90
27.9M
    int64_t from() const { return _from; }
91
92
4.47M
    int64_t to() const { return _to; }
93
94
0
    std::string to_string() const { return absl::Substitute("[$0-$1)", _from, _to); }
95
96
1.23M
    uint64_t get_digest(uint64_t seed) const {
97
1.23M
        uint64_t hash = seed;
98
1.23M
        hash = hash * 31 + _from;
99
1.23M
        hash = hash * 31 + _to;
100
1.23M
        return hash;
101
1.23M
    }
102
103
private:
104
    int64_t _from;
105
    int64_t _to;
106
};
107
108
class RowRanges {
109
public:
110
25.1M
    RowRanges() : _count(0) {}
111
112
977k
    void clear() {
113
977k
        _ranges.clear();
114
977k
        _count = 0;
115
977k
    }
116
117
    // Creates a new RowRanges object with the single range [0, row_count).
118
456k
    static RowRanges create_single(uint64_t row_count) {
119
456k
        RowRanges ranges;
120
456k
        ranges.add(RowRange(0, row_count));
121
456k
        return ranges;
122
456k
    }
123
124
    // Creates a new RowRanges object with the single range [from, to).
125
2.24M
    static RowRanges create_single(int64_t from, int64_t to) {
126
2.24M
        DCHECK(from <= to);
127
2.24M
        RowRanges ranges;
128
2.24M
        ranges.add(RowRange(from, to));
129
2.24M
        return ranges;
130
2.24M
    }
131
132
    // Calculates the union of the two specified RowRanges object. The union of two range is calculated if there are
133
    // elements between them. Otherwise, the two disjunct ranges are stored separately.
134
    // For example:
135
    // [113, 241) ∪ [221, 340) = [113, 340)
136
    // [113, 230) ∪ [230, 340) = [113, 340]
137
    // while
138
    // [113, 230) ∪ [231, 340) = [113, 230), [231, 340)
139
2.24M
    static void ranges_union(const RowRanges& left, const RowRanges& right, RowRanges* result) {
140
2.24M
        RowRanges tmp_range;
141
2.24M
        auto it1 = left._ranges.begin();
142
2.24M
        auto it2 = right._ranges.begin();
143
        // merge and add
144
2.34M
        while (it1 != left._ranges.end() && it2 != right._ranges.end()) {
145
97.4k
            if (it1->is_after(*it2)) {
146
0
                tmp_range.add(*it2);
147
0
                ++it2;
148
97.4k
            } else {
149
97.4k
                tmp_range.add(*it1);
150
97.4k
                ++it1;
151
97.4k
            }
152
97.4k
        }
153
2.44M
        while (it1 != left._ranges.end()) {
154
199k
            tmp_range.add(*it1);
155
199k
            ++it1;
156
199k
        }
157
2.59M
        while (it2 != right._ranges.end()) {
158
349k
            tmp_range.add(*it2);
159
349k
            ++it2;
160
349k
        }
161
2.24M
        *result = std::move(tmp_range);
162
2.24M
    }
163
164
    // Calculates the intersection of the two specified RowRanges object. Two ranges intersect if they have common
165
    // elements otherwise the result is empty.
166
    // For example:
167
    // [113, 241) ∩ [221, 340) = [221, 241)
168
    // while
169
    // [113, 230) ∩ [230, 340) = <EMPTY>
170
    //
171
    // The result RowRanges object will contain all the row indexes there were contained in both of the specified objects
172
    static void ranges_intersection(const RowRanges& left, const RowRanges& right,
173
2.58M
                                    RowRanges* result) {
174
2.58M
        RowRanges tmp_range;
175
2.58M
        int right_index = 0;
176
5.17M
        for (auto it1 = left._ranges.begin(); it1 != left._ranges.end(); ++it1) {
177
2.58M
            const RowRange& range1 = *it1;
178
8.83M
            for (int i = right_index; i < right._ranges.size(); ++i) {
179
6.61M
                const RowRange& range2 = right._ranges[i];
180
6.61M
                if (range1.is_before(range2)) {
181
363k
                    break;
182
6.25M
                } else if (range1.is_after(range2)) {
183
4.82M
                    right_index = i + 1;
184
4.82M
                    continue;
185
4.82M
                }
186
1.42M
                RowRange merge_range;
187
1.42M
                bool ret = RowRange::range_intersection(range1, range2, &merge_range);
188
1.42M
                DCHECK(ret);
189
1.42M
                tmp_range.add(merge_range);
190
1.42M
            }
191
2.58M
        }
192
2.58M
        *result = std::move(tmp_range);
193
2.58M
    }
194
195
    // Calculates the exception (set difference) of the two specified RowRanges objects: left \ right.
196
    // The result contains all row indexes that are in the left ranges but NOT in the right ranges.
197
    // For example:
198
    // [100, 300) \ [150, 200) = [100, 150), [200, 300)
199
    // [100, 300) \ [0, 150) = [150, 300)
200
    // [100, 300) \ [250, 400) = [100, 250)
201
    // [100, 200) \ [200, 300) = [100, 200)
202
    // [100, 300) \ [0, 400) = <EMPTY>
203
    // [100, 200), [300, 400) \ [150, 350) = [100, 150), [350, 400)
204
2.95k
    static void ranges_exception(const RowRanges& left, const RowRanges& right, RowRanges* result) {
205
2.95k
        RowRanges tmp_range;
206
2.95k
        int right_index = 0;
207
5.90k
        for (auto it1 = left._ranges.begin(); it1 != left._ranges.end(); ++it1) {
208
2.95k
            int64_t current_from = it1->from();
209
2.95k
            int64_t current_to = it1->to();
210
4.01k
            for (int i = right_index; i < right._ranges.size(); ++i) {
211
2.96k
                const RowRange& range2 = right._ranges[i];
212
2.96k
                if (current_from >= current_to) {
213
                    // Current range fully consumed
214
1
                    break;
215
1
                }
216
2.95k
                if (current_to <= range2.from()) {
217
                    // Current remaining range is entirely before range2, no more subtraction needed
218
1.90k
                    break;
219
1.90k
                }
220
1.05k
                if (current_from >= range2.to()) {
221
                    // range2 is entirely before the current remaining range, advance right_index
222
6
                    right_index = i + 1;
223
6
                    continue;
224
6
                }
225
                // There is overlap between [current_from, current_to) and range2
226
1.04k
                if (current_from < range2.from()) {
227
                    // Left portion before the overlap: [current_from, range2.from())
228
34
                    tmp_range.add(RowRange(current_from, range2.from()));
229
34
                }
230
                // Advance current_from past the overlap
231
1.04k
                current_from = range2.to();
232
1.04k
            }
233
            // Add whatever remains of the current left range
234
2.95k
            if (current_from < current_to) {
235
1.93k
                tmp_range.add(RowRange(current_from, current_to));
236
1.93k
            }
237
2.95k
        }
238
2.95k
        *result = std::move(tmp_range);
239
2.95k
    }
240
241
2.94M
    static roaring::Roaring ranges_to_roaring(const RowRanges& ranges) {
242
2.94M
        roaring::Roaring result;
243
4.35M
        for (auto it = ranges._ranges.begin(); it != ranges._ranges.end(); ++it) {
244
1.40M
            result.addRange(it->from(), it->to());
245
1.40M
        }
246
2.94M
        return result;
247
2.94M
    }
248
249
3.01M
    size_t count() { return _count; }
250
251
4.47M
    bool is_empty() { return _count == 0; }
252
253
0
    bool contain(rowid_t from, rowid_t to) {
254
0
        // binary search
255
0
        RowRange tmp_range = RowRange(from, to);
256
0
        size_t start = 0;
257
0
        size_t end = _ranges.size();
258
0
        while (start <= end) {
259
0
            size_t mid = (start + end) / 2;
260
0
            if (_ranges[mid].is_before(tmp_range)) {
261
0
                start = mid;
262
0
            } else if (_ranges[mid].is_after(tmp_range)) {
263
0
                end = mid - 1;
264
0
            } else {
265
0
                return true;
266
0
            }
267
0
        }
268
0
        return false;
269
0
    }
270
271
    int64_t from() {
272
        DCHECK(!is_empty());
273
        return _ranges[0].from();
274
    }
275
276
1.17M
    int64_t to() {
277
1.17M
        DCHECK(!is_empty());
278
1.17M
        return _ranges[_ranges.size() - 1].to();
279
1.17M
    }
280
281
2.69M
    size_t range_size() const { return _ranges.size(); }
282
283
1.71M
    RowRange get_range(size_t index) const { return _ranges[index]; }
284
285
81.5k
    int64_t get_range_from(size_t range_index) const { return _ranges[range_index].from(); }
286
287
163k
    int64_t get_range_to(size_t range_index) const { return _ranges[range_index].to(); }
288
289
    size_t get_range_count(size_t range_index) const { return _ranges[range_index].count(); }
290
291
0
    std::string to_string() {
292
0
        std::string result;
293
0
        for (auto range : _ranges) {
294
0
            result += range.to_string() + " ";
295
0
        }
296
0
        return result;
297
0
    }
298
299
    // Adds a range to the end of the list of ranges. It maintains the disjunct ascending order(*) of the ranges by
300
    // trying to union the specified range to the last ranges in the list. The specified range shall be larger(*) than
301
    // the last one or might be overlapped with some of the last ones.
302
8.28M
    void add(const RowRange& range) {
303
8.28M
        if (range.count() == 0) {
304
1.90M
            return;
305
1.90M
        }
306
6.38M
        RowRange range_to_add = range;
307
6.49M
        for (int i = cast_set<int>(_ranges.size()) - 1; i >= 0; --i) {
308
123k
            const RowRange last = _ranges[i];
309
123k
            DCHECK(!last.is_after(range));
310
123k
            RowRange u;
311
123k
            bool ret = RowRange::range_union(last, range_to_add, &u);
312
123k
            if (!ret) {
313
                // range do not intersect with the last
314
9.59k
                break;
315
9.59k
            }
316
114k
            range_to_add = u;
317
114k
            _ranges.erase(_ranges.begin() + i);
318
114k
            _count -= last.count();
319
114k
        }
320
6.38M
        _ranges.emplace_back(range_to_add);
321
6.38M
        _count += range_to_add.count();
322
6.38M
    }
323
324
    // Returns the row index (within the original row space) of the pos-th element
325
    // across all ranges. For example, if ranges are [0,3000) and [8000,11000),
326
    // pos=0 returns 0, pos=2999 returns 2999, pos=3000 returns 8000.
327
23.0M
    int64_t get_row_index_by_pos(int64_t pos) const {
328
23.0M
        DORIS_CHECK(pos < _count);
329
23.0M
        size_t remaining = pos;
330
23.0M
        for (const auto& range : _ranges) {
331
23.0M
            size_t range_len = range.count();
332
23.0M
            if (remaining < range_len) {
333
23.0M
                return range.from() + remaining;
334
23.0M
            }
335
10.8k
            remaining -= range_len;
336
10.8k
        }
337
        // pos is out of bounds; return -1 to indicate invalid
338
18.4E
        DCHECK(false) << "pos " << pos << " is out of bounds for RowRanges with count " << _count;
339
18.4E
        return -1;
340
23.0M
    }
341
342
1.23M
    uint64_t get_digest(uint64_t seed) const {
343
1.23M
        for (auto range : _ranges) {
344
1.23M
            seed = range.get_digest(seed);
345
1.23M
        }
346
1.23M
        return seed;
347
1.23M
    }
348
349
private:
350
    std::vector<RowRange> _ranges;
351
    size_t _count;
352
};
353
354
} // namespace segment_v2
355
#include "common/compile_check_end.h"
356
} // namespace doris