Coverage Report

Created: 2026-04-16 16:37

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/segment/row_ranges.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <roaring/roaring.hh>
21
#include <string>
22
#include <vector>
23
24
#include "absl/strings/substitute.h"
25
#include "common/cast_set.h"
26
#include "common/logging.h"
27
#include "storage/segment/common.h"
28
29
namespace doris {
30
namespace segment_v2 {
31
32
// RowRange stands for range[From, To), From is inclusive,
33
// To is exclusive. It is used for row id range calculation.
34
class RowRange {
35
public:
36
    // Returns true if two ranges are overlapped or false.
37
    // The union range will be returned through range.
38
72
    static bool range_union(const RowRange& left, const RowRange& right, RowRange* range) {
39
72
        if (left._from <= right._from) {
40
72
            if (left._to >= right._from) {
41
5
                range->_from = left._from;
42
5
                range->_to = std::max(left._to, right._to);
43
5
                return true;
44
5
            }
45
72
        } else if (right._to >= left._from) {
46
0
            range->_from = right._from;
47
0
            range->_to = std::max(left._to, right._to);
48
0
            return true;
49
0
        }
50
        // return a invalid range
51
67
        range->_from = 0;
52
67
        range->_to = 0;
53
67
        return false;
54
72
    }
55
56
    // Returns true if the two ranges are intersected or false.
57
    // The intersection of the two ranges is returned through range.
58
1.56k
    static bool range_intersection(const RowRange& left, const RowRange& right, RowRange* range) {
59
1.56k
        if (left._from <= right._from) {
60
1.54k
            if (left._to > right._from) {
61
1.53k
                range->_from = right._from;
62
1.53k
                range->_to = std::min(left._to, right._to);
63
1.53k
                return true;
64
1.53k
            }
65
1.54k
        } else if (right._to > left._from) {
66
22
            range->_from = left._from;
67
22
            range->_to = std::min(left._to, right._to);
68
22
            return true;
69
22
        }
70
        // return a invalid range
71
2
        range->_from = 0;
72
2
        range->_to = 0;
73
2
        return false;
74
1.56k
    }
75
76
1.63k
    RowRange() : _from(0), _to(0) {}
77
78
    // Creates a range of [from, to) (from inclusive and to exclusive; empty ranges are invalid)
79
1.75k
    RowRange(int64_t from, int64_t to) : _from(from), _to(to) {}
80
81
6
    bool is_valid() const { return _from < _to; }
82
83
6.63k
    size_t count() const { return _to - _from; }
84
85
1.65k
    bool is_before(const RowRange& other) const { return _to <= other._from; }
86
87
1.72k
    bool is_after(const RowRange& other) const { return _from >= other._to; }
88
89
984
    int64_t from() const { return _from; }
90
91
818
    int64_t to() const { return _to; }
92
93
0
    std::string to_string() const { return absl::Substitute("[$0-$1)", _from, _to); }
94
95
0
    uint64_t get_digest(uint64_t seed) const {
96
0
        uint64_t hash = seed;
97
0
        hash = hash * 31 + _from;
98
0
        hash = hash * 31 + _to;
99
0
        return hash;
100
0
    }
101
102
private:
103
    int64_t _from;
104
    int64_t _to;
105
};
106
107
class RowRanges {
108
public:
109
11.1k
    RowRanges() : _count(0) {}
110
111
5.64k
    void clear() {
112
5.64k
        _ranges.clear();
113
5.64k
        _count = 0;
114
5.64k
    }
115
116
    // Creates a new RowRanges object with the single range [0, row_count).
117
1.40k
    static RowRanges create_single(uint64_t row_count) {
118
1.40k
        RowRanges ranges;
119
1.40k
        ranges.add(RowRange(0, row_count));
120
1.40k
        return ranges;
121
1.40k
    }
122
123
    // Creates a new RowRanges object with the single range [from, to).
124
26
    static RowRanges create_single(int64_t from, int64_t to) {
125
26
        DCHECK(from <= to);
126
26
        RowRanges ranges;
127
26
        ranges.add(RowRange(from, to));
128
26
        return ranges;
129
26
    }
130
131
    // Calculates the union of the two specified RowRanges object. The union of two range is calculated if there are
132
    // elements between them. Otherwise, the two disjunct ranges are stored separately.
133
    // For example:
134
    // [113, 241) ∪ [221, 340) = [113, 340)
135
    // [113, 230) ∪ [230, 340) = [113, 340]
136
    // while
137
    // [113, 230) ∪ [231, 340) = [113, 230), [231, 340)
138
3
    static void ranges_union(const RowRanges& left, const RowRanges& right, RowRanges* result) {
139
3
        RowRanges tmp_range;
140
3
        auto it1 = left._ranges.begin();
141
3
        auto it2 = right._ranges.begin();
142
        // merge and add
143
6
        while (it1 != left._ranges.end() && it2 != right._ranges.end()) {
144
3
            if (it1->is_after(*it2)) {
145
0
                tmp_range.add(*it2);
146
0
                ++it2;
147
3
            } else {
148
3
                tmp_range.add(*it1);
149
3
                ++it1;
150
3
            }
151
3
        }
152
3
        while (it1 != left._ranges.end()) {
153
0
            tmp_range.add(*it1);
154
0
            ++it1;
155
0
        }
156
6
        while (it2 != right._ranges.end()) {
157
3
            tmp_range.add(*it2);
158
3
            ++it2;
159
3
        }
160
3
        *result = std::move(tmp_range);
161
3
    }
162
163
    // Calculates the intersection of the two specified RowRanges object. Two ranges intersect if they have common
164
    // elements otherwise the result is empty.
165
    // For example:
166
    // [113, 241) ∩ [221, 340) = [221, 241)
167
    // while
168
    // [113, 230) ∩ [230, 340) = <EMPTY>
169
    //
170
    // The result RowRanges object will contain all the row indexes there were contained in both of the specified objects
171
    static void ranges_intersection(const RowRanges& left, const RowRanges& right,
172
1.59k
                                    RowRanges* result) {
173
1.59k
        RowRanges tmp_range;
174
1.59k
        int right_index = 0;
175
3.18k
        for (auto it1 = left._ranges.begin(); it1 != left._ranges.end(); ++it1) {
176
1.59k
            const RowRange& range1 = *it1;
177
3.24k
            for (int i = right_index; i < right._ranges.size(); ++i) {
178
1.65k
                const RowRange& range2 = right._ranges[i];
179
1.65k
                if (range1.is_before(range2)) {
180
3
                    break;
181
1.64k
                } else if (range1.is_after(range2)) {
182
90
                    right_index = i + 1;
183
90
                    continue;
184
90
                }
185
1.55k
                RowRange merge_range;
186
1.55k
                bool ret = RowRange::range_intersection(range1, range2, &merge_range);
187
1.55k
                DCHECK(ret);
188
1.55k
                tmp_range.add(merge_range);
189
1.55k
            }
190
1.59k
        }
191
1.59k
        *result = std::move(tmp_range);
192
1.59k
    }
193
194
    // Calculates the exception (set difference) of the two specified RowRanges objects: left \ right.
195
    // The result contains all row indexes that are in the left ranges but NOT in the right ranges.
196
    // For example:
197
    // [100, 300) \ [150, 200) = [100, 150), [200, 300)
198
    // [100, 300) \ [0, 150) = [150, 300)
199
    // [100, 300) \ [250, 400) = [100, 250)
200
    // [100, 200) \ [200, 300) = [100, 200)
201
    // [100, 300) \ [0, 400) = <EMPTY>
202
    // [100, 200), [300, 400) \ [150, 350) = [100, 150), [350, 400)
203
34
    static void ranges_exception(const RowRanges& left, const RowRanges& right, RowRanges* result) {
204
34
        RowRanges tmp_range;
205
34
        int right_index = 0;
206
73
        for (auto it1 = left._ranges.begin(); it1 != left._ranges.end(); ++it1) {
207
39
            int64_t current_from = it1->from();
208
39
            int64_t current_to = it1->to();
209
70
            for (int i = right_index; i < right._ranges.size(); ++i) {
210
36
                const RowRange& range2 = right._ranges[i];
211
36
                if (current_from >= current_to) {
212
                    // Current range fully consumed
213
1
                    break;
214
1
                }
215
35
                if (current_to <= range2.from()) {
216
                    // Current remaining range is entirely before range2, no more subtraction needed
217
4
                    break;
218
4
                }
219
31
                if (current_from >= range2.to()) {
220
                    // range2 is entirely before the current remaining range, advance right_index
221
6
                    right_index = i + 1;
222
6
                    continue;
223
6
                }
224
                // There is overlap between [current_from, current_to) and range2
225
25
                if (current_from < range2.from()) {
226
                    // Left portion before the overlap: [current_from, range2.from())
227
14
                    tmp_range.add(RowRange(current_from, range2.from()));
228
14
                }
229
                // Advance current_from past the overlap
230
25
                current_from = range2.to();
231
25
            }
232
            // Add whatever remains of the current left range
233
39
            if (current_from < current_to) {
234
26
                tmp_range.add(RowRange(current_from, current_to));
235
26
            }
236
39
        }
237
34
        *result = std::move(tmp_range);
238
34
    }
239
240
475
    static roaring::Roaring ranges_to_roaring(const RowRanges& ranges) {
241
475
        roaring::Roaring result;
242
951
        for (auto it = ranges._ranges.begin(); it != ranges._ranges.end(); ++it) {
243
476
            result.addRange(it->from(), it->to());
244
476
        }
245
475
        return result;
246
475
    }
247
248
3.19k
    size_t count() { return _count; }
249
250
5.64k
    bool is_empty() { return _count == 0; }
251
252
2
    bool contain(rowid_t from, rowid_t to) {
253
        // binary search
254
2
        RowRange tmp_range = RowRange(from, to);
255
2
        size_t start = 0;
256
2
        size_t end = _ranges.size();
257
3
        while (start <= end) {
258
3
            size_t mid = (start + end) / 2;
259
3
            if (_ranges[mid].is_before(tmp_range)) {
260
0
                start = mid;
261
3
            } else if (_ranges[mid].is_after(tmp_range)) {
262
1
                end = mid - 1;
263
2
            } else {
264
2
                return true;
265
2
            }
266
3
        }
267
0
        return false;
268
2
    }
269
270
2
    int64_t from() {
271
2
        DCHECK(!is_empty());
272
2
        return _ranges[0].from();
273
2
    }
274
275
2
    int64_t to() {
276
2
        DCHECK(!is_empty());
277
2
        return _ranges[_ranges.size() - 1].to();
278
2
    }
279
280
314
    size_t range_size() const { return _ranges.size(); }
281
282
170
    RowRange get_range(size_t index) const { return _ranges[index]; }
283
284
37
    int64_t get_range_from(size_t range_index) const { return _ranges[range_index].from(); }
285
286
45
    int64_t get_range_to(size_t range_index) const { return _ranges[range_index].to(); }
287
288
2
    size_t get_range_count(size_t range_index) const { return _ranges[range_index].count(); }
289
290
0
    std::string to_string() {
291
0
        std::string result;
292
0
        for (auto range : _ranges) {
293
0
            result += range.to_string() + " ";
294
0
        }
295
0
        return result;
296
0
    }
297
298
    // Adds a range to the end of the list of ranges. It maintains the disjunct ascending order(*) of the ranges by
299
    // trying to union the specified range to the last ranges in the list. The specified range shall be larger(*) than
300
    // the last one or might be overlapped with some of the last ones.
301
3.31k
    void add(const RowRange& range) {
302
3.31k
        if (range.count() == 0) {
303
0
            return;
304
0
        }
305
3.31k
        RowRange range_to_add = range;
306
3.31k
        for (int i = cast_set<int>(_ranges.size()) - 1; i >= 0; --i) {
307
70
            const RowRange last = _ranges[i];
308
70
            DCHECK(!last.is_after(range));
309
70
            RowRange u;
310
70
            bool ret = RowRange::range_union(last, range_to_add, &u);
311
70
            if (!ret) {
312
                // range do not intersect with the last
313
66
                break;
314
66
            }
315
4
            range_to_add = u;
316
4
            _ranges.erase(_ranges.begin() + i);
317
4
            _count -= last.count();
318
4
        }
319
3.31k
        _ranges.emplace_back(range_to_add);
320
3.31k
        _count += range_to_add.count();
321
3.31k
    }
322
323
    // Returns the row index (within the original row space) of the pos-th element
324
    // across all ranges. For example, if ranges are [0,3000) and [8000,11000),
325
    // pos=0 returns 0, pos=2999 returns 2999, pos=3000 returns 8000.
326
0
    int64_t get_row_index_by_pos(int64_t pos) const {
327
0
        DORIS_CHECK(pos < _count);
328
0
        size_t remaining = pos;
329
0
        for (const auto& range : _ranges) {
330
0
            size_t range_len = range.count();
331
0
            if (remaining < range_len) {
332
0
                return range.from() + remaining;
333
0
            }
334
0
            remaining -= range_len;
335
0
        }
336
        // pos is out of bounds; return -1 to indicate invalid
337
0
        DCHECK(false) << "pos " << pos << " is out of bounds for RowRanges with count " << _count;
338
0
        return -1;
339
0
    }
340
341
0
    uint64_t get_digest(uint64_t seed) const {
342
0
        for (auto range : _ranges) {
343
0
            seed = range.get_digest(seed);
344
0
        }
345
0
        return seed;
346
0
    }
347
348
private:
349
    std::vector<RowRange> _ranges;
350
    size_t _count;
351
};
352
353
} // namespace segment_v2
354
} // namespace doris