Coverage Report

Created: 2025-07-25 20:53

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/be/src/olap/segment_loader.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <butil/macros.h>
21
#include <glog/logging.h>
22
#include <stddef.h>
23
#include <stdint.h>
24
25
#include <atomic>
26
#include <memory>
27
#include <ostream>
28
#include <string>
29
#include <utility>
30
#include <vector>
31
32
#include "common/cast_set.h"
33
#include "common/status.h"
34
#include "olap/lru_cache.h"
35
#include "olap/olap_common.h" // for rowset id
36
#include "olap/rowset/segment_v2/segment.h"
37
#include "runtime/memory/lru_cache_policy.h"
38
#include "util/time.h"
39
40
namespace doris {
41
#include "common/compile_check_begin.h"
42
43
class SegmentCacheHandle;
44
class BetaRowset;
45
46
// SegmentLoader is used to load the Segment of BetaRowset.
47
// An LRUCache is encapsulated inside it, which is used to cache the opened segments.
48
// The caller should use the following method to load and obtain
49
// the segments of a specified rowset:
50
//
51
//  SegmentCacheHandle cache_handle;
52
//  RETURN_IF_ERROR(SegmentLoader::instance()->load_segments(_rowset, &cache_handle));
53
//  for (auto& seg_ptr : cache_handle.get_segments()) {
54
//      ... visit segment ...
55
//  }
56
//
57
// Make sure that cache_handle is valid during the segment usage period.
58
using BetaRowsetSharedPtr = std::shared_ptr<BetaRowset>;
59
60
class SegmentCache : public LRUCachePolicy {
61
public:
62
    using LRUCachePolicy::insert;
63
    // The cache key of the segment LRU cache: a (rowset id, segment id) pair.
64
    struct CacheKey {
65
        CacheKey(RowsetId rowset_id_, int64_t segment_id_)
66
6.34k
                : rowset_id(rowset_id_), segment_id(segment_id_) {}
67
        RowsetId rowset_id;
68
        int64_t segment_id;
69
70
        // Encode to a flat string (rowset id string followed by the decimal segment id) usable as LRUCache's key
71
10.5k
        [[nodiscard]] std::string encode() const {
72
10.5k
            return rowset_id.to_string() + std::to_string(segment_id);
73
10.5k
        }
74
    };
75
76
    // The cache value of the segment LRU cache.
77
    // Holds the SegmentSharedPtr of a single opened segment.
78
    class CacheValue : public LRUCacheValueBase {
79
    public:
80
4.18k
        CacheValue(segment_v2::SegmentSharedPtr segment_) : segment(std::move(segment_)) {}
81
82
        const segment_v2::SegmentSharedPtr segment;
83
    };
84
85
    SegmentCache(size_t memory_bytes_limit, size_t segment_num_limit)
86
1
            : LRUCachePolicy(
87
1
                      CachePolicy::CacheType::SEGMENT_CACHE, memory_bytes_limit, LRUCacheType::SIZE,
88
1
                      config::tablet_rowset_stale_sweep_time_sec, DEFAULT_LRU_CACHE_NUM_SHARDS * 2,
89
1
                      cast_set<uint32_t>(segment_num_limit), config::enable_segment_cache_prune) {}
90
91
    // Look up the segment identified by key in the cache.
92
    // If the segment is found, the cache entry will be written into handle.
93
    // Return true if the entry is found, otherwise return false.
94
    bool lookup(const SegmentCache::CacheKey& key, SegmentCacheHandle* handle);
95
96
    // Insert a cache entry (value) under the given key.
97
    // The inserted cache entry will be returned in handle.
98
    // This function is thread-safe.
99
    void insert(const SegmentCache::CacheKey& key, CacheValue& value, SegmentCacheHandle* handle);
100
101
    void erase(const SegmentCache::CacheKey& key);
102
};
103
104
class SegmentLoader {
105
public:
106
    static SegmentLoader* instance();
107
108
    // Construct a loader that owns a newly created SegmentCache.
109
    // "memory_limit_bytes" and "segment_num_count" are forwarded to the SegmentCache constructor.
110
    // TODO: Currently the number of rowsets is used as the cache capacity (the limit is a rowset count),
111
    // because the memory occupied by a segment cannot be estimated accurately yet.
112
    // Once segment memory usage can be estimated, prefer a memory-based capacity limit.
113
    // NOTE(review): SegmentCache is built with LRUCacheType::SIZE and a byte limit, so this TODO
114
    // may already be stale - confirm against LRUCachePolicy.
115
116
1
    SegmentLoader(size_t memory_limit_bytes, size_t segment_num_count) {
117
1
        _segment_cache = std::make_unique<SegmentCache>(memory_limit_bytes, segment_num_count);
118
1
    }
119
120
    // Load segments of "rowset", return the "cache_handle" which contains segments.
121
    // If use_cache is true, it will be loaded from _segment_cache.
122
    Status load_segments(const BetaRowsetSharedPtr& rowset, SegmentCacheHandle* cache_handle,
123
                         bool use_cache = false, bool need_load_pk_index_and_bf = false,
124
                         OlapReaderStatistics* index_load_stats = nullptr);
125
126
    // Load the segment "segment_id" of "rowset", return the "cache_handle" which contains it.
127
    // If use_cache is true, it will be loaded from _segment_cache.
128
    Status load_segment(const BetaRowsetSharedPtr& rowset, int64_t segment_id,
129
                        SegmentCacheHandle* cache_handle, bool use_cache = false,
130
                        bool need_load_pk_index_and_bf = false,
131
                        OlapReaderStatistics* index_load_stats = nullptr);
132
133
    void erase_segment(const SegmentCache::CacheKey& key);
134
135
    void erase_segments(const RowsetId& rowset_id, int64_t num_segments);
136
137
2
    int64_t cache_mem_usage() const {
138
2
#ifdef BE_TEST
139
2
        return _cache_mem_usage;
140
#else
141
        return _segment_cache->value_mem_consumption();
142
#endif
143
2
    }
144
145
private:
146
    SegmentLoader();
147
    std::unique_ptr<SegmentCache> _segment_cache;
148
    // Only read by cache_mem_usage() when BE_TEST is defined (BE unit tests)
149
    int64_t _cache_mem_usage = 0;
150
};
151
152
// A handle that collects the segments obtained from the segment LRU cache or pushed directly.
153
// It stores a SegmentSharedPtr for every pushed segment, so the handle can ensure that a segment
154
// is valid and will not be closed while the holder of the handle is accessing the segment.
155
// The cache entry itself is released inside push_segment() right after its segment pointer is copied.
156
// So the caller needs to make sure the handle stays alive for as long as the segments are used.
157
class SegmentCacheHandle {
158
public:
159
6.33k
    SegmentCacheHandle() = default;
160
6.33k
    ~SegmentCacheHandle() = default;
161
162
4.18k
    void push_segment(LRUCachePolicy* cache, Cache::Handle* handle) {
163
4.18k
        segments.push_back(((SegmentCache::CacheValue*)cache->value(handle))->segment);
164
4.18k
        cache->release(handle);
165
4.18k
    }
166
167
2.15k
    void push_segment(segment_v2::SegmentSharedPtr segment) {
168
2.15k
        segments.push_back(std::move(segment));
169
2.15k
    }
170
171
6.33k
    std::vector<segment_v2::SegmentSharedPtr>& get_segments() { return segments; }
172
173
17
    [[nodiscard]] bool is_inited() const { return _init; }
174
175
16
    void set_inited() {
176
16
        DCHECK(!_init);
177
16
        _init = true;
178
16
    }
179
180
0
    segment_v2::SegmentSharedPtr pop_unhealthy_segment() {
181
0
        if (segments.empty()) {
182
0
            return nullptr;
183
0
        }
184
0
        segment_v2::SegmentSharedPtr last_segment = segments.back();
185
0
        if (last_segment->healthy_status().ok()) {
186
0
            return nullptr;
187
0
        }
188
0
        segments.pop_back();
189
0
        return last_segment;
190
0
    }
191
192
private:
193
    std::vector<segment_v2::SegmentSharedPtr> segments;
194
    bool _init {false};
195
196
    // Copying and assignment are disallowed
197
    DISALLOW_COPY_AND_ASSIGN(SegmentCacheHandle);
198
};
199
200
#include "common/compile_check_end.h"
201
} // namespace doris