Coverage Report

Created: 2026-07-04 09:32

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/segment/condition_cache.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <butil/macros.h>
21
#include <glog/logging.h>
22
#include <stddef.h>
23
#include <stdint.h>
24
25
#include <atomic>
26
#include <memory>
27
#include <roaring/roaring.hh>
28
#include <string>
29
#include <vector>
30
31
#include "common/config.h"
32
#include "common/status.h"
33
#include "io/fs/file_system.h"
34
#include "io/fs/path.h"
35
#include "runtime/exec_env.h"
36
#include "runtime/memory/lru_cache_policy.h"
37
#include "runtime/memory/mem_tracker.h"
38
#include "util/lru_cache.h"
39
#include "util/slice.h"
40
#include "util/time.h"
41
42
namespace doris {
43
44
// Context passed from scan/table-reader layers to physical readers for condition cache
45
// integration. On MISS, readers set filter_result[granule] to true when row-level predicates keep
46
// at least one row in that granule. On HIT, readers skip granules whose cached bit is false.
47
struct ConditionCacheContext {
48
    bool is_hit = false; // NOTE(review): presumably true when filter_result came from a cache HIT — confirm who sets it
49
    std::shared_ptr<std::vector<bool>> filter_result; // per-granule: true = has surviving rows
50
    int64_t base_granule = 0;                         // global granule index of filter_result[0]
51
    static constexpr int GRANULE_SIZE = 2048; // NOTE(review): presumably the number of rows per granule — confirm
52
};
53
54
namespace segment_v2 {
55
56
class ConditionCacheHandle;
57
58
// LRU cache (derived from LRUCachePolicy) whose values carry a per-granule filter bitmap
// (CacheValue::filter_result). Two key types are defined here, CacheKey and ExternalCacheKey;
// both flatten themselves into a std::string via encode().
// lookup() and insert() are only declared in this class; their definitions are not shown here.
class ConditionCache : public LRUCachePolicy {
59
public:
60
    // Keep the base-class insert overloads visible; the template insert declared below would
    // otherwise hide them.
    using LRUCachePolicy::insert;
61
62
    // Cache key for a segment: rowset id, segment id and a digest.
63
    struct CacheKey {
64
        CacheKey(RowsetId rowset_id_, int64_t segment_id_, uint64_t digest_)
65
2.25M
                : rowset_id(rowset_id_), segment_id(segment_id_), digest(digest_) {}
66
        RowsetId rowset_id;
67
        int64_t segment_id;
68
        uint64_t digest;
69
70
        // Encode to a flat binary which can be used as LRUCache's key
        // Layout: rowset_id.to_string() followed by 16 raw bytes, segment_id (8 bytes) then
        // digest (8 bytes), each copied with memcpy in its in-memory byte order.
71
2.25M
        [[nodiscard]] std::string encode() const {
72
2.25M
            char buf[16];
73
2.25M
            // NOTE(review): memcpy is declared in <cstring>/<string.h>, which the visible include
            // list does not name directly; presumably it arrives transitively — confirm.
            memcpy(buf, &segment_id, 8);
74
2.25M
            memcpy(buf + 8, &digest, 8);
75
76
2.25M
            // std::string(buf, 16) takes an explicit length, so embedded zero bytes are kept.
            return rowset_id.to_string() + std::string(buf, 16);
77
2.25M
        }
78
    };
79
80
    // Cached payload: a shared pointer to the filter bitmap.
    class CacheValue : public LRUCacheValueBase {
81
    public:
82
        std::shared_ptr<std::vector<bool>> filter_result;
83
    };
84
85
    // Cache key for external tables (Hive ORC/Parquet)
86
    struct ExternalCacheKey {
87
3.24k
        // Default key: empty path, all numeric fields zero (see the member initializers below).
        ExternalCacheKey() = default;
88
        ExternalCacheKey(const std::string& path_, int64_t modification_time_, int64_t file_size_,
89
                         uint64_t digest_, int64_t start_offset_, int64_t size_)
90
21
                : path(path_),
91
21
                  modification_time(modification_time_),
92
21
                  file_size(file_size_),
93
21
                  digest(digest_),
94
21
                  start_offset(start_offset_),
95
21
                  size(size_) {}
96
        std::string path;
97
        int64_t modification_time = 0;
98
        int64_t file_size = 0;
99
        uint64_t digest = 0;
100
        int64_t start_offset = 0;
101
        int64_t size = 0;
102
103
29
        // Flattens the key into a string: path followed by 40 raw bytes holding, in order,
        // modification_time, file_size, digest, start_offset and size (8 bytes each, copied
        // with memcpy in their in-memory byte order).
        [[nodiscard]] std::string encode() const {
104
29
            std::string key = path;
105
29
            char buf[40];
106
29
            memcpy(buf, &modification_time, 8);
107
29
            memcpy(buf + 8, &file_size, 8);
108
29
            memcpy(buf + 16, &digest, 8);
109
29
            memcpy(buf + 24, &start_offset, 8);
110
29
            memcpy(buf + 32, &size, 8);
111
29
            // append(buf, 40) takes an explicit length, so embedded zero bytes are kept.
            key.append(buf, 40);
112
29
            return key;
113
29
        }
114
    };
115
116
    // Create global instance of this class
    // The object is allocated with `new` and returned as a raw pointer; this function neither
    // stores nor frees it. NOTE(review): presumably the caller hands it to ExecEnv, which
    // instance() reads from — confirm ownership.
117
18
    static ConditionCache* create_global_cache(size_t capacity, uint32_t num_shards = 16) {
118
18
        auto* res = new ConditionCache(capacity, num_shards);
119
18
        return res;
120
18
    }
121
122
    // Return global instance.
123
    // Client should call create_global_cache before.
124
2.26M
    static ConditionCache* instance() { return ExecEnv::GetInstance()->get_condition_cache(); }
125
126
    ConditionCache() = delete;
127
128
    // Builds the underlying LRUCachePolicy as a CONDITION_CACHE of type LRUCacheType::SIZE with
    // the given capacity and shard count, element_count_capacity 0, pruning enabled and LRU-K on.
    // NOTE(review): the fourth argument reuses config::inverted_index_cache_stale_sweep_time_sec
    // rather than a condition-cache-specific setting — confirm this is intended.
    ConditionCache(size_t capacity, uint32_t num_shards)
129
18
            : LRUCachePolicy(CachePolicy::CacheType::CONDITION_CACHE, capacity, LRUCacheType::SIZE,
130
18
                             config::inverted_index_cache_stale_sweep_time_sec, num_shards,
131
18
                             /*element_count_capacity*/ 0, /*enable_prune*/ true,
132
18
                             /*is_lru_k*/ true) {}
133
134
    // Declared only; the definition is not shown here.
    // NOTE(review): presumably returns true on a hit and fills *handle — confirm.
    template <typename KeyType>
135
    bool lookup(const KeyType& key, ConditionCacheHandle* handle);
136
137
    // Declared only; the definition is not shown here.
    // NOTE(review): presumably stores filter_result under key.encode() — confirm.
    template <typename KeyType>
138
    void insert(const KeyType& key, std::shared_ptr<std::vector<bool>> filter_result);
139
};
140
141
// Move-only holder of a cache pointer plus a Cache::Handle*. When a handle is held, the
// destructor gives it back by calling _cache->release(_handle).
class ConditionCacheHandle {
142
public:
143
1.18M
    // Empty handle: both pointers stay nullptr (default member initializers).
    ConditionCacheHandle() = default;
144
145
    // Wraps `handle`; it is released through `cache` when this object is destroyed.
    ConditionCacheHandle(LRUCachePolicy* cache, Cache::Handle* handle)
146
1.16M
            : _cache(cache), _handle(handle) {}
147
148
2.34M
    // Releases the held handle, if any. An empty (or moved-from) object does nothing.
    ~ConditionCacheHandle() {
149
2.34M
        if (_handle != nullptr) {
150
1.16M
            _cache->release(_handle);
151
1.16M
        }
152
2.34M
    }
153
154
0
    ConditionCacheHandle(ConditionCacheHandle&& other) noexcept {
155
0
        // _cache/_handle start as nullptr here, so the swaps take other's pointers and leave other empty
156
0
        std::swap(_cache, other._cache);
157
0
        std::swap(_handle, other._handle);
158
0
    }
159
160
83.7k
    // Move-assign by swapping: whatever this object held before ends up in `other` and is
    // released when `other` is destroyed, not at the time of the assignment.
    ConditionCacheHandle& operator=(ConditionCacheHandle&& other) noexcept {
161
83.7k
        std::swap(_cache, other._cache);
162
83.7k
        std::swap(_handle, other._handle);
163
83.7k
        return *this;
164
83.7k
    }
165
166
0
    // Returns the stored cache pointer; nullptr for an empty handle.
    LRUCachePolicy* cache() const { return _cache; }
167
168
83.0k
    // Returns nullptr when no cache is attached. Otherwise casts _cache->value(_handle) to
    // ConditionCache::CacheValue* and returns a copy of its filter_result shared_ptr.
    // NOTE(review): only _cache is checked, while the destructor checks _handle; an object
    // built with a non-null cache and a null handle would pass nullptr to value() — confirm
    // callers never construct that.
    std::shared_ptr<std::vector<bool>> get_filter_result() const {
169
83.0k
        if (!_cache) {
170
0
            return nullptr;
171
0
        }
172
83.0k
        return ((ConditionCache::CacheValue*)_cache->value(_handle))->filter_result;
173
83.0k
    }
174
175
private:
176
    LRUCachePolicy* _cache = nullptr;
177
    Cache::Handle* _handle = nullptr;
178
179
    // Don't allow copy and assign
180
    DISALLOW_COPY_AND_ASSIGN(ConditionCacheHandle);
181
};
182
183
} // namespace segment_v2
184
} // namespace doris