be/src/storage/segment/condition_cache.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <butil/macros.h> |
21 | | #include <glog/logging.h> |
22 | | #include <stddef.h> |
23 | | #include <stdint.h> |
24 | | |
25 | | #include <atomic> |
26 | | #include <memory> |
27 | | #include <roaring/roaring.hh> |
28 | | #include <string> |
29 | | #include <vector> |
30 | | |
31 | | #include "common/config.h" |
32 | | #include "common/status.h" |
33 | | #include "io/fs/file_system.h" |
34 | | #include "io/fs/path.h" |
35 | | #include "runtime/exec_env.h" |
36 | | #include "runtime/memory/lru_cache_policy.h" |
37 | | #include "runtime/memory/mem_tracker.h" |
38 | | #include "util/lru_cache.h" |
39 | | #include "util/slice.h" |
40 | | #include "util/time.h" |
41 | | |
42 | | namespace doris { |
43 | | |
44 | | // Context passed from scan/table-reader layers to physical readers for condition cache |
45 | | // integration. On MISS, readers set filter_result[granule] to true when row-level predicates keep |
46 | | // at least one row in that granule. On HIT, readers skip granules whose cached bit is false. |
struct ConditionCacheContext {
    // True when the lookup found a cached bitmap; false when this scan is populating one.
    bool is_hit {false};

    // One bit per granule. A set bit means at least one row of that granule survived the
    // row-level predicates.
    std::shared_ptr<std::vector<bool>> filter_result;

    // Global granule index that filter_result[0] corresponds to.
    int64_t base_granule {0};

    // Number of rows covered by a single granule (one bit of filter_result).
    static constexpr int GRANULE_SIZE = 2048;
};
53 | | |
54 | | namespace segment_v2 { |
55 | | |
56 | | class ConditionCacheHandle; |
57 | | |
58 | | class ConditionCache : public LRUCachePolicy { |
59 | | public: |
60 | | using LRUCachePolicy::insert; |
61 | | |
62 | | // The cache key or segment lru cache |
63 | | struct CacheKey { |
64 | | CacheKey(RowsetId rowset_id_, int64_t segment_id_, uint64_t digest_) |
65 | 2.25M | : rowset_id(rowset_id_), segment_id(segment_id_), digest(digest_) {} |
66 | | RowsetId rowset_id; |
67 | | int64_t segment_id; |
68 | | uint64_t digest; |
69 | | |
70 | | // Encode to a flat binary which can be used as LRUCache's key |
71 | 2.25M | [[nodiscard]] std::string encode() const { |
72 | 2.25M | char buf[16]; |
73 | 2.25M | memcpy(buf, &segment_id, 8); |
74 | 2.25M | memcpy(buf + 8, &digest, 8); |
75 | | |
76 | 2.25M | return rowset_id.to_string() + std::string(buf, 16); |
77 | 2.25M | } |
78 | | }; |
79 | | |
80 | | class CacheValue : public LRUCacheValueBase { |
81 | | public: |
82 | | std::shared_ptr<std::vector<bool>> filter_result; |
83 | | }; |
84 | | |
85 | | // Cache key for external tables (Hive ORC/Parquet) |
86 | | struct ExternalCacheKey { |
87 | 3.24k | ExternalCacheKey() = default; |
88 | | ExternalCacheKey(const std::string& path_, int64_t modification_time_, int64_t file_size_, |
89 | | uint64_t digest_, int64_t start_offset_, int64_t size_) |
90 | 21 | : path(path_), |
91 | 21 | modification_time(modification_time_), |
92 | 21 | file_size(file_size_), |
93 | 21 | digest(digest_), |
94 | 21 | start_offset(start_offset_), |
95 | 21 | size(size_) {} |
96 | | std::string path; |
97 | | int64_t modification_time = 0; |
98 | | int64_t file_size = 0; |
99 | | uint64_t digest = 0; |
100 | | int64_t start_offset = 0; |
101 | | int64_t size = 0; |
102 | | |
103 | 29 | [[nodiscard]] std::string encode() const { |
104 | 29 | std::string key = path; |
105 | 29 | char buf[40]; |
106 | 29 | memcpy(buf, &modification_time, 8); |
107 | 29 | memcpy(buf + 8, &file_size, 8); |
108 | 29 | memcpy(buf + 16, &digest, 8); |
109 | 29 | memcpy(buf + 24, &start_offset, 8); |
110 | 29 | memcpy(buf + 32, &size, 8); |
111 | 29 | key.append(buf, 40); |
112 | 29 | return key; |
113 | 29 | } |
114 | | }; |
115 | | |
116 | | // Create global instance of this class |
117 | 18 | static ConditionCache* create_global_cache(size_t capacity, uint32_t num_shards = 16) { |
118 | 18 | auto* res = new ConditionCache(capacity, num_shards); |
119 | 18 | return res; |
120 | 18 | } |
121 | | |
122 | | // Return global instance. |
123 | | // Client should call create_global_cache before. |
124 | 2.26M | static ConditionCache* instance() { return ExecEnv::GetInstance()->get_condition_cache(); } |
125 | | |
126 | | ConditionCache() = delete; |
127 | | |
128 | | ConditionCache(size_t capacity, uint32_t num_shards) |
129 | 18 | : LRUCachePolicy(CachePolicy::CacheType::CONDITION_CACHE, capacity, LRUCacheType::SIZE, |
130 | 18 | config::inverted_index_cache_stale_sweep_time_sec, num_shards, |
131 | 18 | /*element_count_capacity*/ 0, /*enable_prune*/ true, |
132 | 18 | /*is_lru_k*/ true) {} |
133 | | |
134 | | template <typename KeyType> |
135 | | bool lookup(const KeyType& key, ConditionCacheHandle* handle); |
136 | | |
137 | | template <typename KeyType> |
138 | | void insert(const KeyType& key, std::shared_ptr<std::vector<bool>> filter_result); |
139 | | }; |
140 | | |
141 | | class ConditionCacheHandle { |
142 | | public: |
143 | 1.18M | ConditionCacheHandle() = default; |
144 | | |
145 | | ConditionCacheHandle(LRUCachePolicy* cache, Cache::Handle* handle) |
146 | 1.16M | : _cache(cache), _handle(handle) {} |
147 | | |
148 | 2.34M | ~ConditionCacheHandle() { |
149 | 2.34M | if (_handle != nullptr) { |
150 | 1.16M | _cache->release(_handle); |
151 | 1.16M | } |
152 | 2.34M | } |
153 | | |
154 | 0 | ConditionCacheHandle(ConditionCacheHandle&& other) noexcept { |
155 | 0 | // we can use std::exchange if we switch c++14 on |
156 | 0 | std::swap(_cache, other._cache); |
157 | 0 | std::swap(_handle, other._handle); |
158 | 0 | } |
159 | | |
160 | 83.7k | ConditionCacheHandle& operator=(ConditionCacheHandle&& other) noexcept { |
161 | 83.7k | std::swap(_cache, other._cache); |
162 | 83.7k | std::swap(_handle, other._handle); |
163 | 83.7k | return *this; |
164 | 83.7k | } |
165 | | |
166 | 0 | LRUCachePolicy* cache() const { return _cache; } |
167 | | |
168 | 83.0k | std::shared_ptr<std::vector<bool>> get_filter_result() const { |
169 | 83.0k | if (!_cache) { |
170 | 0 | return nullptr; |
171 | 0 | } |
172 | 83.0k | return ((ConditionCache::CacheValue*)_cache->value(_handle))->filter_result; |
173 | 83.0k | } |
174 | | |
175 | | private: |
176 | | LRUCachePolicy* _cache = nullptr; |
177 | | Cache::Handle* _handle = nullptr; |
178 | | |
179 | | // Don't allow copy and assign |
180 | | DISALLOW_COPY_AND_ASSIGN(ConditionCacheHandle); |
181 | | }; |
182 | | |
183 | | } // namespace segment_v2 |
184 | | } // namespace doris |