be/src/storage/segment/condition_cache.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <butil/macros.h> |
21 | | #include <glog/logging.h> |
22 | | #include <stddef.h> |
23 | | #include <stdint.h> |
24 | | |
25 | | #include <atomic> |
26 | | #include <memory> |
27 | | #include <roaring/roaring.hh> |
28 | | #include <string> |
29 | | |
30 | | #include "common/config.h" |
31 | | #include "common/status.h" |
32 | | #include "io/fs/file_system.h" |
33 | | #include "io/fs/path.h" |
34 | | #include "runtime/exec_env.h" |
35 | | #include "runtime/memory/lru_cache_policy.h" |
36 | | #include "runtime/memory/mem_tracker.h" |
37 | | #include "util/lru_cache.h" |
38 | | #include "util/slice.h" |
39 | | #include "util/time.h" |
40 | | |
41 | | namespace doris::segment_v2 { |
42 | | |
43 | | class ConditionCacheHandle; |
44 | | |
45 | | class ConditionCache : public LRUCachePolicy { |
46 | | public: |
47 | | using LRUCachePolicy::insert; |
48 | | |
49 | | // The cache key or segment lru cache |
50 | | struct CacheKey { |
51 | | CacheKey(RowsetId rowset_id_, int64_t segment_id_, uint64_t digest_) |
52 | 2.29M | : rowset_id(rowset_id_), segment_id(segment_id_), digest(digest_) {} |
53 | | RowsetId rowset_id; |
54 | | int64_t segment_id; |
55 | | uint64_t digest; |
56 | | |
57 | | // Encode to a flat binary which can be used as LRUCache's key |
58 | 4.58M | [[nodiscard]] std::string encode() const { |
59 | 4.58M | char buf[16]; |
60 | 4.58M | memcpy(buf, &segment_id, 8); |
61 | 4.58M | memcpy(buf + 8, &digest, 8); |
62 | | |
63 | 4.58M | return rowset_id.to_string() + std::string(buf, 16); |
64 | 4.58M | } |
65 | | }; |
66 | | |
67 | | class CacheValue : public LRUCacheValueBase { |
68 | | public: |
69 | | std::shared_ptr<std::vector<bool>> filter_result; |
70 | | }; |
71 | | |
72 | | // Cache key for external tables (Hive ORC/Parquet) |
73 | | struct ExternalCacheKey { |
74 | | ExternalCacheKey(const std::string& path_, int64_t modification_time_, int64_t file_size_, |
75 | | uint64_t digest_, int64_t start_offset_, int64_t size_) |
76 | 10 | : path(path_), |
77 | 10 | modification_time(modification_time_), |
78 | 10 | file_size(file_size_), |
79 | 10 | digest(digest_), |
80 | 10 | start_offset(start_offset_), |
81 | 10 | size(size_) {} |
82 | | std::string path; |
83 | | int64_t modification_time; |
84 | | int64_t file_size; |
85 | | uint64_t digest; |
86 | | int64_t start_offset; |
87 | | int64_t size; |
88 | | |
89 | 10 | [[nodiscard]] std::string encode() const { |
90 | 10 | std::string key = path; |
91 | 10 | char buf[40]; |
92 | 10 | memcpy(buf, &modification_time, 8); |
93 | 10 | memcpy(buf + 8, &file_size, 8); |
94 | 10 | memcpy(buf + 16, &digest, 8); |
95 | 10 | memcpy(buf + 24, &start_offset, 8); |
96 | 10 | memcpy(buf + 32, &size, 8); |
97 | 10 | key.append(buf, 40); |
98 | 10 | return key; |
99 | 10 | } |
100 | | }; |
101 | | |
102 | | // Create global instance of this class |
103 | 17 | static ConditionCache* create_global_cache(size_t capacity, uint32_t num_shards = 16) { |
104 | 17 | auto* res = new ConditionCache(capacity, num_shards); |
105 | 17 | return res; |
106 | 17 | } |
107 | | |
108 | | // Return global instance. |
109 | | // Client should call create_global_cache before. |
110 | 2.29M | static ConditionCache* instance() { return ExecEnv::GetInstance()->get_condition_cache(); } |
111 | | |
112 | | ConditionCache() = delete; |
113 | | |
114 | | ConditionCache(size_t capacity, uint32_t num_shards) |
115 | 17 | : LRUCachePolicy(CachePolicy::CacheType::CONDITION_CACHE, capacity, LRUCacheType::SIZE, |
116 | 17 | config::inverted_index_cache_stale_sweep_time_sec, num_shards, |
117 | 17 | /*element_count_capacity*/ 0, /*enable_prune*/ true, |
118 | 17 | /*is_lru_k*/ true) {} |
119 | | |
120 | | bool lookup(const CacheKey& key, ConditionCacheHandle* handle); |
121 | | |
122 | | void insert(const CacheKey& key, std::shared_ptr<std::vector<bool>> filter_result); |
123 | | |
124 | | bool lookup(const ExternalCacheKey& key, ConditionCacheHandle* handle); |
125 | | |
126 | | void insert(const ExternalCacheKey& key, std::shared_ptr<std::vector<bool>> filter_result); |
127 | | }; |
128 | | |
129 | | class ConditionCacheHandle { |
130 | | public: |
131 | 1.19M | ConditionCacheHandle() = default; |
132 | | |
133 | | ConditionCacheHandle(LRUCachePolicy* cache, Cache::Handle* handle) |
134 | 1.18M | : _cache(cache), _handle(handle) {} |
135 | | |
136 | 2.36M | ~ConditionCacheHandle() { |
137 | 2.36M | if (_handle != nullptr) { |
138 | 1.18M | _cache->release(_handle); |
139 | 1.18M | } |
140 | 2.36M | } |
141 | | |
142 | 0 | ConditionCacheHandle(ConditionCacheHandle&& other) noexcept { |
143 | 0 | // we can use std::exchange if we switch c++14 on |
144 | 0 | std::swap(_cache, other._cache); |
145 | 0 | std::swap(_handle, other._handle); |
146 | 0 | } |
147 | | |
148 | 71.9k | ConditionCacheHandle& operator=(ConditionCacheHandle&& other) noexcept { |
149 | 71.9k | std::swap(_cache, other._cache); |
150 | 71.9k | std::swap(_handle, other._handle); |
151 | 71.9k | return *this; |
152 | 71.9k | } |
153 | | |
154 | 0 | LRUCachePolicy* cache() const { return _cache; } |
155 | | |
156 | 71.8k | std::shared_ptr<std::vector<bool>> get_filter_result() const { |
157 | 71.8k | if (!_cache) { |
158 | 0 | return nullptr; |
159 | 0 | } |
160 | 71.8k | return ((ConditionCache::CacheValue*)_cache->value(_handle))->filter_result; |
161 | 71.8k | } |
162 | | |
163 | | private: |
164 | | LRUCachePolicy* _cache = nullptr; |
165 | | Cache::Handle* _handle = nullptr; |
166 | | |
167 | | // Don't allow copy and assign |
168 | | DISALLOW_COPY_AND_ASSIGN(ConditionCacheHandle); |
169 | | }; |
170 | | |
171 | | } // namespace doris::segment_v2 |