be/src/storage/segment/condition_cache.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <butil/macros.h> |
21 | | #include <glog/logging.h> |
22 | | #include <stddef.h> |
23 | | #include <stdint.h> |
24 | | |
25 | | #include <atomic> |
26 | | #include <memory> |
27 | | #include <roaring/roaring.hh> |
28 | | #include <string> |
29 | | |
30 | | #include "common/config.h" |
31 | | #include "common/status.h" |
32 | | #include "io/fs/file_system.h" |
33 | | #include "io/fs/path.h" |
34 | | #include "runtime/exec_env.h" |
35 | | #include "runtime/memory/lru_cache_policy.h" |
36 | | #include "runtime/memory/mem_tracker.h" |
37 | | #include "util/lru_cache.h" |
38 | | #include "util/slice.h" |
39 | | #include "util/time.h" |
40 | | |
41 | | namespace doris::segment_v2 { |
42 | | |
43 | | class ConditionCacheHandle; |
44 | | |
45 | | class ConditionCache : public LRUCachePolicy { |
46 | | public: |
47 | | using LRUCachePolicy::insert; |
48 | | |
49 | | // Cache key for the segment LRU cache: identifies an entry by (rowset_id, segment_id, digest). |
50 | | struct CacheKey { |
51 | | CacheKey(RowsetId rowset_id_, int64_t segment_id_, uint64_t digest_) |
52 | 2.32M | : rowset_id(rowset_id_), segment_id(segment_id_), digest(digest_) {} |
53 | | RowsetId rowset_id; |
54 | | int64_t segment_id; |
55 | | uint64_t digest; |
56 | | |
57 | | // Encode to a flat binary string usable as the LRUCache key: rowset_id.to_string() followed by the 8 raw bytes of segment_id and the 8 raw bytes of digest, copied as-is with memcpy. |
58 | 2.31M | [[nodiscard]] std::string encode() const { |
59 | 2.31M | char buf[16]; |
60 | 2.31M | memcpy(buf, &segment_id, 8); |
61 | 2.31M | memcpy(buf + 8, &digest, 8); |
62 | | |
63 | 2.31M | return rowset_id.to_string() + std::string(buf, 16); |
64 | 2.31M | } |
65 | | }; |
66 | | |
67 | | class CacheValue : public LRUCacheValueBase { |
68 | | public: |
69 | | std::shared_ptr<std::vector<bool>> filter_result; |
70 | | }; |
71 | | |
72 | | // Cache key for external tables (Hive ORC/Parquet). encode() yields path followed by the raw 8-byte values of modification_time, file_size, digest, start_offset and size, in that order. |
73 | | struct ExternalCacheKey { |
74 | 2.69k | ExternalCacheKey() = default; |
75 | | ExternalCacheKey(const std::string& path_, int64_t modification_time_, int64_t file_size_, |
76 | | uint64_t digest_, int64_t start_offset_, int64_t size_) |
77 | 7 | : path(path_), |
78 | 7 | modification_time(modification_time_), |
79 | 7 | file_size(file_size_), |
80 | 7 | digest(digest_), |
81 | 7 | start_offset(start_offset_), |
82 | 7 | size(size_) {} |
83 | | std::string path; |
84 | | int64_t modification_time = 0; |
85 | | int64_t file_size = 0; |
86 | | uint64_t digest = 0; |
87 | | int64_t start_offset = 0; |
88 | | int64_t size = 0; |
89 | | |
90 | 8 | [[nodiscard]] std::string encode() const { |
91 | 8 | std::string key = path; |
92 | 8 | char buf[40]; |
93 | 8 | memcpy(buf, &modification_time, 8); |
94 | 8 | memcpy(buf + 8, &file_size, 8); |
95 | 8 | memcpy(buf + 16, &digest, 8); |
96 | 8 | memcpy(buf + 24, &start_offset, 8); |
97 | 8 | memcpy(buf + 32, &size, 8); |
98 | 8 | key.append(buf, 40); |
99 | 8 | return key; |
100 | 8 | } |
101 | | }; |
102 | | |
103 | | // Create the instance meant to serve as the global cache: heap-allocates a ConditionCache with the given capacity and num_shards (default 16) and returns the raw pointer. |
104 | 17 | static ConditionCache* create_global_cache(size_t capacity, uint32_t num_shards = 16) { |
105 | 17 | auto* res = new ConditionCache(capacity, num_shards); |
106 | 17 | return res; |
107 | 17 | } |
108 | | |
109 | | // Return the global instance held by ExecEnv, i.e. ExecEnv::GetInstance()->get_condition_cache(). |
110 | | // Client should call create_global_cache before. NOTE(review): presumably the created cache is registered with ExecEnv elsewhere; confirm against the caller. |
111 | 2.32M | static ConditionCache* instance() { return ExecEnv::GetInstance()->get_condition_cache(); } |
112 | | |
113 | | ConditionCache() = delete; |
114 | | |
115 | | ConditionCache(size_t capacity, uint32_t num_shards) |
116 | 17 | : LRUCachePolicy(CachePolicy::CacheType::CONDITION_CACHE, capacity, LRUCacheType::SIZE, |
117 | 17 | config::inverted_index_cache_stale_sweep_time_sec, num_shards, |
118 | 17 | /*element_count_capacity*/ 0, /*enable_prune*/ true, |
119 | 17 | /*is_lru_k*/ true) {} |
120 | | |
121 | | template <typename KeyType> |
122 | | bool lookup(const KeyType& key, ConditionCacheHandle* handle); |
123 | | |
124 | | template <typename KeyType> |
125 | | void insert(const KeyType& key, std::shared_ptr<std::vector<bool>> filter_result); |
126 | | }; |
127 | | |
128 | | class ConditionCacheHandle { |
129 | | public: |
130 | 1.21M | ConditionCacheHandle() = default; |
131 | | |
132 | | ConditionCacheHandle(LRUCachePolicy* cache, Cache::Handle* handle) |
133 | 1.19M | : _cache(cache), _handle(handle) {} |
134 | | |
135 | 2.40M | ~ConditionCacheHandle() { |
136 | 2.40M | if (_handle != nullptr) { |
137 | 1.19M | _cache->release(_handle); |
138 | 1.19M | } |
139 | 2.40M | } |
140 | | |
141 | 0 | ConditionCacheHandle(ConditionCacheHandle&& other) noexcept { |
142 | 0 | // _cache and _handle start as nullptr (in-class initializers), so swapping takes over other's pointers and leaves other empty; std::exchange would express the same transfer. |
143 | 0 | std::swap(_cache, other._cache); |
144 | 0 | std::swap(_handle, other._handle); |
145 | 0 | } |
146 | | |
147 | 76.5k | ConditionCacheHandle& operator=(ConditionCacheHandle&& other) noexcept { |
148 | 76.5k | std::swap(_cache, other._cache); |
149 | 76.5k | std::swap(_handle, other._handle); |
150 | 76.5k | return *this; |
151 | 76.5k | } |
152 | | |
153 | 0 | LRUCachePolicy* cache() const { return _cache; } |
154 | | |
155 | 75.6k | std::shared_ptr<std::vector<bool>> get_filter_result() const { |
156 | 75.6k | if (!_cache) { |
157 | 0 | return nullptr; |
158 | 0 | } |
159 | 75.6k | return ((ConditionCache::CacheValue*)_cache->value(_handle))->filter_result; |
160 | 75.6k | } |
161 | | |
162 | | private: |
163 | | LRUCachePolicy* _cache = nullptr; |
164 | | Cache::Handle* _handle = nullptr; |
165 | | |
166 | | // Don't allow copy and assign: the destructor releases _handle, so this handle type is move-only. |
167 | | DISALLOW_COPY_AND_ASSIGN(ConditionCacheHandle); |
168 | | }; |
169 | | |
170 | | } // namespace doris::segment_v2 |