Coverage Report

Created: 2026-03-13 09:58

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/io/cache/file_cache_common.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Interpreters/Cache/FileCache_fwd.h
19
// and modified by Doris
20
21
#include "io/cache/file_cache_common.h"
22
23
#include "common/config.h"
24
#include "exec/common/hex.h"
25
#include "io/cache/block_file_cache.h"
26
27
namespace doris::io {
28
29
114k
// Returns the suffix string associated with a file cache type.
// NORMAL maps to the empty string; a value that matches none of the
// enumerators listed below also yields the empty string.
std::string cache_type_to_surfix(FileCacheType type) {
    const char* suffix = "";
    switch (type) {
    case FileCacheType::INDEX:
        suffix = "_idx";
        break;
    case FileCacheType::DISPOSABLE:
        suffix = "_disposable";
        break;
    case FileCacheType::NORMAL:
        // Keeps the empty default.
        break;
    case FileCacheType::TTL:
        suffix = "_ttl";
        break;
    case FileCacheType::COLD_NORMAL:
        suffix = "_cold_normal";
        break;
    }
    return suffix;
}
44
45
3
// Maps a suffix spelling ("idx", "disposable", "ttl", "cold_normal" -- note:
// no leading underscore) to its FileCacheType.
// Any other string trips DCHECK(false) with the offending string in the
// message; if execution continues past the check, DISPOSABLE is returned.
FileCacheType surfix_to_cache_type(const std::string& str) {
    if (str == "idx") {
        return FileCacheType::INDEX;
    }
    if (str == "disposable") {
        return FileCacheType::DISPOSABLE;
    }
    if (str == "ttl") {
        return FileCacheType::TTL;
    }
    if (str == "cold_normal") {
        return FileCacheType::COLD_NORMAL;
    }

    DCHECK(false) << "The string is " << str;
    return FileCacheType::DISPOSABLE;
}
58
59
9.42k
// Maps a cache type name ("normal", "index", "disposable", "ttl",
// "cold_normal") to its FileCacheType.
// Any other string trips DCHECK(false) with the offending string in the
// message; if execution continues past the check, NORMAL is returned.
FileCacheType string_to_cache_type(const std::string& str) {
    struct NamedType {
        const char* name;
        FileCacheType type;
    };
    static constexpr NamedType kKnownTypes[] = {
            {"normal", FileCacheType::NORMAL},
            {"index", FileCacheType::INDEX},
            {"disposable", FileCacheType::DISPOSABLE},
            {"ttl", FileCacheType::TTL},
            {"cold_normal", FileCacheType::COLD_NORMAL},
    };

    for (const NamedType& entry : kKnownTypes) {
        if (str == entry.name) {
            return entry.type;
        }
    }

    DCHECK(false) << "The string is " << str;
    return FileCacheType::NORMAL;
}
74
7.62k
// Returns the name of a file cache type ("index", "disposable", "normal",
// "ttl" or "cold_normal").
// A value that matches none of the enumerators trips DCHECK(false); if
// execution continues past the check, "normal" is returned.
std::string cache_type_to_string(FileCacheType type) {
    const char* name = nullptr;
    switch (type) {
    case FileCacheType::INDEX:
        name = "index";
        break;
    case FileCacheType::DISPOSABLE:
        name = "disposable";
        break;
    case FileCacheType::NORMAL:
        name = "normal";
        break;
    case FileCacheType::TTL:
        name = "ttl";
        break;
    case FileCacheType::COLD_NORMAL:
        name = "cold_normal";
        break;
    }
    if (name != nullptr) {
        return name;
    }

    DCHECK(false) << "unknown type: " << type;
    return "normal";
}
90
91
172
std::string FileCacheSettings::to_string() const {
92
172
    std::stringstream ss;
93
172
    ss << "capacity: " << capacity << ", max_file_block_size: " << max_file_block_size
94
172
       << ", max_query_cache_size: " << max_query_cache_size
95
172
       << ", disposable_queue_size: " << disposable_queue_size
96
172
       << ", disposable_queue_elements: " << disposable_queue_elements
97
172
       << ", index_queue_size: " << index_queue_size
98
172
       << ", index_queue_elements: " << index_queue_elements
99
172
       << ", ttl_queue_size: " << ttl_queue_size << ", ttl_queue_elements: " << ttl_queue_elements
100
172
       << ", query_queue_size: " << query_queue_size
101
172
       << ", query_queue_elements: " << query_queue_elements
102
172
       << ", cold_query_queue_size: " << cold_query_queue_size
103
172
       << ", cold_query_queue_elements: " << cold_query_queue_elements << ", storage: " << storage;
104
172
    return ss.str();
105
172
}
106
107
// Builds a FileCacheSettings that splits `capacity` between the disposable,
// index, TTL and query queues.
//
// - capacity == 0 returns a default-constructed FileCacheSettings untouched.
// - disposable/index/ttl queues each get (capacity / 100) * <their percent>.
// - The query queue gets whatever capacity remains after those three.
//   `normal_percent` is not read by this function.
// - Each queue's element limit is its size divided by
//   config::file_cache_each_block_size, but never less than
//   REMOTE_FS_OBJECTS_CACHE_DEFAULT_ELEMENTS.
// - When config::enable_file_cache_normal_queue_2qlru is set, a share of the
//   query queue (size and elements) given by
//   config::file_cache_2qlru_cold_blocks_percent is moved to the cold query
//   queue.
FileCacheSettings get_file_cache_settings(size_t capacity, size_t max_query_cache_size,
                                          size_t normal_percent, size_t disposable_percent,
                                          size_t index_percent, size_t ttl_percent,
                                          const std::string& storage) {
    io::FileCacheSettings settings;
    if (capacity == 0) {
        return settings;
    }

    settings.capacity = capacity;
    settings.max_file_block_size = config::file_cache_each_block_size;
    settings.max_query_cache_size = max_query_cache_size;

    // Number of blocks that fit in a queue of the given byte size, floored at
    // the default element count.
    const auto elements_for = [&settings](const auto queue_size) {
        return std::max(queue_size / settings.max_file_block_size,
                        REMOTE_FS_OBJECTS_CACHE_DEFAULT_ELEMENTS);
    };

    size_t one_percent = settings.capacity / 100;

    settings.disposable_queue_size = one_percent * disposable_percent;
    settings.disposable_queue_elements = elements_for(settings.disposable_queue_size);

    settings.index_queue_size = one_percent * index_percent;
    settings.index_queue_elements = elements_for(settings.index_queue_size);

    settings.ttl_queue_size = one_percent * ttl_percent;
    settings.ttl_queue_elements = elements_for(settings.ttl_queue_size);

    // The query queue takes the remainder rather than its own percentage.
    settings.query_queue_size = settings.capacity - settings.disposable_queue_size -
                                settings.index_queue_size - settings.ttl_queue_size;
    settings.query_queue_elements = elements_for(settings.query_queue_size);

    if (config::enable_file_cache_normal_queue_2qlru) {
        size_t query_size_one_percent = settings.query_queue_size / 100;
        size_t query_elements_one_percent = settings.query_queue_elements / 100;

        settings.cold_query_queue_size =
                query_size_one_percent * config::file_cache_2qlru_cold_blocks_percent;
        settings.cold_query_queue_elements =
                query_elements_one_percent * config::file_cache_2qlru_cold_blocks_percent;

        // The cold share is carved out of the query queue.
        settings.query_queue_size -= settings.cold_query_queue_size;
        settings.query_queue_elements -= settings.cold_query_queue_elements;
    }

    settings.storage = storage;
    return settings;
}
155
156
566k
std::string UInt128Wrapper::to_string() const {
157
566k
    return get_hex_uint_lowercase(value_);
158
566k
}
159
160
// Asks _cache for the blocks covering [offset, offset + size) of the file
// identified by _cache_hash and returns them wrapped in a FileBlocksHolder.
//
// The cache type placed in the request context is:
//   - TTL          when _expiration_time is non-zero,
//   - COLD_NORMAL  when _expiration_time is zero and
//                  config::enable_file_cache_normal_queue_2qlru is set,
//   - NORMAL       otherwise.
FileBlocksHolderPtr FileCacheAllocatorBuilder::allocate_cache_holder(size_t offset, size_t size,
                                                                     int64_t tablet_id) const {
    CacheContext ctx;
    if (_expiration_time != 0) {
        ctx.cache_type = FileCacheType::TTL;
    } else if (config::enable_file_cache_normal_queue_2qlru) {
        ctx.cache_type = FileCacheType::COLD_NORMAL;
    } else {
        ctx.cache_type = FileCacheType::NORMAL;
    }
    ctx.expiration_time = _expiration_time;
    ctx.is_cold_data = _is_cold_data;
    ctx.tablet_id = tablet_id;

    // ctx.stats points at this function-local object, so the statistics are
    // discarded when the function returns.
    ReadStatistics stats;
    ctx.stats = &stats;

    return std::make_unique<FileBlocksHolder>(_cache->get_or_set(_cache_hash, offset, size, ctx));
}
175
176
// Explicit instantiation of LRUQueue::get_capacity for a
// std::lock_guard<std::mutex> lock argument.
template size_t LRUQueue::get_capacity(std::lock_guard<std::mutex>& cache_lock) const;
177
// Explicit instantiation of LRUQueue::remove for a
// std::lock_guard<std::mutex> lock argument.
template void LRUQueue::remove(Iterator queue_it, std::lock_guard<std::mutex>& cache_lock);
178
179
0
std::string FileCacheInfo::to_string() const {
180
0
    std::stringstream ss;
181
0
    ss << "Hash: " << hash.to_string() << "\n"
182
0
       << "Expiration Time: " << expiration_time << "\n"
183
0
       << "Offset: " << offset << "\n"
184
0
       << "Cache Type: " << cache_type_to_string(cache_type) << "\n";
185
0
    return ss.str();
186
0
}
187
188
0
std::string InconsistencyType::to_string() const {
189
0
    std::string result = "Inconsistency Reason: ";
190
0
    if (type == NONE) {
191
0
        result += "NONE";
192
0
    } else {
193
0
        if (type & NOT_LOADED) {
194
0
            result += "NOT_LOADED ";
195
0
        }
196
0
        if (type & MISSING_IN_STORAGE) {
197
0
            result += "MISSING_IN_STORAGE ";
198
0
        }
199
0
        if (type & SIZE_INCONSISTENT) {
200
0
            result += "SIZE_INCONSISTENT ";
201
0
        }
202
0
        if (type & CACHE_TYPE_INCONSISTENT) {
203
0
            result += "CACHE_TYPE_INCONSISTENT ";
204
0
        }
205
0
        if (type & EXPIRATION_TIME_INCONSISTENT) {
206
0
            result += "EXPIRATION_TIME_INCONSISTENT ";
207
0
        }
208
0
        if (type & TMP_FILE_EXPECT_DOWNLOADING_STATE) {
209
0
            result += "TMP_FILE_EXPECT_DOWNLOADING_STATE";
210
0
        }
211
0
    }
212
0
    result += "\n";
213
0
    return result;
214
0
}
215
216
4.27M
std::optional<int64_t> get_tablet_id(std::string file_path) {
217
    // Expected path formats:
218
    // support both .dat and .idx file extensions
219
    // support formate see ut. storage_resource_test:StorageResourceTest.ParseTabletIdFromPath
220
221
4.27M
    if (file_path.empty()) {
222
0
        return std::nullopt;
223
0
    }
224
225
    // Find the position of "data/" in the path
226
4.27M
    std::string_view path_view = file_path;
227
4.27M
    std::string_view data_prefix = DATA_PREFIX;
228
4.27M
    size_t data_pos = path_view.find(data_prefix);
229
4.27M
    if (data_pos == std::string_view::npos) {
230
1.28k
        return std::nullopt;
231
1.28k
    }
232
233
4.27M
    if (data_prefix.length() + data_pos >= path_view.length()) {
234
1
        return std::nullopt;
235
1
    }
236
237
    // Extract the part after "data/"
238
4.27M
    path_view = path_view.substr(data_pos + data_prefix.length() + 1);
239
240
    // Check if path ends with .dat or .idx
241
4.27M
    if (!path_view.ends_with(".dat") && !path_view.ends_with(".idx")) {
242
14.8k
        return std::nullopt;
243
14.8k
    }
244
245
    // Count slashes in the remaining path
246
4.25M
    size_t slash_count = 0;
247
289M
    for (char c : path_view) {
248
289M
        if (c == '/') {
249
4.25M
            slash_count++;
250
4.25M
        }
251
289M
    }
252
253
    // Split path by '/'
254
4.25M
    std::vector<std::string_view> parts;
255
4.25M
    size_t start = 0;
256
4.25M
    size_t pos = 0;
257
8.51M
    while ((pos = path_view.find('/', start)) != std::string_view::npos) {
258
4.25M
        if (pos > start) {
259
4.25M
            parts.push_back(path_view.substr(start, pos - start));
260
4.25M
        }
261
4.25M
        start = pos + 1;
262
4.25M
    }
263
4.25M
    if (start < path_view.length()) {
264
4.25M
        parts.push_back(path_view.substr(start));
265
4.25M
    }
266
267
4.25M
    if (parts.empty()) {
268
0
        return std::nullopt;
269
0
    }
270
271
    // Determine path version based on slash count and extract tablet_id
272
    // Version 0: {tablet_id}/{rowset_id}_{seg_id}.dat (1 slash)
273
    // Version 1: {shard}/{tablet_id}/{rowset_id}/{seg_id}.dat (3 slashes)
274
275
4.25M
    if (slash_count == 1) {
276
        // Version 0 format: parts[0] should be tablet_id
277
4.25M
        if (parts.size() >= 1) {
278
4.25M
            try {
279
4.25M
                int64_t tablet_id = std::stoll(std::string(parts[0]));
280
4.25M
                return tablet_id;
281
4.25M
            } catch (const std::exception&) {
282
                // Not a valid number, return nullopt at last
283
0
            }
284
4.25M
        }
285
18.4E
    } else if (slash_count == 3) {
286
        // Version 1 format: parts[1] should be tablet_id (parts[0] is shard)
287
0
        if (parts.size() >= 2) {
288
0
            try {
289
0
                int64_t tablet_id = std::stoll(std::string(parts[1]));
290
0
                return tablet_id;
291
0
            } catch (const std::exception&) {
292
                // Not a valid number, return nullopt at last
293
0
            }
294
0
        }
295
0
    }
296
297
18.4E
    return std::nullopt;
298
4.25M
}
299
300
} // namespace doris::io