/root/doris/be/src/common/symbol_index.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // This file is copied from |
18 | | // https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/SymbolIndex.cpp |
19 | | // and modified by Doris |
20 | | |
21 | | #if defined(__ELF__) && !defined(__FreeBSD__) |
22 | | |
23 | | #include "common/symbol_index.h" |
24 | | |
25 | | #include <link.h> |
26 | | #include <pdqsort.h> |
27 | | |
28 | | #include <algorithm> |
29 | | #include <cassert> |
30 | | #include <filesystem> |
31 | | #include <optional> |
32 | | |
33 | | #include "common/stack_trace.h" |
34 | | #include "exec/common/hex.h" |
35 | | |
36 | | /** |
37 | | |
38 | | ELF object can contain three different places with symbol names and addresses: |
39 | | |
40 | | 1. Symbol table in section headers. It is used for static linking and usually left in executable. |
41 | | It is not loaded in memory and they are not necessary for program to run. |
42 | | It does not relate to debug info and present regardless to -g flag. |
43 | | You can use strip to get rid of this symbol table. |
44 | | If you have this symbol table in your binary, you can manually read it and get symbol names, even for symbols from anonymous namespaces. |
45 | | |
46 | | 2. Hashes in program headers such as DT_HASH and DT_GNU_HASH. |
47 | | It is necessary for dynamic object (.so libraries and any dynamically linked executable that depend on .so libraries) |
48 | | because it is used for dynamic linking that happens in runtime and performed by dynamic loader. |
49 | | Only exported symbols are present in those hash tables. Symbols from anonymous namespaces are not. |
50 | | This part of executable binary is loaded in memory and accessible via 'dl_iterate_phdr', 'dladdr' and 'backtrace_symbols' functions from libc. |
51 | | ClickHouse versions prior to 19.13 used just these symbol names to symbolize stack traces |
52 | | and stack traces may be incomplete due to lack of symbols with internal linkage. |
53 | | But because ClickHouse is linked with most of the symbols exported (-rdynamic flag) it can still provide good enough stack traces. |
54 | | |
55 | | 3. DWARF debug info. It contains the most detailed information about symbols and everything else. |
56 | | It allows to get source file names and line numbers from addresses. Only available if you use -g option for compiler. |
57 | | It is also used by default for ClickHouse builds, but because of its weight (about two gigabytes) |
58 | | it is split to separate binary and provided in clickhouse-common-static-dbg package. |
59 | | This separate binary is placed in /usr/lib/debug/usr/bin/clickhouse.debug and is loaded automatically by tools like gdb, addr2line. |
60 | | When you build ClickHouse by yourself, debug info is not split and present in a single huge binary. |
61 | | |
62 | | What ClickHouse is using to provide good stack traces? |
63 | | |
64 | | In versions prior to 19.13, only "program headers" (2) were used. |
65 | | |
66 | | In version 19.13, ClickHouse will read program headers (2) and cache them, |
67 | | also it will read itself as ELF binary and extract symbol tables from section headers (1) |
68 | | to also symbolize functions that are not exported for dynamic linking. |
69 | | And finally, it will read DWARF info (3) if available to display file names and line numbers. |
70 | | |
71 | | What detail can you obtain depending on your binary? |
72 | | |
73 | | If you have debug info (you build ClickHouse by yourself or install clickhouse-common-static-dbg package), you will get source file names and line numbers. |
74 | | Otherwise you will get only symbol names. If your binary contains symbol table in section headers (the default, unless stripped), you will get all symbol names. |
75 | | Otherwise you will get only exported symbols from program headers. |
76 | | |
77 | | */ |
78 | | |
79 | | #if defined(__clang__) |
80 | | #pragma clang diagnostic ignored "-Wreserved-id-macro" |
81 | | #pragma clang diagnostic ignored "-Wunused-macros" |
82 | | #endif |
83 | | |
84 | | #define __msan_unpoison_string(X) // NOLINT |
85 | | #if defined(__clang__) && defined(__has_feature) |
86 | | #if __has_feature(memory_sanitizer) |
87 | | #undef __msan_unpoison_string |
88 | | #include <sanitizer/msan_interface.h> |
89 | | #endif |
90 | | #endif |
91 | | |
92 | | namespace doris { |
93 | | |
94 | | namespace { |
95 | | |
96 | | /// Notes: "PHDR" is "Program Headers". |
97 | | /// To look at program headers, run: |
98 | | /// readelf -l ./clickhouse-server |
99 | | /// To look at section headers, run: |
100 | | /// readelf -S ./clickhouse-server |
101 | | /// Also look at: https://wiki.osdev.org/ELF |
102 | | /// Also look at: man elf |
103 | | /// http://www.linker-aliens.org/blogs/ali/entry/inside_elf_symbol_tables/ |
104 | | /// https://stackoverflow.com/questions/32088140/multiple-string-tables-in-elf-object |
105 | | |
106 | | void updateResources(ElfW(Addr) base_address, std::string_view object_name, std::string_view name, |
107 | 7.67M | const void* address, SymbolIndex::Resources& resources) { |
108 | 7.67M | const char* char_address = static_cast<const char*>(address); |
109 | | |
110 | 7.67M | if (name.starts_with("_binary_") || name.starts_with("binary_")) { |
111 | 18 | if (name.ends_with("_start")) { |
112 | 0 | name = name.substr((name[0] == '_') + strlen("binary_")); |
113 | 0 | name = name.substr(0, name.size() - strlen("_start")); |
114 | |
|
115 | 0 | auto& resource = resources[name]; |
116 | 0 | if (!resource.base_address || resource.base_address == base_address) { |
117 | 0 | resource.base_address = base_address; |
118 | 0 | resource.start = |
119 | 0 | std::string_view {char_address, 0}; // NOLINT(bugprone-string-constructor) |
120 | 0 | resource.object_name = object_name; |
121 | 0 | } |
122 | 0 | } |
123 | 18 | if (name.ends_with("_end")) { |
124 | 0 | name = name.substr((name[0] == '_') + strlen("binary_")); |
125 | 0 | name = name.substr(0, name.size() - strlen("_end")); |
126 | |
|
127 | 0 | auto& resource = resources[name]; |
128 | 0 | if (!resource.base_address || resource.base_address == base_address) { |
129 | 0 | resource.base_address = base_address; |
130 | 0 | resource.end = |
131 | 0 | std::string_view {char_address, 0}; // NOLINT(bugprone-string-constructor) |
132 | 0 | resource.object_name = object_name; |
133 | 0 | } |
134 | 0 | } |
135 | 18 | } |
136 | 7.67M | } |
137 | | |
138 | | /// Based on the code of musl-libc and the answer of Kanalpiroge on |
139 | | /// https://stackoverflow.com/questions/15779185/list-all-the-functions-symbols-on-the-fly-in-c-code-on-a-linux-architecture |
140 | | /// It does not extract all the symbols (but only public - exported and used for dynamic linking), |
141 | | /// but will work if we cannot find or parse ELF files. |
142 | | void collectSymbolsFromProgramHeaders(dl_phdr_info* info, std::vector<SymbolIndex::Symbol>& symbols, |
143 | 12 | SymbolIndex::Resources& resources) { |
144 | | /* Iterate over all headers of the current shared lib |
145 | | * (first call is for the executable itself) |
146 | | */ |
147 | 104 | for (size_t header_index = 0; header_index < info->dlpi_phnum; ++header_index) { |
148 | | /* Further processing is only needed if the dynamic section is reached |
149 | | */ |
150 | 92 | if (info->dlpi_phdr[header_index].p_type != PT_DYNAMIC) { |
151 | 80 | continue; |
152 | 80 | } |
153 | | |
154 | | /* Get a pointer to the first entry of the dynamic section. |
155 | | * It's address is the shared lib's address + the virtual address |
156 | | */ |
157 | 12 | const ElfW(Dyn)* dyn_begin = reinterpret_cast<const ElfW(Dyn)*>( |
158 | 12 | info->dlpi_addr + info->dlpi_phdr[header_index].p_vaddr); |
159 | | |
160 | | /// For unknown reason, addresses are sometimes relative sometimes absolute. |
161 | 384 | auto correct_address = [](ElfW(Addr) base, ElfW(Addr) ptr) { |
162 | 384 | return ptr > base ? ptr : base + ptr; |
163 | 384 | }; |
164 | | |
165 | | /* Iterate over all entries of the dynamic section until the |
166 | | * end of the symbol table is reached. This is indicated by |
167 | | * an entry with d_tag == DT_NULL. |
168 | | */ |
169 | | |
170 | 12 | size_t sym_cnt = 0; |
171 | 122 | for (const auto* it = dyn_begin; it->d_tag != DT_NULL; ++it) { |
172 | 122 | ElfW(Addr) base_address = correct_address(info->dlpi_addr, it->d_un.d_ptr); |
173 | | |
174 | | // TODO: this branch leads to invalid address of the hash table. Need further investigation. |
175 | | // if (it->d_tag == DT_HASH) |
176 | | // { |
177 | | // const ElfW(Word) * hash = reinterpret_cast<const ElfW(Word) *>(base_address); |
178 | | // sym_cnt = hash[1]; |
179 | | // break; |
180 | | // } |
181 | 122 | if (it->d_tag == DT_GNU_HASH) { |
182 | | /// This code based on Musl-libc. |
183 | | |
184 | 12 | const uint32_t* buckets = nullptr; |
185 | 12 | const uint32_t* hashval = nullptr; |
186 | | |
187 | 12 | const ElfW(Word)* hash = reinterpret_cast<const ElfW(Word)*>(base_address); |
188 | | |
189 | 12 | buckets = hash + 4 + (hash[2] * sizeof(size_t) / 4); |
190 | | |
191 | 3.83k | for (ElfW(Word) i = 0; i < hash[0]; ++i) { |
192 | 3.82k | if (buckets[i] > sym_cnt) { |
193 | 2.61k | sym_cnt = buckets[i]; |
194 | 2.61k | } |
195 | 3.82k | } |
196 | | |
197 | 12 | if (sym_cnt) { |
198 | 12 | sym_cnt -= hash[1]; |
199 | 12 | hashval = buckets + hash[0] + sym_cnt; |
200 | 24 | do { |
201 | 24 | ++sym_cnt; |
202 | 24 | } while (!(*hashval++ & 1)); |
203 | 12 | } |
204 | | |
205 | 12 | break; |
206 | 12 | } |
207 | 122 | } |
208 | | |
209 | 12 | if (!sym_cnt) { |
210 | 0 | continue; |
211 | 0 | } |
212 | | |
213 | 12 | const char* strtab = nullptr; |
214 | 128 | for (const auto* it = dyn_begin; it->d_tag != DT_NULL; ++it) { |
215 | 128 | ElfW(Addr) base_address = correct_address(info->dlpi_addr, it->d_un.d_ptr); |
216 | | |
217 | 128 | if (it->d_tag == DT_STRTAB) { |
218 | 12 | strtab = reinterpret_cast<const char*>(base_address); |
219 | 12 | break; |
220 | 12 | } |
221 | 128 | } |
222 | | |
223 | 12 | if (!strtab) { |
224 | 0 | continue; |
225 | 0 | } |
226 | | |
227 | 134 | for (const auto* it = dyn_begin; it->d_tag != DT_NULL; ++it) { |
228 | 134 | ElfW(Addr) base_address = correct_address(info->dlpi_addr, it->d_un.d_ptr); |
229 | | |
230 | 134 | if (it->d_tag == DT_SYMTAB) { |
231 | | /* Get the pointer to the first entry of the symbol table */ |
232 | 12 | const ElfW(Sym)* elf_sym = reinterpret_cast<const ElfW(Sym)*>(base_address); |
233 | | |
234 | | /* Iterate over the symbol table */ |
235 | 6.64k | for (ElfW(Word) sym_index = 0; sym_index < ElfW(Word)(sym_cnt); ++sym_index) { |
236 | | /* Get the name of the sym_index-th symbol. |
237 | | * This is located at the address of st_name relative to the beginning of the string table. |
238 | | */ |
239 | 6.62k | const char* sym_name = &strtab[elf_sym[sym_index].st_name]; |
240 | | |
241 | 6.62k | if (!sym_name) { |
242 | 0 | continue; |
243 | 0 | } |
244 | | |
245 | 6.62k | SymbolIndex::Symbol symbol; |
246 | 6.62k | symbol.address_begin = reinterpret_cast<const void*>( |
247 | 6.62k | info->dlpi_addr + elf_sym[sym_index].st_value); |
248 | 6.62k | symbol.address_end = reinterpret_cast<const void*>(info->dlpi_addr + |
249 | 6.62k | elf_sym[sym_index].st_value + |
250 | 6.62k | elf_sym[sym_index].st_size); |
251 | 6.62k | symbol.name = sym_name; |
252 | | |
253 | | /// We are not interested in empty symbols. |
254 | 6.62k | if (elf_sym[sym_index].st_size) { |
255 | 5.61k | symbols.push_back(symbol); |
256 | 5.61k | } |
257 | | |
258 | | /// But resources can be represented by a pair of empty symbols (indicating their boundaries). |
259 | 6.62k | updateResources(base_address, info->dlpi_name, symbol.name, |
260 | 6.62k | symbol.address_begin, resources); |
261 | 6.62k | } |
262 | | |
263 | 12 | break; |
264 | 12 | } |
265 | 134 | } |
266 | 12 | } |
267 | 12 | } |
268 | | |
269 | | #if !defined USE_MUSL |
270 | 12 | std::string getBuildIDFromProgramHeaders(dl_phdr_info* info) { |
271 | 64 | for (size_t header_index = 0; header_index < info->dlpi_phnum; ++header_index) { |
272 | 63 | const ElfPhdr& phdr = info->dlpi_phdr[header_index]; |
273 | 63 | if (phdr.p_type != PT_NOTE) { |
274 | 52 | continue; |
275 | 52 | } |
276 | | |
277 | 11 | return Elf::getBuildID(reinterpret_cast<const char*>(info->dlpi_addr + phdr.p_vaddr), |
278 | 11 | phdr.p_memsz); |
279 | 63 | } |
280 | 1 | return {}; |
281 | 12 | } |
282 | | #endif |
283 | | |
284 | | void collectSymbolsFromELFSymbolTable(dl_phdr_info* info, const Elf& elf, |
285 | | const Elf::Section& symbol_table, |
286 | | const Elf::Section& string_table, |
287 | | std::vector<SymbolIndex::Symbol>& symbols, |
288 | 10 | SymbolIndex::Resources& resources) { |
289 | | /// Iterate symbol table. |
290 | 10 | const ElfSym* symbol_table_entry = reinterpret_cast<const ElfSym*>(symbol_table.begin()); |
291 | 10 | const ElfSym* symbol_table_end = reinterpret_cast<const ElfSym*>(symbol_table.end()); |
292 | | |
293 | 10 | const char* strings = string_table.begin(); |
294 | | |
295 | 7.68M | for (; symbol_table_entry < symbol_table_end; ++symbol_table_entry) { |
296 | 7.68M | if (!symbol_table_entry->st_name || !symbol_table_entry->st_value || |
297 | 7.68M | strings + symbol_table_entry->st_name >= elf.end()) { |
298 | 23.8k | continue; |
299 | 23.8k | } |
300 | | |
301 | | /// Find the name in strings table. |
302 | 7.66M | const char* symbol_name = strings + symbol_table_entry->st_name; |
303 | | |
304 | 7.66M | if (!symbol_name) { |
305 | 0 | continue; |
306 | 0 | } |
307 | | |
308 | 7.66M | SymbolIndex::Symbol symbol; |
309 | 7.66M | symbol.address_begin = |
310 | 7.66M | reinterpret_cast<const void*>(info->dlpi_addr + symbol_table_entry->st_value); |
311 | 7.66M | symbol.address_end = reinterpret_cast<const void*>( |
312 | 7.66M | info->dlpi_addr + symbol_table_entry->st_value + symbol_table_entry->st_size); |
313 | 7.66M | symbol.name = symbol_name; |
314 | | |
315 | 7.66M | if (symbol_table_entry->st_size) { |
316 | 7.05M | symbols.push_back(symbol); |
317 | 7.05M | } |
318 | | |
319 | 7.66M | updateResources(info->dlpi_addr, info->dlpi_name, symbol.name, symbol.address_begin, |
320 | 7.66M | resources); |
321 | 7.66M | } |
322 | 10 | } |
323 | | |
324 | | bool searchAndCollectSymbolsFromELFSymbolTable(dl_phdr_info* info, const Elf& elf, |
325 | | unsigned section_header_type, |
326 | | const char* string_table_name, |
327 | | std::vector<SymbolIndex::Symbol>& symbols, |
328 | 11 | SymbolIndex::Resources& resources) { |
329 | 11 | std::optional<Elf::Section> symbol_table; |
330 | 11 | std::optional<Elf::Section> string_table; |
331 | | |
332 | 441 | if (!elf.iterateSections([&](const Elf::Section& section, size_t) { |
333 | 441 | if (section.header.sh_type == section_header_type) { |
334 | 10 | symbol_table.emplace(section); |
335 | 431 | } else if (section.header.sh_type == SHT_STRTAB && |
336 | 431 | 0 == strcmp(section.name(), string_table_name)) { |
337 | 10 | string_table.emplace(section); |
338 | 10 | } |
339 | | |
340 | 441 | return (symbol_table && string_table); |
341 | 441 | })) { |
342 | 1 | return false; |
343 | 1 | } |
344 | | |
345 | 10 | collectSymbolsFromELFSymbolTable(info, elf, *symbol_table, *string_table, symbols, resources); |
346 | 10 | return true; |
347 | 11 | } |
348 | | |
349 | | void collectSymbolsFromELF(dl_phdr_info* info, std::vector<SymbolIndex::Symbol>& symbols, |
350 | | std::vector<SymbolIndex::Object>& objects, |
351 | 12 | SymbolIndex::Resources& resources, std::string& build_id) { |
352 | 12 | std::string object_name; |
353 | 12 | std::string our_build_id; |
354 | | #if defined(USE_MUSL) |
355 | | object_name = "/proc/self/exe"; |
356 | | our_build_id = Elf(object_name).getBuildID(); |
357 | | build_id = our_build_id; |
358 | | #else |
359 | | /// MSan does not know that the program segments in memory are initialized. |
360 | 12 | __msan_unpoison_string(info->dlpi_name); |
361 | | |
362 | 12 | object_name = info->dlpi_name; |
363 | 12 | our_build_id = getBuildIDFromProgramHeaders(info); |
364 | | |
365 | | /// If the name is empty and there is a non-empty build-id - it's main executable. |
366 | | /// Find a elf file for the main executable and set the build-id. |
367 | 12 | if (object_name.empty()) { |
368 | 2 | object_name = "/proc/self/exe"; |
369 | | |
370 | 2 | if (our_build_id.empty()) { |
371 | 2 | our_build_id = Elf(object_name).getBuildID(); |
372 | 2 | } |
373 | | |
374 | 2 | if (build_id.empty()) { |
375 | 2 | build_id = our_build_id; |
376 | 2 | } |
377 | 2 | } |
378 | 12 | #endif |
379 | | |
380 | 12 | std::error_code ec; |
381 | 12 | std::filesystem::path canonical_path = std::filesystem::canonical(object_name, ec); |
382 | 12 | if (ec) { |
383 | 0 | return; |
384 | 0 | } |
385 | | |
386 | | /// Debug info and symbol table sections may be split to separate binary. |
387 | 12 | std::filesystem::path local_debug_info_path = |
388 | 12 | canonical_path.parent_path() / canonical_path.stem(); |
389 | 12 | local_debug_info_path += ".debug"; |
390 | 12 | std::filesystem::path debug_info_path = |
391 | 12 | std::filesystem::path("/usr/lib/debug") / canonical_path.relative_path(); |
392 | 12 | debug_info_path += ".debug"; |
393 | | |
394 | | /// NOTE: This is a workaround for current package system. |
395 | | /// |
396 | | /// Since nfpm cannot copy file only if it exists, |
397 | | /// and so in cmake empty .debug file is created instead, |
398 | | /// but if we will try to load empty Elf file, then the CANNOT_PARSE_ELF |
399 | | /// exception will be thrown from the Elf::Elf. |
400 | 24 | auto exists_not_empty = [](const std::filesystem::path& path) { |
401 | 24 | return std::filesystem::exists(path) && !std::filesystem::is_empty(path); |
402 | 24 | }; |
403 | | |
404 | 12 | if (exists_not_empty(local_debug_info_path)) { |
405 | 0 | object_name = local_debug_info_path; |
406 | 12 | } else if (exists_not_empty(debug_info_path)) { |
407 | 0 | object_name = debug_info_path; |
408 | 12 | } else if (build_id.size() >= 2) { |
409 | | // Check if there is a .debug file in .build-id folder. For example: |
410 | | // /usr/lib/debug/.build-id/e4/0526a12e9a8f3819a18694f6b798f10c624d5c.debug |
411 | 0 | std::string build_id_hex; |
412 | 0 | build_id_hex.resize(build_id.size() * 2); |
413 | |
|
414 | 0 | char* pos = build_id_hex.data(); |
415 | 0 | for (auto c : build_id) { |
416 | 0 | write_hex_byte_lowercase(c, pos); |
417 | 0 | pos += 2; |
418 | 0 | } |
419 | |
|
420 | 0 | std::filesystem::path build_id_debug_info_path( |
421 | 0 | fmt::format("/usr/lib/debug/.build-id/{}/{}.debug", build_id_hex.substr(0, 2), |
422 | 0 | build_id_hex.substr(2))); |
423 | 0 | if (exists_not_empty(build_id_debug_info_path)) { |
424 | 0 | object_name = build_id_debug_info_path; |
425 | 0 | } else { |
426 | 0 | object_name = canonical_path; |
427 | 0 | } |
428 | 12 | } else { |
429 | 12 | object_name = canonical_path; |
430 | 12 | } |
431 | | /// But we have to compare Build ID to check that debug info corresponds to the same executable. |
432 | | |
433 | 12 | SymbolIndex::Object object; |
434 | 12 | object.elf = std::make_unique<Elf>(object_name); |
435 | | |
436 | 12 | std::string file_build_id = object.elf->getBuildID(); |
437 | | |
438 | 12 | if (our_build_id != file_build_id) { |
439 | | /// If debug info doesn't correspond to our binary, fallback to the info in our binary. |
440 | 1 | if (object_name != canonical_path) { |
441 | 0 | object_name = canonical_path; |
442 | 0 | object.elf = std::make_unique<Elf>(object_name); |
443 | | |
444 | | /// But it can still be outdated, for example, if executable file was deleted from filesystem and replaced by another file. |
445 | 0 | file_build_id = object.elf->getBuildID(); |
446 | 0 | if (our_build_id != file_build_id) { |
447 | 0 | return; |
448 | 0 | } |
449 | 1 | } else { |
450 | 1 | return; |
451 | 1 | } |
452 | 1 | } |
453 | | |
454 | 11 | object.address_begin = reinterpret_cast<const void*>(info->dlpi_addr); |
455 | 11 | object.address_end = reinterpret_cast<const void*>(info->dlpi_addr + object.elf->size()); |
456 | 11 | object.name = object_name; |
457 | 11 | objects.push_back(std::move(object)); |
458 | | |
459 | 11 | searchAndCollectSymbolsFromELFSymbolTable(info, *objects.back().elf, SHT_SYMTAB, ".strtab", |
460 | 11 | symbols, resources); |
461 | | |
462 | | /// Unneeded if they were parsed from "program headers" of loaded objects. |
463 | | #if defined USE_MUSL |
464 | | searchAndCollectSymbolsFromELFSymbolTable(info, *objects.back().elf, SHT_DYNSYM, ".dynstr", |
465 | | symbols, resources); |
466 | | #endif |
467 | 11 | } |
468 | | |
469 | | /* Callback for dl_iterate_phdr. |
470 | | * Is called by dl_iterate_phdr for every loaded shared lib until something |
471 | | * else than 0 is returned by one call of this function. |
472 | | */ |
473 | 12 | int collectSymbols(dl_phdr_info* info, size_t, void* data_ptr) { |
474 | 12 | SymbolIndex::Data& data = *reinterpret_cast<SymbolIndex::Data*>(data_ptr); |
475 | | |
476 | 12 | collectSymbolsFromProgramHeaders(info, data.symbols, data.resources); |
477 | 12 | collectSymbolsFromELF(info, data.symbols, data.objects, data.resources, data.build_id); |
478 | | |
479 | | /* Continue iterations */ |
480 | 12 | return 0; |
481 | 12 | } |
482 | | |
/// Looks up the range [address_begin, address_end) that contains `address`.
///
/// `vec` is expected to be ordered by `address_begin`. Among the elements whose
/// `address_begin` is not greater than `address`, only the last one is examined.
/// Returns a pointer to that element if it contains `address`, otherwise nullptr.
template <typename T>
const T* find(const void* address, const std::vector<T>& vec) {
    /// First range that has left boundary greater than address.
    const auto first_after = std::upper_bound(
            vec.begin(), vec.end(), address,
            [](const void* addr, const T& range) { return addr < range.address_begin; });

    /// No range starts at or before the address.
    if (first_after == vec.begin()) {
        return nullptr;
    }

    /// Last range that has left boundary less or equal than address.
    const T& candidate = *(first_after - 1);

    const bool contains = candidate.address_begin <= address && address < candidate.address_end;
    return contains ? &candidate : nullptr;
}
|
503 | | |
504 | | } // namespace |
505 | | |
506 | 1 | void SymbolIndex::update() { |
507 | 1 | dl_iterate_phdr(collectSymbols, &data); |
508 | 1 | ::pdqsort(data.objects.begin(), data.objects.end(), |
509 | 49 | [](const Object& a, const Object& b) { return a.address_begin < b.address_begin; }); |
510 | 1 | ::pdqsort(data.symbols.begin(), data.symbols.end(), |
511 | 180M | [](const Symbol& a, const Symbol& b) { return a.address_begin < b.address_begin; }); |
512 | | /// We found symbols both from loaded program headers and from ELF symbol tables. |
513 | 1 | data.symbols.erase(std::unique(data.symbols.begin(), data.symbols.end(), |
514 | 7.06M | [](const Symbol& a, const Symbol& b) { |
515 | 7.06M | return a.address_begin == b.address_begin && |
516 | 7.06M | a.address_end == b.address_end; |
517 | 7.06M | }), |
518 | 1 | data.symbols.end()); |
519 | 1 | } |
520 | | |
521 | 5.38k | const SymbolIndex::Symbol* SymbolIndex::findSymbol(const void* address) const { |
522 | 5.38k | return find(address, data.symbols); |
523 | 5.38k | } |
524 | | |
525 | 5.38k | const SymbolIndex::Object* SymbolIndex::findObject(const void* address) const { |
526 | 5.38k | return find(address, data.objects); |
527 | 5.38k | } |
528 | | |
529 | 0 | std::string SymbolIndex::getBuildIDHex() const { |
530 | 0 | std::string build_id_binary = getBuildID(); |
531 | 0 | std::string build_id_hex; |
532 | 0 | build_id_hex.resize(build_id_binary.size() * 2); |
533 | |
|
534 | 0 | char* pos = build_id_hex.data(); |
535 | 0 | for (auto c : build_id_binary) { |
536 | 0 | write_hex_byte_uppercase(c, pos); |
537 | 0 | pos += 2; |
538 | 0 | } |
539 | |
|
540 | 0 | return build_id_hex; |
541 | 0 | } |
542 | | |
543 | 266 | MultiVersion<SymbolIndex>& SymbolIndex::instanceImpl() { |
544 | 266 | static MultiVersion<SymbolIndex> instance(std::unique_ptr<SymbolIndex>(new SymbolIndex)); |
545 | 266 | return instance; |
546 | 266 | } |
547 | | |
548 | 266 | MultiVersion<SymbolIndex>::Version SymbolIndex::instance() { |
549 | 266 | return instanceImpl().get(); |
550 | 266 | } |
551 | | |
552 | 0 | void SymbolIndex::reload() { |
553 | 0 | instanceImpl().set(std::unique_ptr<SymbolIndex>(new SymbolIndex)); |
554 | | /// Also drop stacktrace cache. |
555 | 0 | StackTrace::dropCache(); |
556 | 0 | } |
557 | | |
558 | | } // namespace doris |
559 | | |
560 | | #endif |