/root/doris/be/src/common/symbol_index.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // This file is copied from |
18 | | // https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/SymbolIndex.cpp |
19 | | // and modified by Doris |
20 | | |
21 | | #if defined(__ELF__) && !defined(__FreeBSD__) |
22 | | |
23 | | #include <common/symbol_index.h> |
24 | | #include <link.h> |
25 | | #include <pdqsort.h> |
26 | | |
27 | | #include <algorithm> |
28 | | #include <cassert> |
29 | | #include <filesystem> |
30 | | #include <optional> |
31 | | |
32 | | #include "common/stack_trace.h" |
33 | | #include "vec/common/hex.h" |
34 | | |
35 | | /** |
36 | | |
37 | | ELF object can contain three different places with symbol names and addresses: |
38 | | |
39 | | 1. Symbol table in section headers. It is used for static linking and usually left in executable. |
40 | | It is not loaded into memory and is not necessary for the program to run. |
41 | | It does not relate to debug info and present regardless to -g flag. |
42 | | You can use strip to get rid of this symbol table. |
43 | | If you have this symbol table in your binary, you can manually read it and get symbol names, even for symbols from anonymous namespaces. |
44 | | |
45 | | 2. Hashes in program headers such as DT_HASH and DT_GNU_HASH. |
46 | | It is necessary for dynamic object (.so libraries and any dynamically linked executable that depend on .so libraries) |
47 | | because it is used for dynamic linking that happens in runtime and performed by dynamic loader. |
48 | | Only exported symbols will be presented in that hash tables. Symbols from anonymous namespaces are not. |
49 | | This part of executable binary is loaded in memory and accessible via 'dl_iterate_phdr', 'dladdr' and 'backtrace_symbols' functions from libc. |
50 | | ClickHouse versions prior to 19.13 have used just these symbol names to symbolize stack traces |
51 | | and stack traces may be incomplete due to lack of symbols with internal linkage. |
52 | | But because ClickHouse is linked with most of the symbols exported (-rdynamic flag) it can still provide good enough stack traces. |
53 | | |
54 | | 3. DWARF debug info. It contains the most detailed information about symbols and everything else. |
55 | | It allows to get source file names and line numbers from addresses. Only available if you use -g option for compiler. |
56 | | It is also used by default for ClickHouse builds, but because of its weight (about two gigabytes) |
57 | | it is split to separate binary and provided in clickhouse-common-static-dbg package. |
58 | | This separate binary is placed in /usr/lib/debug/usr/bin/clickhouse.debug and is loaded automatically by tools like gdb, addr2line. |
59 | | When you build ClickHouse by yourself, debug info is not split and present in a single huge binary. |
60 | | |
61 | | What does ClickHouse use to provide good stack traces? |
62 | | |
63 | | In versions prior to 19.13, only "program headers" (2) were used. |
64 | | |
65 | | In version 19.13, ClickHouse will read program headers (2) and cache them, |
66 | | also it will read itself as ELF binary and extract symbol tables from section headers (1) |
67 | | to also symbolize functions that are not exported for dynamic linking. |
68 | | And finally, it will read DWARF info (3) if available to display file names and line numbers. |
69 | | |
70 | | What detail can you obtain depending on your binary? |
71 | | |
72 | | If you have debug info (you build ClickHouse by yourself or install clickhouse-common-static-dbg package), you will get source file names and line numbers. |
73 | | Otherwise you will get only symbol names. If your binary contains symbol table in section headers (the default, unless stripped), you will get all symbol names. |
74 | | Otherwise you will get only exported symbols from program headers. |
75 | | |
76 | | */ |
77 | | |
78 | | #if defined(__clang__) |
79 | | #pragma clang diagnostic ignored "-Wreserved-id-macro" |
80 | | #pragma clang diagnostic ignored "-Wunused-macros" |
81 | | #endif |
82 | | |
83 | | #define __msan_unpoison_string(X) // NOLINT |
84 | | #if defined(__clang__) && defined(__has_feature) |
85 | | #if __has_feature(memory_sanitizer) |
86 | | #undef __msan_unpoison_string |
87 | | #include <sanitizer/msan_interface.h> |
88 | | #endif |
89 | | #endif |
90 | | |
91 | | namespace doris { |
92 | | |
93 | | namespace { |
94 | | |
95 | | /// Notes: "PHDR" is "Program Headers". |
96 | | /// To look at program headers, run: |
97 | | /// readelf -l ./clickhouse-server |
98 | | /// To look at section headers, run: |
99 | | /// readelf -S ./clickhouse-server |
100 | | /// Also look at: https://wiki.osdev.org/ELF |
101 | | /// Also look at: man elf |
102 | | /// http://www.linker-aliens.org/blogs/ali/entry/inside_elf_symbol_tables/ |
103 | | /// https://stackoverflow.com/questions/32088140/multiple-string-tables-in-elf-object |
104 | | |
105 | | void updateResources(ElfW(Addr) base_address, std::string_view object_name, std::string_view name, |
106 | 16.6M | const void* address, SymbolIndex::Resources& resources) { |
107 | 16.6M | const char* char_address = static_cast<const char*>(address); |
108 | | |
109 | 16.6M | if (name.starts_with("_binary_") || name.starts_with("binary_")) { |
110 | 18 | if (name.ends_with("_start")) { |
111 | 0 | name = name.substr((name[0] == '_') + strlen("binary_")); |
112 | 0 | name = name.substr(0, name.size() - strlen("_start")); |
113 | |
|
114 | 0 | auto& resource = resources[name]; |
115 | 0 | if (!resource.base_address || resource.base_address == base_address) { |
116 | 0 | resource.base_address = base_address; |
117 | 0 | resource.start = |
118 | 0 | std::string_view {char_address, 0}; // NOLINT(bugprone-string-constructor) |
119 | 0 | resource.object_name = object_name; |
120 | 0 | } |
121 | 0 | } |
122 | 18 | if (name.ends_with("_end")) { |
123 | 0 | name = name.substr((name[0] == '_') + strlen("binary_")); |
124 | 0 | name = name.substr(0, name.size() - strlen("_end")); |
125 | |
|
126 | 0 | auto& resource = resources[name]; |
127 | 0 | if (!resource.base_address || resource.base_address == base_address) { |
128 | 0 | resource.base_address = base_address; |
129 | 0 | resource.end = |
130 | 0 | std::string_view {char_address, 0}; // NOLINT(bugprone-string-constructor) |
131 | 0 | resource.object_name = object_name; |
132 | 0 | } |
133 | 0 | } |
134 | 18 | } |
135 | 16.6M | } |
136 | | |
137 | | /// Based on the code of musl-libc and the answer of Kanalpiroge on |
138 | | /// https://stackoverflow.com/questions/15779185/list-all-the-functions-symbols-on-the-fly-in-c-code-on-a-linux-architecture |
139 | | /// It does not extract all the symbols (but only public - exported and used for dynamic linking), |
140 | | /// but will work if we cannot find or parse ELF files. |
141 | | void collectSymbolsFromProgramHeaders(dl_phdr_info* info, std::vector<SymbolIndex::Symbol>& symbols, |
142 | 13 | SymbolIndex::Resources& resources) { |
143 | | /* Iterate over all headers of the current shared lib |
144 | | * (first call is for the executable itself) |
145 | | */ |
146 | 112 | for (size_t header_index = 0; header_index < info->dlpi_phnum; ++header_index) { |
147 | | /* Further processing is only needed if the dynamic section is reached |
148 | | */ |
149 | 99 | if (info->dlpi_phdr[header_index].p_type != PT_DYNAMIC) { |
150 | 86 | continue; |
151 | 86 | } |
152 | | |
153 | | /* Get a pointer to the first entry of the dynamic section. |
154 | | * It's address is the shared lib's address + the virtual address |
155 | | */ |
156 | 13 | const ElfW(Dyn)* dyn_begin = reinterpret_cast<const ElfW(Dyn)*>( |
157 | 13 | info->dlpi_addr + info->dlpi_phdr[header_index].p_vaddr); |
158 | | |
159 | | /// For unknown reason, addresses are sometimes relative sometimes absolute. |
160 | 417 | auto correct_address = [](ElfW(Addr) base, ElfW(Addr) ptr) { |
161 | 417 | return ptr > base ? ptr : base + ptr; |
162 | 417 | }; |
163 | | |
164 | | /* Iterate over all entries of the dynamic section until the |
165 | | * end of the symbol table is reached. This is indicated by |
166 | | * an entry with d_tag == DT_NULL. |
167 | | */ |
168 | | |
169 | 13 | size_t sym_cnt = 0; |
170 | 132 | for (const auto* it = dyn_begin; it->d_tag != DT_NULL; ++it) { |
171 | 132 | ElfW(Addr) base_address = correct_address(info->dlpi_addr, it->d_un.d_ptr); |
172 | | |
173 | | // TODO: this branch leads to invalid address of the hash table. Need further investigation. |
174 | | // if (it->d_tag == DT_HASH) |
175 | | // { |
176 | | // const ElfW(Word) * hash = reinterpret_cast<const ElfW(Word) *>(base_address); |
177 | | // sym_cnt = hash[1]; |
178 | | // break; |
179 | | // } |
180 | 132 | if (it->d_tag == DT_GNU_HASH) { |
181 | | /// This code based on Musl-libc. |
182 | | |
183 | 13 | const uint32_t* buckets = nullptr; |
184 | 13 | const uint32_t* hashval = nullptr; |
185 | | |
186 | 13 | const ElfW(Word)* hash = reinterpret_cast<const ElfW(Word)*>(base_address); |
187 | | |
188 | 13 | buckets = hash + 4 + (hash[2] * sizeof(size_t) / 4); |
189 | | |
190 | 3.65k | for (ElfW(Word) i = 0; i < hash[0]; ++i) { |
191 | 3.63k | if (buckets[i] > sym_cnt) { |
192 | 2.42k | sym_cnt = buckets[i]; |
193 | 2.42k | } |
194 | 3.63k | } |
195 | | |
196 | 13 | if (sym_cnt) { |
197 | 13 | sym_cnt -= hash[1]; |
198 | 13 | hashval = buckets + hash[0] + sym_cnt; |
199 | 23 | do { |
200 | 23 | ++sym_cnt; |
201 | 23 | } while (!(*hashval++ & 1)); |
202 | 13 | } |
203 | | |
204 | 13 | break; |
205 | 13 | } |
206 | 132 | } |
207 | | |
208 | 13 | if (!sym_cnt) { |
209 | 0 | continue; |
210 | 0 | } |
211 | | |
212 | 13 | const char* strtab = nullptr; |
213 | 139 | for (const auto* it = dyn_begin; it->d_tag != DT_NULL; ++it) { |
214 | 139 | ElfW(Addr) base_address = correct_address(info->dlpi_addr, it->d_un.d_ptr); |
215 | | |
216 | 139 | if (it->d_tag == DT_STRTAB) { |
217 | 13 | strtab = reinterpret_cast<const char*>(base_address); |
218 | 13 | break; |
219 | 13 | } |
220 | 139 | } |
221 | | |
222 | 13 | if (!strtab) { |
223 | 0 | continue; |
224 | 0 | } |
225 | | |
226 | 146 | for (const auto* it = dyn_begin; it->d_tag != DT_NULL; ++it) { |
227 | 146 | ElfW(Addr) base_address = correct_address(info->dlpi_addr, it->d_un.d_ptr); |
228 | | |
229 | 146 | if (it->d_tag == DT_SYMTAB) { |
230 | | /* Get the pointer to the first entry of the symbol table */ |
231 | 13 | const ElfW(Sym)* elf_sym = reinterpret_cast<const ElfW(Sym)*>(base_address); |
232 | | |
233 | | /* Iterate over the symbol table */ |
234 | 5.71k | for (ElfW(Word) sym_index = 0; sym_index < ElfW(Word)(sym_cnt); ++sym_index) { |
235 | | /* Get the name of the sym_index-th symbol. |
236 | | * This is located at the address of st_name relative to the beginning of the string table. |
237 | | */ |
238 | 5.69k | const char* sym_name = &strtab[elf_sym[sym_index].st_name]; |
239 | | |
240 | 5.69k | if (!sym_name) { |
241 | 0 | continue; |
242 | 0 | } |
243 | | |
244 | 5.69k | SymbolIndex::Symbol symbol; |
245 | 5.69k | symbol.address_begin = reinterpret_cast<const void*>( |
246 | 5.69k | info->dlpi_addr + elf_sym[sym_index].st_value); |
247 | 5.69k | symbol.address_end = reinterpret_cast<const void*>(info->dlpi_addr + |
248 | 5.69k | elf_sym[sym_index].st_value + |
249 | 5.69k | elf_sym[sym_index].st_size); |
250 | 5.69k | symbol.name = sym_name; |
251 | | |
252 | | /// We are not interested in empty symbols. |
253 | 5.69k | if (elf_sym[sym_index].st_size) { |
254 | 4.72k | symbols.push_back(symbol); |
255 | 4.72k | } |
256 | | |
257 | | /// But resources can be represented by a pair of empty symbols (indicating their boundaries). |
258 | 5.69k | updateResources(base_address, info->dlpi_name, symbol.name, |
259 | 5.69k | symbol.address_begin, resources); |
260 | 5.69k | } |
261 | | |
262 | 13 | break; |
263 | 13 | } |
264 | 146 | } |
265 | 13 | } |
266 | 13 | } |
267 | | |
268 | | #if !defined USE_MUSL |
269 | 13 | std::string getBuildIDFromProgramHeaders(dl_phdr_info* info) { |
270 | 68 | for (size_t header_index = 0; header_index < info->dlpi_phnum; ++header_index) { |
271 | 67 | const ElfPhdr& phdr = info->dlpi_phdr[header_index]; |
272 | 67 | if (phdr.p_type != PT_NOTE) { |
273 | 55 | continue; |
274 | 55 | } |
275 | | |
276 | 12 | return Elf::getBuildID(reinterpret_cast<const char*>(info->dlpi_addr + phdr.p_vaddr), |
277 | 12 | phdr.p_memsz); |
278 | 67 | } |
279 | 1 | return {}; |
280 | 13 | } |
281 | | #endif |
282 | | |
283 | | void collectSymbolsFromELFSymbolTable(dl_phdr_info* info, const Elf& elf, |
284 | | const Elf::Section& symbol_table, |
285 | | const Elf::Section& string_table, |
286 | | std::vector<SymbolIndex::Symbol>& symbols, |
287 | 10 | SymbolIndex::Resources& resources) { |
288 | | /// Iterate symbol table. |
289 | 10 | const ElfSym* symbol_table_entry = reinterpret_cast<const ElfSym*>(symbol_table.begin()); |
290 | 10 | const ElfSym* symbol_table_end = reinterpret_cast<const ElfSym*>(symbol_table.end()); |
291 | | |
292 | 10 | const char* strings = string_table.begin(); |
293 | | |
294 | 16.6M | for (; symbol_table_entry < symbol_table_end; ++symbol_table_entry) { |
295 | 16.6M | if (!symbol_table_entry->st_name || !symbol_table_entry->st_value || |
296 | 16.6M | strings + symbol_table_entry->st_name >= elf.end()) { |
297 | 9.27k | continue; |
298 | 9.27k | } |
299 | | |
300 | | /// Find the name in strings table. |
301 | 16.6M | const char* symbol_name = strings + symbol_table_entry->st_name; |
302 | | |
303 | 16.6M | if (!symbol_name) { |
304 | 0 | continue; |
305 | 0 | } |
306 | | |
307 | 16.6M | SymbolIndex::Symbol symbol; |
308 | 16.6M | symbol.address_begin = |
309 | 16.6M | reinterpret_cast<const void*>(info->dlpi_addr + symbol_table_entry->st_value); |
310 | 16.6M | symbol.address_end = reinterpret_cast<const void*>( |
311 | 16.6M | info->dlpi_addr + symbol_table_entry->st_value + symbol_table_entry->st_size); |
312 | 16.6M | symbol.name = symbol_name; |
313 | | |
314 | 16.6M | if (symbol_table_entry->st_size) { |
315 | 15.8M | symbols.push_back(symbol); |
316 | 15.8M | } |
317 | | |
318 | 16.6M | updateResources(info->dlpi_addr, info->dlpi_name, symbol.name, symbol.address_begin, |
319 | 16.6M | resources); |
320 | 16.6M | } |
321 | 10 | } |
322 | | |
323 | | bool searchAndCollectSymbolsFromELFSymbolTable(dl_phdr_info* info, const Elf& elf, |
324 | | unsigned section_header_type, |
325 | | const char* string_table_name, |
326 | | std::vector<SymbolIndex::Symbol>& symbols, |
327 | 12 | SymbolIndex::Resources& resources) { |
328 | 12 | std::optional<Elf::Section> symbol_table; |
329 | 12 | std::optional<Elf::Section> string_table; |
330 | | |
331 | 466 | if (!elf.iterateSections([&](const Elf::Section& section, size_t) { |
332 | 466 | if (section.header.sh_type == section_header_type) { |
333 | 10 | symbol_table.emplace(section); |
334 | 456 | } else if (section.header.sh_type == SHT_STRTAB && |
335 | 456 | 0 == strcmp(section.name(), string_table_name)) { |
336 | 10 | string_table.emplace(section); |
337 | 10 | } |
338 | | |
339 | 466 | return (symbol_table && string_table); |
340 | 466 | })) { |
341 | 2 | return false; |
342 | 2 | } |
343 | | |
344 | 10 | collectSymbolsFromELFSymbolTable(info, elf, *symbol_table, *string_table, symbols, resources); |
345 | 10 | return true; |
346 | 12 | } |
347 | | |
348 | | void collectSymbolsFromELF(dl_phdr_info* info, std::vector<SymbolIndex::Symbol>& symbols, |
349 | | std::vector<SymbolIndex::Object>& objects, |
350 | 13 | SymbolIndex::Resources& resources, std::string& build_id) { |
351 | 13 | std::string object_name; |
352 | 13 | std::string our_build_id; |
353 | | #if defined(USE_MUSL) |
354 | | object_name = "/proc/self/exe"; |
355 | | our_build_id = Elf(object_name).getBuildID(); |
356 | | build_id = our_build_id; |
357 | | #else |
358 | | /// MSan does not know that the program segments in memory are initialized. |
359 | 13 | __msan_unpoison_string(info->dlpi_name); |
360 | | |
361 | 13 | object_name = info->dlpi_name; |
362 | 13 | our_build_id = getBuildIDFromProgramHeaders(info); |
363 | | |
364 | | /// If the name is empty and there is a non-empty build-id - it's main executable. |
365 | | /// Find a elf file for the main executable and set the build-id. |
366 | 13 | if (object_name.empty()) { |
367 | 2 | object_name = "/proc/self/exe"; |
368 | | |
369 | 2 | if (our_build_id.empty()) { |
370 | 1 | our_build_id = Elf(object_name).getBuildID(); |
371 | 1 | } |
372 | | |
373 | 2 | if (build_id.empty()) { |
374 | 1 | build_id = our_build_id; |
375 | 1 | } |
376 | 2 | } |
377 | 13 | #endif |
378 | | |
379 | 13 | std::error_code ec; |
380 | 13 | std::filesystem::path canonical_path = std::filesystem::canonical(object_name, ec); |
381 | 13 | if (ec) { |
382 | 0 | return; |
383 | 0 | } |
384 | | |
385 | | /// Debug info and symbol table sections may be split to separate binary. |
386 | 13 | std::filesystem::path local_debug_info_path = |
387 | 13 | canonical_path.parent_path() / canonical_path.stem(); |
388 | 13 | local_debug_info_path += ".debug"; |
389 | 13 | std::filesystem::path debug_info_path = |
390 | 13 | std::filesystem::path("/usr/lib/debug") / canonical_path.relative_path(); |
391 | 13 | debug_info_path += ".debug"; |
392 | | |
393 | | /// NOTE: This is a workaround for current package system. |
394 | | /// |
395 | | /// Since nfpm cannot copy file only if it exists, |
396 | | /// and so in cmake empty .debug file is created instead, |
397 | | /// but if we will try to load empty Elf file, then the CANNOT_PARSE_ELF |
398 | | /// exception will be thrown from the Elf::Elf. |
399 | 39 | auto exists_not_empty = [](const std::filesystem::path& path) { |
400 | 39 | return std::filesystem::exists(path) && !std::filesystem::is_empty(path); |
401 | 39 | }; |
402 | | |
403 | 13 | if (exists_not_empty(local_debug_info_path)) { |
404 | 0 | object_name = local_debug_info_path; |
405 | 13 | } else if (exists_not_empty(debug_info_path)) { |
406 | 0 | object_name = debug_info_path; |
407 | 13 | } else if (build_id.size() >= 2) { |
408 | | // Check if there is a .debug file in .build-id folder. For example: |
409 | | // /usr/lib/debug/.build-id/e4/0526a12e9a8f3819a18694f6b798f10c624d5c.debug |
410 | 13 | std::string build_id_hex; |
411 | 13 | build_id_hex.resize(build_id.size() * 2); |
412 | | |
413 | 13 | char* pos = build_id_hex.data(); |
414 | 104 | for (auto c : build_id) { |
415 | 104 | vectorized::write_hex_byte_lowercase(c, pos); |
416 | 104 | pos += 2; |
417 | 104 | } |
418 | | |
419 | 13 | std::filesystem::path build_id_debug_info_path( |
420 | 13 | fmt::format("/usr/lib/debug/.build-id/{}/{}.debug", build_id_hex.substr(0, 2), |
421 | 13 | build_id_hex.substr(2))); |
422 | 13 | if (exists_not_empty(build_id_debug_info_path)) { |
423 | 0 | object_name = build_id_debug_info_path; |
424 | 13 | } else { |
425 | 13 | object_name = canonical_path; |
426 | 13 | } |
427 | 13 | } else { |
428 | 0 | object_name = canonical_path; |
429 | 0 | } |
430 | | /// But we have to compare Build ID to check that debug info corresponds to the same executable. |
431 | | |
432 | 13 | SymbolIndex::Object object; |
433 | 13 | object.elf = std::make_unique<Elf>(object_name); |
434 | | |
435 | 13 | std::string file_build_id = object.elf->getBuildID(); |
436 | | |
437 | 13 | if (our_build_id != file_build_id) { |
438 | | /// If debug info doesn't correspond to our binary, fallback to the info in our binary. |
439 | 1 | if (object_name != canonical_path) { |
440 | 0 | object_name = canonical_path; |
441 | 0 | object.elf = std::make_unique<Elf>(object_name); |
442 | | |
443 | | /// But it can still be outdated, for example, if executable file was deleted from filesystem and replaced by another file. |
444 | 0 | file_build_id = object.elf->getBuildID(); |
445 | 0 | if (our_build_id != file_build_id) { |
446 | 0 | return; |
447 | 0 | } |
448 | 1 | } else { |
449 | 1 | return; |
450 | 1 | } |
451 | 1 | } |
452 | | |
453 | 12 | object.address_begin = reinterpret_cast<const void*>(info->dlpi_addr); |
454 | 12 | object.address_end = reinterpret_cast<const void*>(info->dlpi_addr + object.elf->size()); |
455 | 12 | object.name = object_name; |
456 | 12 | objects.push_back(std::move(object)); |
457 | | |
458 | 12 | searchAndCollectSymbolsFromELFSymbolTable(info, *objects.back().elf, SHT_SYMTAB, ".strtab", |
459 | 12 | symbols, resources); |
460 | | |
461 | | /// Unneeded if they were parsed from "program headers" of loaded objects. |
462 | | #if defined USE_MUSL |
463 | | searchAndCollectSymbolsFromELFSymbolTable(info, *objects.back().elf, SHT_DYNSYM, ".dynstr", |
464 | | symbols, resources); |
465 | | #endif |
466 | 12 | } |
467 | | |
468 | | /* Callback for dl_iterate_phdr. |
469 | | * Is called by dl_iterate_phdr for every loaded shared lib until something |
470 | | * else than 0 is returned by one call of this function. |
471 | | */ |
472 | 13 | int collectSymbols(dl_phdr_info* info, size_t, void* data_ptr) { |
473 | 13 | SymbolIndex::Data& data = *reinterpret_cast<SymbolIndex::Data*>(data_ptr); |
474 | | |
475 | 13 | collectSymbolsFromProgramHeaders(info, data.symbols, data.resources); |
476 | 13 | collectSymbolsFromELF(info, data.symbols, data.objects, data.resources, data.build_id); |
477 | | |
478 | | /* Continue iterations */ |
479 | 13 | return 0; |
480 | 13 | } |
481 | | |
/// Returns the element of `vec` whose half-open range [address_begin, address_end) contains
/// `address`, or nullptr when there is none.
///
/// `vec` has to be ordered by address_begin (binary search is used). Only the last element
/// starting at or before `address` is examined.
template <typename T>
const T* find(const void* address, const std::vector<T>& vec) {
    /// First range that has left boundary greater than address.
    const auto next = std::upper_bound(
            vec.begin(), vec.end(), address,
            [](const void* addr, const T& range) { return addr < range.address_begin; });

    /// Every range starts after the address (or there are no ranges at all).
    if (next == vec.begin()) {
        return nullptr;
    }

    /// Last range that has left boundary less or equals than address.
    const T& candidate = *(next - 1);

    const bool inside = address >= candidate.address_begin && address < candidate.address_end;
    return inside ? &candidate : nullptr;
}
|
502 | | |
503 | | } // namespace |
504 | | |
505 | 1 | void SymbolIndex::update() { |
506 | 1 | dl_iterate_phdr(collectSymbols, &data); |
507 | 1 | ::pdqsort(data.objects.begin(), data.objects.end(), |
508 | 59 | [](const Object& a, const Object& b) { return a.address_begin < b.address_begin; }); |
509 | 1 | ::pdqsort(data.symbols.begin(), data.symbols.end(), |
510 | 428M | [](const Symbol& a, const Symbol& b) { return a.address_begin < b.address_begin; }); |
511 | | /// We found symbols both from loaded program headers and from ELF symbol tables. |
512 | 1 | data.symbols.erase(std::unique(data.symbols.begin(), data.symbols.end(), |
513 | 15.8M | [](const Symbol& a, const Symbol& b) { |
514 | 15.8M | return a.address_begin == b.address_begin && |
515 | 15.8M | a.address_end == b.address_end; |
516 | 15.8M | }), |
517 | 1 | data.symbols.end()); |
518 | 1 | } |
519 | | |
520 | 338 | const SymbolIndex::Symbol* SymbolIndex::findSymbol(const void* address) const { |
521 | 338 | return find(address, data.symbols); |
522 | 338 | } |
523 | | |
524 | 338 | const SymbolIndex::Object* SymbolIndex::findObject(const void* address) const { |
525 | 338 | return find(address, data.objects); |
526 | 338 | } |
527 | | |
528 | 0 | std::string SymbolIndex::getBuildIDHex() const { |
529 | 0 | std::string build_id_binary = getBuildID(); |
530 | 0 | std::string build_id_hex; |
531 | 0 | build_id_hex.resize(build_id_binary.size() * 2); |
532 | |
|
533 | 0 | char* pos = build_id_hex.data(); |
534 | 0 | for (auto c : build_id_binary) { |
535 | 0 | vectorized::write_hex_byte_uppercase(c, pos); |
536 | 0 | pos += 2; |
537 | 0 | } |
538 | |
|
539 | 0 | return build_id_hex; |
540 | 0 | } |
541 | | |
542 | 19 | MultiVersion<SymbolIndex>& SymbolIndex::instanceImpl() { |
543 | 19 | static MultiVersion<SymbolIndex> instance(std::unique_ptr<SymbolIndex>(new SymbolIndex)); |
544 | 19 | return instance; |
545 | 19 | } |
546 | | |
547 | 19 | MultiVersion<SymbolIndex>::Version SymbolIndex::instance() { |
548 | 19 | return instanceImpl().get(); |
549 | 19 | } |
550 | | |
551 | 0 | void SymbolIndex::reload() { |
552 | 0 | instanceImpl().set(std::unique_ptr<SymbolIndex>(new SymbolIndex)); |
553 | | /// Also drop stacktrace cache. |
554 | 0 | StackTrace::dropCache(); |
555 | 0 | } |
556 | | |
557 | | } // namespace doris |
558 | | |
559 | | #endif |