/root/doris/be/src/common/dwarf.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // This file is copied from |
18 | | // https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/Dwarf.h |
19 | | // and modified by Doris |
20 | | |
21 | | #pragma once |
22 | | |
23 | | #if defined(__ELF__) && !defined(__FreeBSD__) |
24 | | |
25 | | /* |
26 | | * Copyright 2012-present Facebook, Inc. |
27 | | * |
28 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
29 | | * you may not use this file except in compliance with the License. |
30 | | * You may obtain a copy of the License at |
31 | | * |
32 | | * http://www.apache.org/licenses/LICENSE-2.0 |
33 | | * |
34 | | * Unless required by applicable law or agreed to in writing, software |
35 | | * distributed under the License is distributed on an "AS IS" BASIS, |
36 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
37 | | * See the License for the specific language governing permissions and |
38 | | * limitations under the License. |
39 | | */ |
40 | | |
41 | | /** This file was edited for ClickHouse. |
42 | | * Original is from folly library. |
43 | | */ |
44 | | |
45 | | #include <functional> |
46 | | #include <memory> |
47 | | #include <optional> |
48 | | #include <string> |
49 | | #include <string_view> |
50 | | #include <variant> |
51 | | #include <vector> |
52 | | |
53 | | namespace doris { |
54 | | |
55 | | class Elf; |
56 | | |
57 | | /** |
58 | | * DWARF record parser. |
59 | | * |
60 | | * We only implement enough DWARF functionality to convert from PC address |
61 | | * to file and line number information. |
62 | | * |
63 | | * This means (although they're not part of the public API of this class), we |
64 | | * can parse Debug Information Entries (DIEs), abbreviations, attributes (of |
65 | | * all forms), and we can interpret bytecode for the line number VM. |
66 | | * |
67 | | * We can interpret DWARF records of version 2, 3, or 4, although we don't |
68 | | * actually support many of the version 4 features (such as VLIW, multiple |
69 | | * operations per instruction) |
70 | | * |
71 | | * Note that the DWARF record parser is designed to avoid heap allocation. |
72 | | * This is on purpose: you can use the parser from |
73 | | * memory-constrained situations (such as an exception handler for |
74 | | * std::bad_alloc). If it weren't for this requirement, some things would |
75 | | * be much simpler: the Path class would be unnecessary and would be replaced |
76 | | * with a std::string; the list of file names in the line number VM would be |
77 | | * kept as a vector of strings instead of re-executing the program to look for |
78 | | * DW_LNE_define_file instructions, etc. Caveat: this version's interface uses std::vector and std::function in places, which may allocate. |
79 | | */ |
80 | | class Dwarf final { |
81 | | // Note that Dwarf uses (and returns) std::string_view a lot. |
82 | | // These std::string_views point into sections of the ELF file, and so |
83 | | // remain valid only for as long as the passed-in Elf is alive. |
84 | | public: |
85 | | /** Create a DWARF parser around an ELF file. */ |
86 | | explicit Dwarf(const std::shared_ptr<Elf>& elf); |
87 | | |
88 | | /** |
89 | | * More than one location info may exist if current frame is an inline |
90 | | * function call. |
91 | | */ |
92 | | static constexpr uint32_t kMaxInlineLocationInfoPerFrame = 10; |
93 | | |
94 | | /** |
95 | | * Represents a file path as a collection of three parts (base directory, |
96 | | * subdirectory, and file). The parts are held as std::string_view, so a Path does not own their text. |
97 | | */ |
98 | | class Path { |
99 | | public: |
100 | 1.35k | Path() = default; |
101 | | |
102 | | Path(std::string_view baseDir, std::string_view subDir, std::string_view file); |
103 | | |
104 | 0 | [[nodiscard]] std::string_view baseDir() const { return baseDir_; } |
105 | 0 | [[nodiscard]] std::string_view subDir() const { return subDir_; } |
106 | 0 | [[nodiscard]] std::string_view file() const { return file_; } |
107 | | |
108 | | [[nodiscard]] size_t size() const; |
109 | | |
110 | | /** |
111 | | * Copy the Path to the buffer buf, which has room for bufSize bytes. |
112 | | * |
113 | | * toBuffer behaves like snprintf: it always null-terminates the |
114 | | * buffer (so it copies at most bufSize-1 bytes), and it returns |
115 | | * the number of bytes that would have been written if there had been |
116 | | * enough room; so, if toBuffer returns a value >= bufSize, the output |
117 | | * was truncated. |
118 | | */ |
119 | | size_t toBuffer(char* buf, size_t bufSize) const; |
120 | | |
121 | | void toString(std::string& dest) const; |
122 | 129 | [[nodiscard]] std::string toString() const { |
123 | 129 | std::string s; |
124 | 129 | toString(s); |
125 | 129 | return s; |
126 | 129 | } |
127 | | |
128 | | // TODO(tudorb): Implement operator==, operator!=; this is not as easy as it |
129 | | // seems, because the same path can be represented in multiple ways. |
130 | | private: |
131 | | std::string_view baseDir_; /// NOLINT |
132 | | std::string_view subDir_; /// NOLINT |
133 | | std::string_view file_; /// NOLINT |
134 | | }; |
135 | | |
136 | | // Call site of an inlined function: the inline function `name` is called at line `line` of `file`. |
137 | | struct CallLocation { |
138 | | Path file = {}; |
139 | | uint64_t line = 0; |
140 | | std::string_view name; |
141 | | }; |
142 | | |
143 | | enum class LocationInfoMode { |
144 | | // Don't resolve location info. |
145 | | DISABLED, |
146 | | // Perform CU lookup using .debug_aranges (might be incomplete). |
147 | | FAST, |
148 | | // Scan all CUs in .debug_info (slow!) if the .debug_aranges lookup fails. |
149 | | FULL, |
150 | | // Scan .debug_info for inline functions (much slower still; use with caution), in |
151 | | // addition to everything FULL does. |
152 | | FULL_WITH_INLINE, |
153 | | }; |
154 | | |
155 | | struct LocationInfo { |
156 | | bool has_main_file = false; |
157 | | Path main_file; |
158 | | |
159 | | bool has_file_and_line = false; |
160 | | Path file; |
161 | | uint64_t line = 0; |
162 | | }; |
163 | | |
164 | | /** |
165 | | * Frame information: address, mangled symbol name, source location, and an associated Elf object. |
166 | | */ |
167 | | struct SymbolizedFrame { |
168 | | bool found = false; |
169 | | uintptr_t addr = 0; |
170 | | // Mangled symbol name; it must be demangled separately (upstream folly suggests `folly::demangle()`). |
171 | | const char* name = nullptr; |
172 | | LocationInfo location; |
173 | | std::shared_ptr<const Elf> file; |
174 | | |
175 | 0 | void clear() { *this = SymbolizedFrame(); } |
176 | | }; |
177 | | |
178 | | /** Find the file and line number information corresponding to address. |
179 | | * The address must be physical: an offset within the object file, not including the base address at which the object is loaded in virtual memory. |
180 | | */ |
181 | | bool findAddress(uintptr_t address, LocationInfo& locationInfo, LocationInfoMode mode, |
182 | | std::vector<SymbolizedFrame>& inline_frames) const; |
183 | | |
184 | | private: |
185 | | static bool findDebugInfoOffset(uintptr_t address, std::string_view aranges, uint64_t& offset); |
186 | | |
187 | | std::shared_ptr<const Elf> elf_; /// NOLINT |
188 | | |
189 | | // A DWARF section, made up of chunks, each prefixed with a length header. |
190 | | // The length indicates whether the chunk is DWARF-32 or DWARF-64, which |
191 | | // guides interpretation of "section offset" records. |
192 | | // (Yes, DWARF-32 and DWARF-64 sections may coexist in the same file.) |
193 | | class Section { |
194 | | public: |
195 | 0 | Section() : is64_bit(false) {} |
196 | | |
197 | | explicit Section(std::string_view d); |
198 | | |
199 | | // Fetch the next chunk into `chunk`, if any; the 4- or 12-byte length header has |
200 | | // already been parsed and isn't part of the chunk. |
201 | | bool next(std::string_view& chunk); |
202 | | |
203 | | // Is the current chunk in the 64-bit (DWARF-64) format? |
204 | 325k | [[nodiscard]] bool is64Bit() const { return is64_bit; } |
205 | | |
206 | | private: |
207 | | // Format flag reported by is64Bit(). Yes, 32- and 64-bit sections may coexist. Yikes! |
208 | | bool is64_bit; |
209 | | std::string_view data; |
210 | | }; |
211 | | |
212 | | // Abbreviation for a Debugging Information Entry (DIE): its code, tag, has-children flag, and attributes. |
213 | | struct DIEAbbreviation { |
214 | | uint64_t code = 0; |
215 | | uint64_t tag = 0; |
216 | | bool has_children = false; |
217 | | |
218 | | std::string_view attributes; |
219 | | }; |
220 | | |
221 | | // Debugging information entry (DIE): defines a low-level representation of a |
222 | | // source program. Each debugging information entry consists of an identifying |
223 | | // tag and a series of attributes. An entry, or a group of entries together, |
224 | | // provides a description of a corresponding entity in the source program. |
225 | | struct Die { |
226 | | bool is64Bit; /// NOLINT |
227 | | // Offset from the start (presumably of this DIE; confirm) to the first attribute. |
228 | | uint8_t attr_offset; |
229 | | // Offset within debug info. |
230 | | uint64_t offset; |
231 | | uint64_t code; |
232 | | DIEAbbreviation abbr; |
233 | | }; |
234 | | |
235 | | struct AttributeSpec { |
236 | | uint64_t name = 0; |
237 | | uint64_t form = 0; |
238 | | int64_t implicitConst = 0; // only set when form=DW_FORM_implicit_const |
239 | | |
240 | 3.04k | explicit operator bool() const { return name != 0 || form != 0; } |
241 | | }; |
242 | | |
243 | | struct Attribute { |
244 | | AttributeSpec spec; |
245 | | const Die& die; |
246 | | std::variant<uint64_t, std::string_view> attr_value; |
247 | | }; |
248 | | |
249 | | enum { |
250 | | DW_UT_compile = 0x01, |
251 | | DW_UT_skeleton = 0x04, |
252 | | }; |
253 | | |
254 | | struct CompilationUnit { |
255 | | bool is64Bit = false; /// NOLINT |
256 | | uint8_t version = 0; |
257 | | uint8_t unit_type = DW_UT_compile; // DW_UT_compile or DW_UT_skeleton |
258 | | uint8_t addr_size = 0; |
259 | | // Offset in .debug_info of this compilation unit. |
260 | | uint64_t offset = 0; |
261 | | uint64_t size = 0; |
262 | | // Offset in .debug_info for the first DIE in this compilation unit. |
263 | | uint64_t first_die = 0; |
264 | | uint64_t abbrev_offset = 0; |
265 | | |
266 | | // The beginning of the CU's contribution to .debug_addr |
267 | | std::optional<uint64_t> addr_base; // DW_AT_addr_base (DWARF 5) |
268 | | // The beginning of the offsets table (immediately following the |
269 | | // header) of the CU's contribution to .debug_loclists |
270 | | std::optional<uint64_t> loclists_base; // DW_AT_loclists_base (DWARF 5) |
271 | | // The beginning of the offsets table (immediately following the |
272 | | // header) of the CU's contribution to .debug_rnglists |
273 | | std::optional<uint64_t> rnglists_base; // DW_AT_rnglists_base (DWARF 5) |
274 | | // Points to the first string offset of the compilation unit's |
275 | | // contribution to the .debug_str_offsets (or .debug_str_offsets.dwo) section. |
276 | | std::optional<uint64_t> str_offsets_base; // DW_AT_str_offsets_base (DWARF 5) |
277 | | |
278 | | // Only the CompilationUnit that contains the caller functions needs this cache. |
279 | | // Indexed by (abbr.code - 1) if (abbr.code - 1) < abbr_cache.size(). |
280 | | std::vector<DIEAbbreviation> abbr_cache; |
281 | | }; |
282 | | |
283 | | /** cu must exist during the life cycle of created Die. */ |
284 | | [[nodiscard]] Die getDieAtOffset(const CompilationUnit& cu, uint64_t offset) const; |
285 | | |
286 | | bool findLocation(uintptr_t address, const LocationInfoMode mode, CompilationUnit& cu, |
287 | | LocationInfo& info, std::vector<SymbolizedFrame>& inline_frames) const; |
288 | | |
289 | | /** |
290 | | * Finds a subprogram debugging info entry that contains a given address among |
291 | | * children of given die. Depth first search. |
292 | | */ |
293 | | void findSubProgramDieForAddress(const CompilationUnit& cu, const Die& die, uint64_t address, |
294 | | std::optional<uint64_t> base_addr_cu, Die& subprogram) const; |
295 | | |
296 | | // Interpreter for the line number bytecode VM |
297 | | class LineNumberVM { |
298 | | public: |
299 | | LineNumberVM(std::string_view data, std::string_view compilationDirectory, |
300 | | std::string_view debugStr, std::string_view debugLineStr); |
301 | | |
302 | | bool findAddress(uintptr_t target, Path& file, uint64_t& line); |
303 | | |
304 | | /** Gets the full file name at the given index, including the directory. */ |
305 | | [[nodiscard]] Path getFullFileName(uint64_t index) const; |
306 | | |
307 | | private: |
308 | | void init(); |
309 | | void reset(); |
310 | | |
311 | | // Execute until we commit one new row to the line number matrix |
312 | | bool next(std::string_view& program); |
313 | | enum StepResult { |
314 | | CONTINUE, // Continue feeding opcodes |
315 | | COMMIT, // Commit new <address, file, line> tuple |
316 | | END, // End of sequence |
317 | | }; |
318 | | // Execute one opcode |
319 | | StepResult step(std::string_view& program); |
320 | | |
321 | | struct FileName { |
322 | | std::string_view relativeName; /// NOLINT |
323 | | // 0 = current compilation directory |
324 | | // otherwise, 1-based index in the list of include directories |
325 | | uint64_t directoryIndex; /// NOLINT |
326 | | }; |
327 | | // Read one FileName object from the front of `program`, advancing `program` past it (remove_prefix) |
328 | | static bool readFileName(std::string_view& program, FileName& fn); |
329 | | |
330 | | // Get file name at given index; may be in the initial table from the header |
331 | | // (presumably v4_.fileNames / v5_.fileNames) or defined using DW_LNE_define_file |
332 | | // (and we re-execute enough of the program to find it, if so). |
333 | | [[nodiscard]] FileName getFileName(uint64_t index) const; |
334 | | |
335 | | // Get include directory at given index |
336 | | [[nodiscard]] std::string_view getIncludeDirectory(uint64_t index) const; |
337 | | |
338 | | // Execute opcodes until finding a DW_LNE_define_file and return true; |
339 | | // return file at the end. NOTE(review): "file" is possibly a typo for "false"; confirm against the implementation. |
340 | | bool nextDefineFile(std::string_view& program, FileName& fn) const; |
341 | | |
342 | | // Initialization |
343 | | bool is64Bit_; /// NOLINT |
344 | | std::string_view data_; /// NOLINT |
345 | | std::string_view compilationDirectory_; /// NOLINT |
346 | | std::string_view debugStr_; // needed for DWARF 5 /// NOLINT |
347 | | std::string_view debugLineStr_; // DWARF 5 /// NOLINT |
348 | | |
349 | | // Header |
350 | | uint16_t version_; /// NOLINT |
351 | | uint8_t minLength_; /// NOLINT |
352 | | bool defaultIsStmt_; /// NOLINT |
353 | | int8_t lineBase_; /// NOLINT |
354 | | uint8_t lineRange_; /// NOLINT |
355 | | uint8_t opcodeBase_; /// NOLINT |
356 | | const uint8_t* standardOpcodeLengths_ = nullptr; /// NOLINT |
357 | | |
358 | | // 6.2.4 The Line Number Program Header. |
359 | | struct { |
360 | | size_t includeDirectoryCount; |
361 | | std::string_view includeDirectories; |
362 | | size_t fileNameCount; |
363 | | std::string_view fileNames; |
364 | | } v4_; |
365 | | |
366 | | struct { |
367 | | uint8_t directoryEntryFormatCount; |
368 | | std::string_view directoryEntryFormat; |
369 | | uint64_t directoriesCount; |
370 | | std::string_view directories; |
371 | | |
372 | | uint8_t fileNameEntryFormatCount; |
373 | | std::string_view fileNameEntryFormat; |
374 | | uint64_t fileNamesCount; |
375 | | std::string_view fileNames; |
376 | | } v5_; |
377 | | |
378 | | // State machine registers |
379 | | uint64_t address_; /// NOLINT |
380 | | uint64_t file_; /// NOLINT |
381 | | uint64_t line_; /// NOLINT |
382 | | uint64_t column_; /// NOLINT |
383 | | bool isStmt_; /// NOLINT |
384 | | bool basicBlock_; /// NOLINT |
385 | | bool endSequence_; /// NOLINT |
386 | | bool prologueEnd_; /// NOLINT |
387 | | bool epilogueBegin_; /// NOLINT |
388 | | uint64_t isa_; /// NOLINT |
389 | | uint64_t discriminator_; /// NOLINT |
390 | | }; |
391 | | |
392 | | /** |
393 | | * Finds inlined subroutine DIEs and their caller lines that contains a given |
394 | | * address among children of given die. Depth first search. |
395 | | */ |
396 | | void findInlinedSubroutineDieForAddress(const CompilationUnit& cu, const Die& die, |
397 | | const LineNumberVM& line_vm, uint64_t address, |
398 | | std::optional<uint64_t> base_addr_cu, |
399 | | std::vector<CallLocation>& locations, |
400 | | const size_t max_size) const; |
401 | | |
402 | | // Read an abbreviation from a std::string_view, return true if at end; remove_prefix section |
403 | | static bool readAbbreviation(std::string_view& section, DIEAbbreviation& abbr); |
404 | | |
405 | | static void readCompilationUnitAbbrs(std::string_view abbrev, CompilationUnit& cu); |
406 | | |
407 | | /** |
408 | | * Iterates over all children of a debugging info entry, calling the given |
409 | | * callable for each. Iteration is stopped early if any of the calls return |
410 | | * false. Returns the offset of next DIE after iterations. |
411 | | */ |
412 | | size_t forEachChild(const CompilationUnit& cu, const Die& die, |
413 | | std::function<bool(const Die& die)> f) const; |
414 | | |
415 | | // Get abbreviation corresponding to a code, in the chunk starting at |
416 | | // offset in the .debug_abbrev section |
417 | | [[nodiscard]] DIEAbbreviation getAbbreviation(uint64_t code, uint64_t offset) const; |
418 | | |
419 | | /** |
420 | | * Iterates over all attributes of a debugging info entry, calling the given |
421 | | * callable for each. If all attributes are visited, returns the offset of the |
422 | | * next DIE; if any of the calls returns false, iteration stops early and |
423 | | * size_t(-1) is returned. |
424 | | */ |
425 | | size_t forEachAttribute(const CompilationUnit& cu, const Die& die, |
426 | | std::function<bool(const Attribute& die)> f) const; |
427 | | |
428 | | Attribute readAttribute(const CompilationUnit& cu, const Die& die, AttributeSpec spec, |
429 | | std::string_view& info) const; |
430 | | |
431 | | // Read one attribute <name, form> pair, remove_prefix sp; returns <0, 0> at end. |
432 | | static AttributeSpec readAttributeSpec(std::string_view& sp); |
433 | | |
434 | | // Read one attribute value, remove_prefix sp |
435 | | using AttributeValue = std::variant<uint64_t, std::string_view>; |
436 | | AttributeValue readAttributeValue(std::string_view& sp, uint64_t form, bool is64_bit) const; |
437 | | |
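438 | | // Get the contents of an ELF section by name (NOTE(review): returns a std::string_view, not a bool; presumably empty if the section is not found, confirm) |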
439 | | std::string_view getSection(const char* name) const; |
440 | | |
441 | | [[nodiscard]] CompilationUnit getCompilationUnit(uint64_t offset) const; |
442 | | // Finds the Compilation Unit starting at offset. |
443 | | [[nodiscard]] CompilationUnit findCompilationUnit(uint64_t targetOffset) const; |
444 | | |
445 | | template <class T> |
446 | | std::optional<T> getAttribute(const CompilationUnit& cu, const Die& die, |
447 | 0 | uint64_t attr_name) const { |
448 | 0 | std::optional<T> result; |
449 | 0 | forEachAttribute(cu, die, [&](const Attribute& attr) { |
450 | 0 | if (attr.spec.name == attr_name) { |
451 | 0 | result = std::get<T>(attr.attr_value); |
452 | 0 | return false; |
453 | 0 | } |
454 | 0 | return true; |
455 | 0 | }); |
456 | 0 | return result; |
457 | 0 | } |
458 | | |
459 | | // Check if the given address is in the range list at the given offset in .debug_ranges. |
460 | | [[nodiscard]] bool isAddrInRangeList(const CompilationUnit& cu, uint64_t address, |
461 | | std::optional<uint64_t> base_addr, size_t offset, |
462 | | uint8_t addr_size) const; |
463 | | |
464 | | std::string_view abbrev_; // .debug_abbrev /// NOLINT |
465 | | std::string_view addr_; // .debug_addr (DWARF 5) /// NOLINT |
466 | | std::string_view aranges_; // .debug_aranges /// NOLINT |
467 | | std::string_view info_; // .debug_info /// NOLINT |
468 | | std::string_view line_; // .debug_line /// NOLINT |
469 | | std::string_view line_str_; // .debug_line_str (DWARF 5) /// NOLINT |
470 | | std::string_view loclists_; // .debug_loclists (DWARF 5) /// NOLINT |
471 | | std::string_view ranges_; // .debug_ranges /// NOLINT |
472 | | std::string_view rnglists_; // .debug_rnglists (DWARF 5) /// NOLINT |
473 | | std::string_view str_; // .debug_str /// NOLINT |
474 | | std::string_view str_offsets_; // .debug_str_offsets (DWARF 5) /// NOLINT |
475 | | }; |
476 | | |
477 | | } // namespace doris |
478 | | |
479 | | #endif |