Coverage Report

Created: 2025-10-23 18:51

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/be/src/common/dwarf.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/Dwarf.h
19
// and modified by Doris
20
21
#pragma once
22
23
#if defined(__ELF__) && !defined(__FreeBSD__)
24
25
/*
26
 * Copyright 2012-present Facebook, Inc.
27
 *
28
 * Licensed under the Apache License, Version 2.0 (the "License");
29
 * you may not use this file except in compliance with the License.
30
 * You may obtain a copy of the License at
31
 *
32
 *   http://www.apache.org/licenses/LICENSE-2.0
33
 *
34
 * Unless required by applicable law or agreed to in writing, software
35
 * distributed under the License is distributed on an "AS IS" BASIS,
36
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
37
 * See the License for the specific language governing permissions and
38
 * limitations under the License.
39
 */
40
41
/** This file was edited for ClickHouse.
42
 *  Original is from folly library.
43
  */
44
45
#include <functional>
46
#include <memory>
47
#include <optional>
48
#include <string>
49
#include <string_view>
50
#include <variant>
51
#include <vector>
52
53
namespace doris {
54
55
class Elf;
56
57
/**
58
 * DWARF record parser.
59
 *
60
 * We only implement enough DWARF functionality to convert from PC address
61
 * to file and line number information.
62
 *
63
 * This means (although they're not part of the public API of this class), we
64
 * can parse Debug Information Entries (DIEs), abbreviations, attributes (of
65
 * all forms), and we can interpret bytecode for the line number VM.
66
 *
67
 * We can interpret DWARF records of version 2, 3, or 4, although we don't
68
 * actually support many of the version 4 features (such as VLIW, multiple
69
 * operations per instruction)
70
 *
71
 * Note that the DWARF record parser does not allocate heap memory at all.
72
 * This is on purpose: you can use the parser from
73
 * memory-constrained situations (such as an exception handler for
74
 * std::bad_alloc). If it weren't for this requirement, some things would
75
 * be much simpler: the Path class would be unnecessary and would be replaced
76
 * with a std::string; the list of file names in the line number VM would be
77
 * kept as a vector of strings instead of re-executing the program to look for
78
 * DW_LNE_define_file instructions, etc.
79
 */
80
class Dwarf final {
81
    // Note that Dwarf uses (and returns) std::string_view a lot.
82
    // The std::string_view objects point within sections in the ELF file, and so will
83
    // be live for as long as the passed-in Elf is live.
84
public:
85
    /** Create a DWARF parser around an ELF file. */
86
    explicit Dwarf(const std::shared_ptr<Elf>& elf);
87
88
    /**
89
     * More than one location info may exist if current frame is an inline
90
     * function call.
91
     */
92
    static constexpr uint32_t kMaxInlineLocationInfoPerFrame = 10;
93
94
    /**
95
      * Represent a file path as a collection of three parts (base directory,
96
      * subdirectory, and file).
97
      */
98
    class Path {
99
    public:
100
19.7k
        Path() = default;
101
102
        Path(std::string_view baseDir, std::string_view subDir, std::string_view file);
103
104
0
        [[nodiscard]] std::string_view baseDir() const { return baseDir_; }
105
0
        [[nodiscard]] std::string_view subDir() const { return subDir_; }
106
0
        [[nodiscard]] std::string_view file() const { return file_; }
107
108
        [[nodiscard]] size_t size() const;
109
110
        /**
111
        * Copy the Path to a buffer of size bufSize.
112
        *
113
        * toBuffer behaves like snprintf: It will always null-terminate the
114
        * buffer (so it will copy at most bufSize-1 bytes), and it will return
115
        * the number of bytes that would have been written if there had been
116
        * enough room, so, if toBuffer returns a value >= bufSize, the output
117
        * was truncated.
118
        */
119
        size_t toBuffer(char* buf, size_t bufSize) const;
120
121
        void toString(std::string& dest) const;
122
145
        [[nodiscard]] std::string toString() const {
123
145
            std::string s;
124
145
            toString(s);
125
145
            return s;
126
145
        }
127
128
        // TODO(tudorb): Implement operator==, operator!=; not as easy as it
129
        // seems as the same path can be represented in multiple ways
130
    private:
131
        std::string_view baseDir_; /// NOLINT
132
        std::string_view subDir_;  /// NOLINT
133
        std::string_view file_;    /// NOLINT
134
    };
135
136
    // Indicates inline function `name` is called at `line@file`.
137
    struct CallLocation {
138
        Path file = {};
139
        uint64_t line = 0;
140
        std::string_view name;
141
    };
142
143
    enum class LocationInfoMode {
144
        // Don't resolve location info.
145
        DISABLED,
146
        // Perform CU lookup using .debug_aranges (might be incomplete).
147
        FAST,
148
        // Scan all CU in .debug_info (slow!) on .debug_aranges lookup failure.
149
        FULL,
150
        // Scan .debug_info (much slower, use with caution) for inline functions in
151
        // addition to FULL.
152
        FULL_WITH_INLINE,
153
    };
154
155
    struct LocationInfo {
156
        bool has_main_file = false;
157
        Path main_file;
158
159
        bool has_file_and_line = false;
160
        Path file;
161
        uint64_t line = 0;
162
    };
163
164
    /**
165
     * Frame information: symbol name and location.
166
     */
167
    struct SymbolizedFrame {
168
        bool found = false;
169
        uintptr_t addr = 0;
170
        // Mangled symbol name. Use `folly::demangle()` to demangle it.
171
        const char* name = nullptr;
172
        LocationInfo location;
173
        std::shared_ptr<const Elf> file;
174
175
0
        void clear() { *this = SymbolizedFrame(); }
176
    };
177
178
    /** Find the file and line number information corresponding to address.
179
      * The address must be physical: an offset within the object file, not including the virtual-memory offset at which the object is loaded.
180
      */
181
    bool findAddress(uintptr_t address, LocationInfo& locationInfo, LocationInfoMode mode,
182
                     std::vector<SymbolizedFrame>& inline_frames) const;
183
184
private:
185
    static bool findDebugInfoOffset(uintptr_t address, std::string_view aranges, uint64_t& offset);
186
187
    std::shared_ptr<const Elf> elf_; /// NOLINT
188
189
    // DWARF section made up of chunks, each prefixed with a length header.
190
    // The length indicates whether the chunk is DWARF-32 or DWARF-64, which
191
    // guides interpretation of "section offset" records.
192
    // (yes, DWARF-32 and DWARF-64 sections may coexist in the same file)
193
    class Section {
194
    public:
195
0
        Section() : is64_bit(false) {}
196
197
        explicit Section(std::string_view d);
198
199
        // Return next chunk, if any; the 4- or 12-byte length was already
200
        // parsed and isn't part of the chunk.
201
        bool next(std::string_view& chunk);
202
203
        // Is the current chunk 64 bit?
204
565k
        [[nodiscard]] bool is64Bit() const { return is64_bit; }
205
206
    private:
207
        // Yes, 32- and 64-bit sections may coexist.  Yikes!
208
        bool is64_bit;
209
        std::string_view data;
210
    };
211
212
    // Abbreviation for a Debugging Information Entry.
213
    struct DIEAbbreviation {
214
        uint64_t code = 0;
215
        uint64_t tag = 0;
216
        bool has_children = false;
217
218
        std::string_view attributes;
219
    };
220
221
    // Debugging information entry to define a low-level representation of a
222
    // source program. Each debugging information entry consists of an identifying
223
    // tag and a series of attributes. An entry, or group of entries together,
224
    // provide a description of a corresponding entity in the source program.
225
    struct Die {
226
        bool is64Bit; /// NOLINT
227
        // Offset from start to first attribute
228
        uint8_t attr_offset;
229
        // Offset within debug info.
230
        uint64_t offset;
231
        uint64_t code;
232
        DIEAbbreviation abbr;
233
    };
234
235
    struct AttributeSpec {
236
        uint64_t name = 0;
237
        uint64_t form = 0;
238
        int64_t implicitConst = 0; // only set when form=DW_FORM_implicit_const
239
240
3.43k
        explicit operator bool() const { return name != 0 || form != 0; }
241
    };
242
243
    struct Attribute {
244
        AttributeSpec spec;
245
        const Die& die;
246
        std::variant<uint64_t, std::string_view> attr_value;
247
    };
248
249
    enum {
250
        DW_UT_compile = 0x01,
251
        DW_UT_skeleton = 0x04,
252
    };
253
254
    struct CompilationUnit {
255
        bool is64Bit = false; /// NOLINT
256
        uint8_t version = 0;
257
        uint8_t unit_type = DW_UT_compile; // DW_UT_compile or DW_UT_skeleton
258
        uint8_t addr_size = 0;
259
        // Offset in .debug_info of this compilation unit.
260
        uint64_t offset = 0;
261
        uint64_t size = 0;
262
        // Offset in .debug_info for the first DIE in this compilation unit.
263
        uint64_t first_die = 0;
264
        uint64_t abbrev_offset = 0;
265
266
        // The beginning of the CU's contribution to .debug_addr
267
        std::optional<uint64_t> addr_base; // DW_AT_addr_base (DWARF 5)
268
        // The beginning of the offsets table (immediately following the
269
        // header) of the CU's contribution to .debug_loclists
270
        std::optional<uint64_t> loclists_base; // DW_AT_loclists_base (DWARF 5)
271
        // The beginning of the offsets table (immediately following the
272
        // header) of the CU's contribution to .debug_rnglists
273
        std::optional<uint64_t> rnglists_base; // DW_AT_rnglists_base (DWARF 5)
274
        // Points to the first string offset of the compilation unit’s
275
        // contribution to the .debug_str_offsets (or .debug_str_offsets.dwo) section.
276
        std::optional<uint64_t> str_offsets_base; // DW_AT_str_offsets_base (DWARF 5)
277
278
        // Only the CompilationUnit that contains the caller functions needs this cache.
279
        // Indexed by (abbr.code - 1) if (abbr.code - 1) < abbr_cache.size().
280
        std::vector<DIEAbbreviation> abbr_cache;
281
    };
282
283
    /** cu must exist during the life cycle of created Die. */
284
    [[nodiscard]] Die getDieAtOffset(const CompilationUnit& cu, uint64_t offset) const;
285
286
    bool findLocation(uintptr_t address, const LocationInfoMode mode, CompilationUnit& cu,
287
                      LocationInfo& info, std::vector<SymbolizedFrame>& inline_frames) const;
288
289
    /**
290
     * Finds a subprogram debugging info entry that contains a given address among
291
     * children of given die. Depth first search.
292
     */
293
    void findSubProgramDieForAddress(const CompilationUnit& cu, const Die& die, uint64_t address,
294
                                     std::optional<uint64_t> base_addr_cu, Die& subprogram) const;
295
296
    // Interpreter for the line number bytecode VM
297
    class LineNumberVM {
298
    public:
299
        LineNumberVM(std::string_view data, std::string_view compilationDirectory,
300
                     std::string_view debugStr, std::string_view debugLineStr);
301
302
        bool findAddress(uintptr_t target, Path& file, uint64_t& line);
303
304
        /** Gets full file name at given index including directory. */
305
        [[nodiscard]] Path getFullFileName(uint64_t index) const;
306
307
    private:
308
        void init();
309
        void reset();
310
311
        // Execute until we commit one new row to the line number matrix
312
        bool next(std::string_view& program);
313
        enum StepResult {
314
            CONTINUE, // Continue feeding opcodes
315
            COMMIT,   // Commit new <address, file, line> tuple
316
            END,      // End of sequence
317
        };
318
        // Execute one opcode
319
        StepResult step(std::string_view& program);
320
321
        struct FileName {
322
            std::string_view relativeName; /// NOLINT
323
            // 0 = current compilation directory
324
            // otherwise, 1-based index in the list of include directories
325
            uint64_t directoryIndex; /// NOLINT
326
        };
327
        // Read one FileName object, remove_prefix program
328
        static bool readFileName(std::string_view& program, FileName& fn);
329
330
        // Get file name at given index; may be in the initial table
331
        // (fileNames_) or defined using DW_LNE_define_file (and we reexecute
332
        // enough of the program to find it, if so)
333
        [[nodiscard]] FileName getFileName(uint64_t index) const;
334
335
        // Get include directory at given index
336
        [[nodiscard]] std::string_view getIncludeDirectory(uint64_t index) const;
337
338
        // Execute opcodes until finding a DW_LNE_define_file and return true;
339
        // return false at the end.
340
        bool nextDefineFile(std::string_view& program, FileName& fn) const;
341
342
        // Initialization
343
        bool is64Bit_;                          /// NOLINT
344
        std::string_view data_;                 /// NOLINT
345
        std::string_view compilationDirectory_; /// NOLINT
346
        std::string_view debugStr_;             // needed for DWARF 5 /// NOLINT
347
        std::string_view debugLineStr_;         // DWARF 5        /// NOLINT
348
349
        // Header
350
        uint16_t version_;                               /// NOLINT
351
        uint8_t minLength_;                              /// NOLINT
352
        bool defaultIsStmt_;                             /// NOLINT
353
        int8_t lineBase_;                                /// NOLINT
354
        uint8_t lineRange_;                              /// NOLINT
355
        uint8_t opcodeBase_;                             /// NOLINT
356
        const uint8_t* standardOpcodeLengths_ = nullptr; /// NOLINT
357
358
        // 6.2.4 The Line Number Program Header.
359
        struct {
360
            size_t includeDirectoryCount;
361
            std::string_view includeDirectories;
362
            size_t fileNameCount;
363
            std::string_view fileNames;
364
        } v4_;
365
366
        struct {
367
            uint8_t directoryEntryFormatCount;
368
            std::string_view directoryEntryFormat;
369
            uint64_t directoriesCount;
370
            std::string_view directories;
371
372
            uint8_t fileNameEntryFormatCount;
373
            std::string_view fileNameEntryFormat;
374
            uint64_t fileNamesCount;
375
            std::string_view fileNames;
376
        } v5_;
377
378
        // State machine registers
379
        uint64_t address_;       /// NOLINT
380
        uint64_t file_;          /// NOLINT
381
        uint64_t line_;          /// NOLINT
382
        uint64_t column_;        /// NOLINT
383
        bool isStmt_;            /// NOLINT
384
        bool basicBlock_;        /// NOLINT
385
        bool endSequence_;       /// NOLINT
386
        bool prologueEnd_;       /// NOLINT
387
        bool epilogueBegin_;     /// NOLINT
388
        uint64_t isa_;           /// NOLINT
389
        uint64_t discriminator_; /// NOLINT
390
    };
391
392
    /**
393
     * Finds inlined subroutine DIEs and their caller lines that contain a given
394
     * address among children of given die. Depth first search.
395
     */
396
    void findInlinedSubroutineDieForAddress(const CompilationUnit& cu, const Die& die,
397
                                            const LineNumberVM& line_vm, uint64_t address,
398
                                            std::optional<uint64_t> base_addr_cu,
399
                                            std::vector<CallLocation>& locations,
400
                                            const size_t max_size) const;
401
402
    // Read an abbreviation from a std::string_view, return true if at end; remove_prefix section
403
    static bool readAbbreviation(std::string_view& section, DIEAbbreviation& abbr);
404
405
    static void readCompilationUnitAbbrs(std::string_view abbrev, CompilationUnit& cu);
406
407
    /**
408
     * Iterates over all children of a debugging info entry, calling the given
409
     * callable for each. Iteration is stopped early if any of the calls return
410
     * false. Returns the offset of next DIE after iterations.
411
     */
412
    size_t forEachChild(const CompilationUnit& cu, const Die& die,
413
                        std::function<bool(const Die& die)> f) const;
414
415
    // Get abbreviation corresponding to a code, in the chunk starting at
416
    // offset in the .debug_abbrev section
417
    [[nodiscard]] DIEAbbreviation getAbbreviation(uint64_t code, uint64_t offset) const;
418
419
    /**
420
     * Iterates over all attributes of a debugging info entry, calling the given
421
     * callable for each. If all attributes are visited, then return the offset of
422
     * next DIE, or else iteration is stopped early and return size_t(-1) if any
423
     * of the calls return false.
424
     */
425
    size_t forEachAttribute(const CompilationUnit& cu, const Die& die,
426
                            std::function<bool(const Attribute& die)> f) const;
427
428
    Attribute readAttribute(const CompilationUnit& cu, const Die& die, AttributeSpec spec,
429
                            std::string_view& info) const;
430
431
    // Read one attribute <name, form> pair, remove_prefix sp; returns <0, 0> at end.
432
    static AttributeSpec readAttributeSpec(std::string_view& sp);
433
434
    // Read one attribute value, remove_prefix sp
435
    using AttributeValue = std::variant<uint64_t, std::string_view>;
436
    AttributeValue readAttributeValue(std::string_view& sp, uint64_t form, bool is64_bit) const;
437
438
    // Get an ELF section by name; the result is returned as a std::string_view
439
    std::string_view getSection(const char* name) const;
440
441
    [[nodiscard]] CompilationUnit getCompilationUnit(uint64_t offset) const;
442
    // Finds the Compilation Unit starting at offset.
443
    [[nodiscard]] CompilationUnit findCompilationUnit(uint64_t targetOffset) const;
444
445
    template <class T>
446
    std::optional<T> getAttribute(const CompilationUnit& cu, const Die& die,
447
0
                                  uint64_t attr_name) const {
448
0
        std::optional<T> result;
449
0
        forEachAttribute(cu, die, [&](const Attribute& attr) {
450
0
            if (attr.spec.name == attr_name) {
451
0
                result = std::get<T>(attr.attr_value);
452
0
                return false;
453
0
            }
454
0
            return true;
455
0
        });
456
0
        return result;
457
0
    }
458
459
    // Check if the given address is in the range list at the given offset in .debug_ranges.
460
    [[nodiscard]] bool isAddrInRangeList(const CompilationUnit& cu, uint64_t address,
461
                                         std::optional<uint64_t> base_addr, size_t offset,
462
                                         uint8_t addr_size) const;
463
464
    std::string_view abbrev_;      // .debug_abbrev                     /// NOLINT
465
    std::string_view addr_;        // .debug_addr (DWARF 5)               /// NOLINT
466
    std::string_view aranges_;     // .debug_aranges                   /// NOLINT
467
    std::string_view info_;        // .debug_info                         /// NOLINT
468
    std::string_view line_;        // .debug_line                         /// NOLINT
469
    std::string_view line_str_;    // .debug_line_str (DWARF 5)       /// NOLINT
470
    std::string_view loclists_;    // .debug_loclists (DWARF 5)       /// NOLINT
471
    std::string_view ranges_;      // .debug_ranges                     /// NOLINT
472
    std::string_view rnglists_;    // .debug_rnglists (DWARF 5)       /// NOLINT
473
    std::string_view str_;         // .debug_str                           /// NOLINT
474
    std::string_view str_offsets_; // .debug_str_offsets (DWARF 5) /// NOLINT
475
};
476
477
} // namespace doris
478
479
#endif