Coverage Report

Created: 2025-04-10 14:34

/root/doris/be/src/common/dwarf.cpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/Dwarf.cpp
19
// and modified by Doris
20
21
#if defined(__ELF__) && !defined(__FreeBSD__)
22
23
/*
24
 * Copyright 2012-present Facebook, Inc.
25
 *
26
 * Licensed under the Apache License, Version 2.0 (the "License");
27
 * you may not use this file except in compliance with the License.
28
 * You may obtain a copy of the License at
29
 *
30
 *   http://www.apache.org/licenses/LICENSE-2.0
31
 *
32
 * Unless required by applicable law or agreed to in writing, software
33
 * distributed under the License is distributed on an "AS IS" BASIS,
34
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
35
 * See the License for the specific language governing permissions and
36
 * limitations under the License.
37
 */
38
39
/** This file was edited for ClickHouse.
40
  */
41
42
#include "common/dwarf.h"
43
44
#include <cstring>
45
46
#include "common/elf.h"
47
#include "common/logging.h"
48
49
8.94k
#define DW_CHILDREN_no 0
50
51
8.94k
#define DW_FORM_addr 1
52
0
#define DW_FORM_block1 0x0a
53
0
#define DW_FORM_block2 3
54
0
#define DW_FORM_block4 4
55
0
#define DW_FORM_block 9
56
0
#define DW_FORM_exprloc 0x18
57
0
#define DW_FORM_data1 0x0b
58
0
#define DW_FORM_ref1 0x11
59
8.94k
#define DW_FORM_data2 0x05
60
8.94k
#define DW_FORM_ref2 0x12
61
0
#define DW_FORM_data4 0x06
62
0
#define DW_FORM_ref4 0x13
63
0
#define DW_FORM_data8 0x07
64
0
#define DW_FORM_ref8 0x14
65
0
#define DW_FORM_ref_sig8 0x20
66
0
#define DW_FORM_sdata 0x0d
67
1.77M
#define DW_FORM_udata 0x0f
68
0
#define DW_FORM_ref_udata 0x15
69
0
#define DW_FORM_flag 0x0c
70
0
#define DW_FORM_flag_present 0x19
71
43.6k
#define DW_FORM_sec_offset 0x17
72
43.6k
#define DW_FORM_ref_addr 0x10
73
0
#define DW_FORM_string 0x08
74
0
#define DW_FORM_strp 0x0e
75
0
#define DW_FORM_indirect 0x16
76
0
#define DW_FORM_strx 0x1a
77
0
#define DW_FORM_addrx 0x1b
78
0
#define DW_FORM_ref_sup4 0x1c
79
0
#define DW_FORM_strp_sup 0x1d
80
0
#define DW_FORM_data16 0x1e
81
1.97M
#define DW_FORM_line_strp 0x1f
82
212k
#define DW_FORM_implicit_const 0x21
83
8.94k
#define DW_FORM_rnglistx 0x23
84
0
#define DW_FORM_loclistx 0x22
85
0
#define DW_FORM_ref_sup8 0x24
86
26.8k
#define DW_FORM_strx1 0x25
87
0
#define DW_FORM_strx2 0x26
88
0
#define DW_FORM_strx3 0x27
89
0
#define DW_FORM_strx4 0x28
90
0
#define DW_FORM_addrx1 0x29
91
0
#define DW_FORM_addrx2 0x2a
92
0
#define DW_FORM_addrx3 0x2b
93
0
#define DW_FORM_addrx4 0x2c
94
95
4.47k
#define DW_TAG_compile_unit 0x11
96
0
#define DW_TAG_subprogram 0x2e
97
0
#define DW_TAG_try_block 0x32
98
0
#define DW_TAG_catch_block 0x25
99
0
#define DW_TAG_entry_point 0x03
100
0
#define DW_TAG_common_block 0x1a
101
0
#define DW_TAG_lexical_block 0x0b
102
103
4.47k
#define DW_AT_stmt_list 0x10
104
4.47k
#define DW_AT_comp_dir 0x1b
105
4.47k
#define DW_AT_name 0x03
106
0
#define DW_AT_high_pc 0x12
107
4.47k
#define DW_AT_low_pc 0x11
108
4.47k
#define DW_AT_entry_pc 0x52
109
0
#define DW_AT_ranges 0x55
110
0
#define DW_AT_abstract_origin 0x31
111
0
#define DW_AT_call_line 0x59
112
0
#define DW_AT_call_file 0x58
113
0
#define DW_AT_linkage_name 0x6e
114
0
#define DW_AT_specification 0x47
115
4.47k
#define DW_AT_str_offsets_base 0x72
116
4.47k
#define DW_AT_addr_base 0x73
117
4.47k
#define DW_AT_rnglists_base 0x74
118
3.92k
#define DW_AT_loclists_base 0x8c
119
4.47k
#define DW_AT_GNU_ranges_base 0x2132
120
4.47k
#define DW_AT_GNU_addr_base 0x2133
121
122
0
#define DW_LNE_define_file 0x03
123
8.50M
#define DW_LNS_copy 0x01
124
11.7M
#define DW_LNS_advance_pc 0x02
125
52.9M
#define DW_LNS_advance_line 0x03
126
3.90M
#define DW_LNS_set_file 0x04
127
28.4M
#define DW_LNS_set_column 0x05
128
16.7M
#define DW_LNS_negate_stmt 0x06
129
0
#define DW_LNS_set_basic_block 0x07
130
15.6M
#define DW_LNS_const_add_pc 0x08
131
0
#define DW_LNS_fixed_advance_pc 0x09
132
2.78M
#define DW_LNS_set_prologue_end 0x0a
133
2.73M
#define DW_LNS_set_epilogue_begin 0x0b
134
0
#define DW_LNS_set_isa 0x0c
135
2.49M
#define DW_LNE_end_sequence 0x01
136
2.50M
#define DW_LNE_set_address 0x02
137
0
#define DW_LNE_set_discriminator 0x04
138
139
1.98M
#define DW_LNCT_path 0x1
140
1.77M
#define DW_LNCT_directory_index 0x2
141
0
#define DW_LNCT_timestamp 0x3
142
0
#define DW_LNCT_size 0x4
143
0
#define DW_LNCT_MD5 0x5
144
145
0
#define DW_RLE_end_of_list 0x0
146
0
#define DW_RLE_base_addressx 0x1
147
0
#define DW_RLE_startx_endx 0x2
148
0
#define DW_RLE_startx_length 0x3
149
0
#define DW_RLE_offset_pair 0x4
150
0
#define DW_RLE_base_address 0x5
151
0
#define DW_RLE_start_end 0x6
152
0
#define DW_RLE_start_length 0x7
153
154
namespace doris {
155
156
// Binds this reader to `elf` and captures a view of every DWARF section that
// the reader may consult (each view is empty when getSection() returns nothing).
//
// If any of .debug_info, .debug_abbrev, .debug_line or .debug_str is empty the
// ELF handle is dropped (elf_ is reset to nullptr).
Dwarf::Dwarf(const std::shared_ptr<Elf>& elf)
        : elf_(elf),
          abbrev_(getSection(".debug_abbrev")),
          addr_(getSection(".debug_addr")),
          aranges_(getSection(".debug_aranges")),
          info_(getSection(".debug_info")),
          line_(getSection(".debug_line")),
          line_str_(getSection(".debug_line_str")),
          loclists_(getSection(".debug_loclists")),
          ranges_(getSection(".debug_ranges")),
          rnglists_(getSection(".debug_rnglists")),
          str_(getSection(".debug_str")),
          str_offsets_(getSection(".debug_str_offsets")) {
    // Optional sections:
    //  - aranges_: for fast address range lookup.
    //     If missing, .debug_info can be used - but it's much slower (linear
    //     scan).
    //  - ranges_ (DWARF 4) / rnglists_ (DWARF 5): non-contiguous address
    //    ranges of debugging information entries.
    //    Used for inline function address lookup.
    const bool has_required_sections =
            !info_.empty() && !abbrev_.empty() && !line_.empty() && !str_.empty();
    if (!has_required_sections) {
        elf_ = nullptr;
    }
}
180
181
14.2k
// Wraps the bytes `d`; the 64-bit flag starts out false and is reassigned by
// next() for every chunk it yields.
Dwarf::Section::Section(std::string_view d)
        : is64_bit(false),
          data(d) {}
182
183
// Logs a fmt-formatted message at FATAL severity when `cond` is false.
// The do/while(false) wrapper makes the macro usable as a single statement.
#define SAFE_CHECK(cond, ...)                       \
    do {                                            \
        if (!(cond)) {                              \
            LOG(FATAL) << fmt::format(__VA_ARGS__); \
        }                                           \
    } while (false)
187
188
namespace {
189
// Maximum number of DIEAbbreviation to cache in a compilation unit. Used to
190
// speed up inline function lookup.
191
const uint32_t kMaxAbbreviationEntries = 1000;
192
193
// All following read* functions read from a std::string_view, advancing the
194
// std::string_view, and aborting if there's not enough room.
195
196
// Read (bitwise) one object of type T
197
template <typename T>
198
    requires std::is_trivial_v<T> && std::is_standard_layout_v<T>
199
50.0G
T read(std::string_view& sp) {
200
50.0G
    SAFE_CHECK(sp.size() >= sizeof(T), "underflow: expected bytes {}, got bytes {}", sizeof(T),
201
50.0G
               sp.size());
202
50.0G
    T x;
203
50.0G
    memcpy(&x, sp.data(), sizeof(T));
204
50.0G
    sp.remove_prefix(sizeof(T));
205
50.0G
    return x;
206
50.0G
}
dwarf.cpp:_ZN5doris12_GLOBAL__N_14readIhEET_RSt17basic_string_viewIcSt11char_traitsIcEE
Line
Count
Source
199
406M
T read(std::string_view& sp) {
200
406M
    SAFE_CHECK(sp.size() >= sizeof(T), "underflow: expected bytes {}, got bytes {}", sizeof(T),
201
406M
               sp.size());
202
406M
    T x;
203
406M
    memcpy(&x, sp.data(), sizeof(T));
204
406M
    sp.remove_prefix(sizeof(T));
205
406M
    return x;
206
406M
}
dwarf.cpp:_ZN5doris12_GLOBAL__N_14readImEET_RSt17basic_string_viewIcSt11char_traitsIcEE
Line
Count
Source
199
49.6G
T read(std::string_view& sp) {
200
49.6G
    SAFE_CHECK(sp.size() >= sizeof(T), "underflow: expected bytes {}, got bytes {}", sizeof(T),
201
49.6G
               sp.size());
202
49.6G
    T x;
203
49.6G
    memcpy(&x, sp.data(), sizeof(T));
204
49.6G
    sp.remove_prefix(sizeof(T));
205
49.6G
    return x;
206
49.6G
}
dwarf.cpp:_ZN5doris12_GLOBAL__N_14readIjEET_RSt17basic_string_viewIcSt11char_traitsIcEE
Line
Count
Source
199
23.7M
T read(std::string_view& sp) {
200
23.7M
    SAFE_CHECK(sp.size() >= sizeof(T), "underflow: expected bytes {}, got bytes {}", sizeof(T),
201
23.7M
               sp.size());
202
23.7M
    T x;
203
23.7M
    memcpy(&x, sp.data(), sizeof(T));
204
23.7M
    sp.remove_prefix(sizeof(T));
205
23.7M
    return x;
206
23.7M
}
dwarf.cpp:_ZN5doris12_GLOBAL__N_14readItEET_RSt17basic_string_viewIcSt11char_traitsIcEE
Line
Count
Source
199
10.8M
T read(std::string_view& sp) {
200
10.8M
    SAFE_CHECK(sp.size() >= sizeof(T), "underflow: expected bytes {}, got bytes {}", sizeof(T),
201
10.8M
               sp.size());
202
10.8M
    T x;
203
10.8M
    memcpy(&x, sp.data(), sizeof(T));
204
10.8M
    sp.remove_prefix(sizeof(T));
205
10.8M
    return x;
206
10.8M
}
dwarf.cpp:_ZN5doris12_GLOBAL__N_14readIaEET_RSt17basic_string_viewIcSt11char_traitsIcEE
Line
Count
Source
199
4.47k
T read(std::string_view& sp) {
200
4.47k
    SAFE_CHECK(sp.size() >= sizeof(T), "underflow: expected bytes {}, got bytes {}", sizeof(T),
201
4.47k
               sp.size());
202
4.47k
    T x;
203
4.47k
    memcpy(&x, sp.data(), sizeof(T));
204
4.47k
    sp.remove_prefix(sizeof(T));
205
4.47k
    return x;
206
4.47k
}
207
208
// Read (bitwise) an unsigned number of N bytes (N in 1, 2, 3, 4).
209
template <size_t N>
210
26.8k
uint64_t readU64(std::string_view& sp) {
211
26.8k
    SAFE_CHECK(sp.size() >= N, "underflow");
212
26.8k
    uint64_t x = 0;
213
26.8k
    memcpy(&x, sp.data(), N);
214
26.8k
    sp.remove_prefix(N);
215
26.8k
    return x;
216
26.8k
}
dwarf.cpp:_ZN5doris12_GLOBAL__N_17readU64ILm1EEEmRSt17basic_string_viewIcSt11char_traitsIcEE
Line
Count
Source
210
26.8k
uint64_t readU64(std::string_view& sp) {
211
26.8k
    SAFE_CHECK(sp.size() >= N, "underflow");
212
26.8k
    uint64_t x = 0;
213
26.8k
    memcpy(&x, sp.data(), N);
214
26.8k
    sp.remove_prefix(N);
215
26.8k
    return x;
216
26.8k
}
Unexecuted instantiation: dwarf.cpp:_ZN5doris12_GLOBAL__N_17readU64ILm2EEEmRSt17basic_string_viewIcSt11char_traitsIcEE
Unexecuted instantiation: dwarf.cpp:_ZN5doris12_GLOBAL__N_17readU64ILm3EEEmRSt17basic_string_viewIcSt11char_traitsIcEE
Unexecuted instantiation: dwarf.cpp:_ZN5doris12_GLOBAL__N_17readU64ILm4EEEmRSt17basic_string_viewIcSt11char_traitsIcEE
217
218
// Read ULEB (unsigned) varint value; algorithm from the DWARF spec
219
111M
uint64_t readULEB(std::string_view& sp, uint8_t& shift, uint8_t& val) {
220
111M
    uint64_t r = 0;
221
111M
    shift = 0;
222
161M
    do {
223
161M
        val = read<uint8_t>(sp);
224
161M
        r |= (uint64_t(val & 0x7f) << shift);
225
161M
        shift += 7;
226
161M
    } while (val & 0x80);
227
111M
    return r;
228
111M
}
229
230
58.7M
uint64_t readULEB(std::string_view& sp) {
231
58.7M
    uint8_t shift;
232
58.7M
    uint8_t val;
233
58.7M
    return readULEB(sp, shift, val);
234
58.7M
}
235
236
// Read SLEB (signed) varint value; algorithm from the DWARF spec
237
52.9M
int64_t readSLEB(std::string_view& sp) {
238
52.9M
    uint8_t shift;
239
52.9M
    uint8_t val;
240
52.9M
    uint64_t r = readULEB(sp, shift, val);
241
242
52.9M
    if (shift < 64 && (val & 0x40)) {
243
25.7M
        r |= -(1ULL << shift); // sign extend
244
25.7M
    }
245
246
52.9M
    return r;
247
52.9M
}
248
249
// Read a value of "section offset" type, which may be 4 or 8 bytes
250
12.9M
uint64_t readOffset(std::string_view& sp, bool is64_bit) {
251
12.9M
    return is64_bit ? read<uint64_t>(sp) : read<uint32_t>(sp);
252
12.9M
}
253
254
// Read "len" bytes
255
0
std::string_view readBytes(std::string_view& sp, uint64_t len) {
256
0
    SAFE_CHECK(len <= sp.size(), "invalid string length: {} vs. {}", len, sp.size());
257
0
    std::string_view ret(sp.data(), len);
258
0
    sp.remove_prefix(len);
259
0
    return ret;
260
0
}
261
262
// Read a null-terminated string
263
1.99M
std::string_view readNullTerminated(std::string_view& sp) {
264
1.99M
    const char* p = static_cast<const char*>(memchr(sp.data(), 0, sp.size()));
265
1.99M
    SAFE_CHECK(p, "invalid null-terminated string");
266
1.99M
    std::string_view ret(sp.data(), p - sp.data());
267
1.99M
    sp = std::string_view(p + 1, sp.size());
268
1.99M
    return ret;
269
1.99M
}
270
271
// Get a string from the section
272
1.99M
std::string_view getStringFromStringSection(std::string_view section, uint64_t offset) {
273
1.99M
    SAFE_CHECK(offset < section.size(), "invalid section offset");
274
1.99M
    std::string_view sp(section);
275
1.99M
    sp.remove_prefix(offset);
276
1.99M
    return readNullTerminated(sp);
277
1.99M
}
278
279
// Skip over padding until sp.data() - start is a multiple of alignment
280
10.8M
void skipPadding(std::string_view& sp, const char* start, size_t alignment) {
281
10.8M
    size_t remainder = (sp.data() - start) % alignment;
282
10.8M
    if (remainder) {
283
10.8M
        SAFE_CHECK(alignment - remainder <= sp.size(), "invalid padding");
284
10.8M
        sp.remove_prefix(alignment - remainder);
285
10.8M
    }
286
10.8M
}
287
288
} // namespace
289
290
// Stores the three path components and normalizes them:
//  - an empty file clears both directories;
//  - an absolute file (leading '/') clears both directories;
//  - an absolute subDir clears baseDir;
//  - finally, if baseDir is empty the two directories are swapped, so baseDir_
//    is never empty while subDir_ is not.
Dwarf::Path::Path(std::string_view baseDir, std::string_view subDir, std::string_view file)
        : baseDir_(baseDir), subDir_(subDir), file_(file) {
    const bool no_file = file_.empty();
    if (no_file || file_.front() == '/') {
        // Nothing to prefix (no file), or the file already is a full path.
        baseDir_ = {};
        subDir_ = {};
    }
    if (no_file) {
        return;
    }

    const bool sub_dir_is_absolute = !subDir_.empty() && subDir_.front() == '/';
    if (sub_dir_is_absolute) {
        baseDir_ = {};
    }

    // Make sure it's never the case that baseDir_ is empty, but subDir_ isn't.
    if (baseDir_.empty()) {
        baseDir_.swap(subDir_);
    }
}
314
315
8.94k
size_t Dwarf::Path::size() const {
316
8.94k
    size_t size = 0;
317
8.94k
    bool needs_slash = false;
318
319
8.94k
    if (!baseDir_.empty()) {
320
8.94k
        size += baseDir_.size();
321
8.94k
        needs_slash = baseDir_.back() != '/';
322
8.94k
    }
323
324
8.94k
    if (!subDir_.empty()) {
325
0
        size += needs_slash;
326
0
        size += subDir_.size();
327
0
        needs_slash = subDir_.back() != '/';
328
0
    }
329
330
8.94k
    if (!file_.empty()) {
331
8.94k
        size += needs_slash;
332
8.94k
        size += file_.size();
333
8.94k
    }
334
335
8.94k
    return size;
336
8.94k
}
337
338
0
size_t Dwarf::Path::toBuffer(char* buf, size_t bufSize) const {
339
0
    size_t total_size = 0;
340
0
    bool needs_slash = false;
341
342
0
    auto append = [&](std::string_view sp) {
343
0
        if (bufSize >= 2) {
344
0
            size_t to_copy = std::min(sp.size(), bufSize - 1);
345
0
            memcpy(buf, sp.data(), to_copy);
346
0
            buf += to_copy;
347
0
            bufSize -= to_copy;
348
0
        }
349
0
        total_size += sp.size();
350
0
    };
351
352
0
    if (!baseDir_.empty()) {
353
0
        append(baseDir_);
354
0
        needs_slash = baseDir_.back() != '/';
355
0
    }
356
0
    if (!subDir_.empty()) {
357
0
        if (needs_slash) {
358
0
            append("/");
359
0
        }
360
0
        append(subDir_);
361
0
        needs_slash = subDir_.back() != '/';
362
0
    }
363
0
    if (!file_.empty()) {
364
0
        if (needs_slash) {
365
0
            append("/");
366
0
        }
367
0
        append(file_);
368
0
    }
369
0
    if (bufSize) {
370
0
        *buf = '\0';
371
0
    }
372
373
0
    SAFE_CHECK(total_size == size(), "Size mismatch");
374
0
    return total_size;
375
0
}
376
377
4.47k
void Dwarf::Path::toString(std::string& dest) const {
378
4.47k
    size_t initial_size = dest.size();
379
4.47k
    dest.reserve(initial_size + size());
380
4.47k
    if (!baseDir_.empty()) {
381
4.47k
        dest.append(baseDir_.begin(), baseDir_.end());
382
4.47k
    }
383
4.47k
    if (!subDir_.empty()) {
384
0
        if (!dest.empty() && dest.back() != '/') {
385
0
            dest.push_back('/');
386
0
        }
387
0
        dest.append(subDir_.begin(), subDir_.end());
388
0
    }
389
4.47k
    if (!file_.empty()) {
390
4.47k
        if (!dest.empty() && dest.back() != '/') {
391
4.47k
            dest.push_back('/');
392
4.47k
        }
393
4.47k
        dest.append(file_.begin(), file_.end());
394
4.47k
    }
395
4.47k
    SAFE_CHECK(dest.size() == initial_size + size(), "Size mismatch");
396
4.47k
}
397
398
// Next chunk in section
399
10.8M
bool Dwarf::Section::next(std::string_view& chunk) {
400
10.8M
    chunk = data;
401
10.8M
    if (chunk.empty()) {
402
5.31k
        return false;
403
5.31k
    }
404
405
    // Initial length is a uint32_t value for a 32-bit section, and
406
    // a 96-bit value (0xffffffff followed by the 64-bit length) for a 64-bit
407
    // section.
408
10.8M
    auto initial_length = read<uint32_t>(chunk);
409
10.8M
    is64_bit = (initial_length == uint32_t(-1));
410
10.8M
    auto length = is64_bit ? read<uint64_t>(chunk) : initial_length;
411
10.8M
    SAFE_CHECK(length <= chunk.size(), "invalid DWARF section");
412
10.8M
    chunk = std::string_view(chunk.data(), length);
413
10.8M
    data = std::string_view(chunk.end(), data.end() - chunk.end());
414
10.8M
    return true;
415
10.8M
}
416
417
11.7k
std::string_view Dwarf::getSection(const char* name) const {
418
11.7k
    std::optional<Elf::Section> elf_section = elf_->findSectionByName(name);
419
11.7k
    if (!elf_section) {
420
5.86k
        return {};
421
5.86k
    }
422
423
5.85k
#ifdef SHF_COMPRESSED
424
5.85k
    if (elf_section->header.sh_flags & SHF_COMPRESSED) {
425
0
        return {};
426
0
    }
427
5.85k
#endif
428
429
5.85k
    return {elf_section->begin(), elf_section->size()};
430
5.85k
}
431
432
// static
433
8.94k
bool Dwarf::readAbbreviation(std::string_view& section, DIEAbbreviation& abbr) {
434
    // abbreviation code
435
8.94k
    abbr.code = readULEB(section);
436
8.94k
    if (abbr.code == 0) {
437
0
        return false;
438
0
    }
439
440
    // abbreviation tag
441
8.94k
    abbr.tag = readULEB(section);
442
443
    // does this entry have children?
444
8.94k
    abbr.has_children = (read<uint8_t>(section) != DW_CHILDREN_no);
445
446
    // attributes
447
8.94k
    const char* attribute_begin = section.data();
448
106k
    for (;;) {
449
106k
        SAFE_CHECK(!section.empty(), "invalid attribute section");
450
106k
        auto attr = readAttributeSpec(section);
451
106k
        if (attr.name == 0 && attr.form == 0) {
452
8.94k
            break;
453
8.94k
        }
454
106k
    }
455
456
8.94k
    abbr.attributes = std::string_view(attribute_begin, section.data() - attribute_begin);
457
8.94k
    return true;
458
8.94k
}
459
460
// static
461
0
void Dwarf::readCompilationUnitAbbrs(std::string_view abbrev, CompilationUnit& cu) {
462
0
    abbrev.remove_prefix(cu.abbrev_offset);
463
464
0
    DIEAbbreviation abbr;
465
0
    while (readAbbreviation(abbrev, abbr)) {
466
        // Abbreviation code 0 is reserved for null debugging information entries.
467
0
        if (abbr.code != 0 && abbr.code <= kMaxAbbreviationEntries) {
468
0
            cu.abbr_cache[abbr.code - 1] = abbr;
469
0
        }
470
0
    }
471
0
}
472
473
size_t Dwarf::forEachChild(const CompilationUnit& cu, const Die& die,
474
0
                           std::function<bool(const Die& die)> f) const {
475
0
    size_t next_die_offset = forEachAttribute(cu, die, [&](const Attribute&) { return true; });
476
0
    if (!die.abbr.has_children) {
477
0
        return next_die_offset;
478
0
    }
479
480
0
    auto child_die = getDieAtOffset(cu, next_die_offset);
481
0
    while (child_die.code != 0) {
482
0
        if (!f(child_die)) {
483
0
            return child_die.offset;
484
0
        }
485
486
        // NOTE: Don't run `f` over grandchildren, just skip over them.
487
0
        size_t sibling_offset = forEachChild(cu, child_die, [](const Die&) { return true; });
488
0
        child_die = getDieAtOffset(cu, sibling_offset);
489
0
    }
490
491
    // childDie is now a dummy die whose offset is to the code 0 marking the
492
    // end of the children. Need to add one to get the offset of the next die.
493
0
    return child_die.offset + 1;
494
0
}
495
496
/*
497
 * Iterate over all attributes of the given DIE, calling the given callable
498
 * for each. Iteration is stopped early if any of the calls return false.
499
 */
500
size_t Dwarf::forEachAttribute(const CompilationUnit& cu, const Die& die,
501
8.94k
                               std::function<bool(const Attribute& die)> f) const {
502
8.94k
    auto attrs = die.abbr.attributes;
503
8.94k
    auto values = std::string_view {info_.data() + die.offset + die.attr_offset,
504
8.94k
                                    cu.offset + cu.size - die.offset - die.attr_offset};
505
106k
    while (auto spec = readAttributeSpec(attrs)) {
506
97.3k
        auto attr = readAttribute(cu, die, spec, values);
507
97.3k
        if (!f(attr)) {
508
0
            return static_cast<size_t>(-1);
509
0
        }
510
97.3k
    }
511
8.94k
    return values.data() - info_.data();
512
8.94k
}
513
514
// Decodes the value of one attribute from the front of `info` according to
// spec.form, advancing `info` past the bytes consumed, and returns it together
// with `spec` and `die`.
//
// Forms that index a per-CU table yield an empty string / zero when the
// corresponding base attribute of `cu` has not been set yet. Supplementary
// object file forms and unknown forms fail a SAFE_CHECK.
Dwarf::Attribute Dwarf::readAttribute(const CompilationUnit& cu, const Die& die, AttributeSpec spec,
                                      std::string_view& info) const {
    // DWARF 5 introduces new FORMs whose values are relative to some base attrs:
    // DW_AT_str_offsets_base, DW_AT_rnglists_base, DW_AT_addr_base.
    // Debug Fission DWARF 4 uses GNU DW_AT_GNU_ranges_base & DW_AT_GNU_addr_base.
    //
    // The order in which attributes appear in a CU is not defined.
    // The DW_AT_*_base attrs may appear after attributes that need them.
    // The DW_AT_*_base attrs are CU specific; so we read them just after
    // reading the CU header. During this first pass return empty values
    // when encountering a FORM that depends on DW_AT_*_base.

    // Resolves entry `index` of the CU's string offsets table to a string.
    auto string_at_index = [&](uint64_t index) -> std::string_view {
        if (!cu.str_offsets_base.has_value()) {
            return std::string_view();
        }
        // DWARF 5: 7.26 String Offsets Table
        // The DW_AT_str_offsets_base attribute points to the first entry following
        // the header. The entries are indexed sequentially from this base entry,
        // starting from 0.
        auto entry = str_offsets_.substr(*cu.str_offsets_base +
                                         index * (cu.is64Bit ? sizeof(uint64_t) : sizeof(uint32_t)));
        const uint64_t string_offset = readOffset(entry, cu.is64Bit);
        return getStringFromStringSection(str_, string_offset);
    };

    // Resolves entry `index` of the CU's address table to an address.
    auto address_at_index = [&](uint64_t index) -> uint64_t {
        if (!cu.addr_base.has_value()) {
            return uint64_t(0);
        }
        // DWARF 5: 7.27 Address Table
        // The DW_AT_addr_base attribute points to the first entry following the
        // header. The entries are indexed sequentially from this base entry,
        // starting from 0.
        auto entry = addr_.substr(*cu.addr_base + index * sizeof(uint64_t));
        return read<uint64_t>(entry);
    };

    switch (spec.form) {
    case DW_FORM_addr:
        return {spec, die, read<uintptr_t>(info)};

    // Length-prefixed blocks.
    case DW_FORM_block1:
        return {spec, die, readBytes(info, read<uint8_t>(info))};
    case DW_FORM_block2:
        return {spec, die, readBytes(info, read<uint16_t>(info))};
    case DW_FORM_block4:
        return {spec, die, readBytes(info, read<uint32_t>(info))};
    case DW_FORM_block:
    case DW_FORM_exprloc:
        return {spec, die, readBytes(info, readULEB(info))};

    // Fixed-width constants and references.
    case DW_FORM_data1:
    case DW_FORM_ref1:
        return {spec, die, read<uint8_t>(info)};
    case DW_FORM_data2:
    case DW_FORM_ref2:
        return {spec, die, read<uint16_t>(info)};
    case DW_FORM_data4:
    case DW_FORM_ref4:
        return {spec, die, read<uint32_t>(info)};
    case DW_FORM_data8:
    case DW_FORM_ref8:
    case DW_FORM_ref_sig8:
        return {spec, die, read<uint64_t>(info)};

    // Variable-width constants and references.
    case DW_FORM_sdata:
        return {spec, die, static_cast<uint64_t>(readSLEB(info))};
    case DW_FORM_udata:
    case DW_FORM_ref_udata:
        return {spec, die, readULEB(info)};

    case DW_FORM_flag:
        return {spec, die, read<uint8_t>(info)};
    case DW_FORM_flag_present:
        return {spec, die, 1ULL};

    case DW_FORM_sec_offset:
    case DW_FORM_ref_addr:
        return {spec, die, readOffset(info, die.is64Bit)};

    case DW_FORM_string:
        return {spec, die, readNullTerminated(info)};
    case DW_FORM_strp:
        return {spec, die, getStringFromStringSection(str_, readOffset(info, die.is64Bit))};

    case DW_FORM_indirect: // form is explicitly specified
        // Update spec with the actual FORM.
        spec.form = readULEB(info);
        return readAttribute(cu, die, spec, info);

    // DWARF 5:
    case DW_FORM_implicit_const: // form is explicitly specified
        // For attributes with this form, the attribute specification contains a
        // third part, which is a signed LEB128 number. The value of this number
        // is used as the value of the attribute, and no value is stored in the
        // .debug_info section.
        return {spec, die, static_cast<uint64_t>(spec.implicitConst)};

    case DW_FORM_addrx:
        return {spec, die, address_at_index(readULEB(info))};
    case DW_FORM_addrx1:
        return {spec, die, address_at_index(readU64<1>(info))};
    case DW_FORM_addrx2:
        return {spec, die, address_at_index(readU64<2>(info))};
    case DW_FORM_addrx3:
        return {spec, die, address_at_index(readU64<3>(info))};
    case DW_FORM_addrx4:
        return {spec, die, address_at_index(readU64<4>(info))};

    case DW_FORM_line_strp:
        return {spec, die, getStringFromStringSection(line_str_, readOffset(info, die.is64Bit))};

    case DW_FORM_strx:
        return {spec, die, string_at_index(readULEB(info))};
    case DW_FORM_strx1:
        return {spec, die, string_at_index(readU64<1>(info))};
    case DW_FORM_strx2:
        return {spec, die, string_at_index(readU64<2>(info))};
    case DW_FORM_strx3:
        return {spec, die, string_at_index(readU64<3>(info))};
    case DW_FORM_strx4:
        return {spec, die, string_at_index(readU64<4>(info))};

    case DW_FORM_rnglistx: {
        // The index is always consumed, even when the base is not known yet.
        const auto list_index = readULEB(info);
        if (!cu.rnglists_base.has_value()) {
            return {spec, die, 0ULL};
        }
        const uint64_t offset_size = cu.is64Bit ? sizeof(uint64_t) : sizeof(uint32_t);
        auto entry = rnglists_.substr(*cu.rnglists_base + list_index * offset_size);
        const auto list_offset = readOffset(entry, cu.is64Bit);
        return {spec, die, *cu.rnglists_base + list_offset};
    }

    case DW_FORM_loclistx: {
        // The index is always consumed, even when the base is not known yet.
        const auto list_index = readULEB(info);
        if (!cu.loclists_base.has_value()) {
            return {spec, die, 0ULL};
        }
        const uint64_t offset_size = cu.is64Bit ? sizeof(uint64_t) : sizeof(uint32_t);
        auto entry = loclists_.substr(*cu.loclists_base + list_index * offset_size);
        const auto list_offset = readOffset(entry, cu.is64Bit);
        return {spec, die, *cu.loclists_base + list_offset};
    }

    case DW_FORM_data16:
        return {spec, die, readBytes(info, 16)};

    case DW_FORM_ref_sup4:
    case DW_FORM_ref_sup8:
    case DW_FORM_strp_sup:
        SAFE_CHECK(false, "Unexpected DWARF5 supplimentary object files");

    default:
        SAFE_CHECK(false, "invalid attribute form");
    }
    return {spec, die, 0ULL};
}
673
674
// static
675
212k
Dwarf::AttributeSpec Dwarf::readAttributeSpec(std::string_view& sp) {
676
212k
    Dwarf::AttributeSpec spec;
677
212k
    spec.name = readULEB(sp);
678
212k
    spec.form = readULEB(sp);
679
212k
    if (spec.form == DW_FORM_implicit_const) {
680
0
        spec.implicitConst = readSLEB(sp);
681
0
    }
682
212k
    return spec;
683
212k
}
684
685
4.47k
// Parses the unit header located `offset` bytes into .debug_info and returns
// the resulting CompilationUnit.
//
// For DWARF 5 compile/skeleton units whose first DIE is a DW_TAG_compile_unit,
// the DW_AT_*_base attributes of that DIE are also read into the unit. For any
// other DWARF 5 unit type the unit is returned right after unit_type is read.
Dwarf::CompilationUnit Dwarf::getCompilationUnit(uint64_t offset) const {
    // SAFE_CHECK(offset < info_.size(), "unexpected offset");
    CompilationUnit cu;
    cu.offset = offset;
    std::string_view header(info_);
    header.remove_prefix(offset);

    // 1) unit_length
    const auto initial_length = read<uint32_t>(header);
    cu.is64Bit = (initial_length == uint32_t(-1));
    cu.size = cu.is64Bit ? read<uint64_t>(header) : initial_length;
    SAFE_CHECK(cu.size <= header.size(), "invalid chunk size");
    // Account for the length field itself so that cu.size spans the whole unit.
    cu.size += cu.is64Bit ? 12 : 4;

    // 2) version
    cu.version = read<uint16_t>(header);
    SAFE_CHECK(cu.version >= 2 && cu.version <= 5, "invalid info version");

    if (cu.version != 5) {
        // DWARF4 has a single type of unit in .debug_info
        cu.unit_type = DW_UT_compile;
        // 3) debug_abbrev_offset
        cu.abbrev_offset = readOffset(header, cu.is64Bit);
        // 4) address_size
        cu.addr_size = read<uint8_t>(header);
        SAFE_CHECK(cu.addr_size == sizeof(uintptr_t), "invalid address size");
    } else {
        // DWARF5: 7.5.1.1 Full and Partial Compilation Unit Headers
        // 3) unit_type (new DWARF 5)
        cu.unit_type = read<uint8_t>(header);
        const bool is_skeleton = (cu.unit_type == DW_UT_skeleton);
        if (cu.unit_type != DW_UT_compile && !is_skeleton) {
            return cu;
        }
        // 4) address_size
        cu.addr_size = read<uint8_t>(header);
        SAFE_CHECK(cu.addr_size == sizeof(uintptr_t), "invalid address size");

        // 5) debug_abbrev_offset
        cu.abbrev_offset = readOffset(header, cu.is64Bit);

        if (is_skeleton) {
            // 6) dwo_id (consumed and discarded)
            static_cast<void>(read<uint64_t>(header));
        }
    }

    cu.first_die = header.data() - info_.data();
    if (cu.version < 5) {
        return cu;
    }

    Die die = getDieAtOffset(cu, cu.first_die);
    if (die.abbr.tag != DW_TAG_compile_unit) {
        return cu;
    }

    // Read the DW_AT_*_base attributes.
    // Attributes which use FORMs relative to these base attrs
    // will not have valid values during this first pass!
    forEachAttribute(cu, die, [&cu](const Attribute& attr) {
        const auto name = attr.spec.name;
        if (name == DW_AT_addr_base || name == DW_AT_GNU_addr_base) {
            cu.addr_base = std::get<uint64_t>(attr.attr_value);
        } else if (name == DW_AT_loclists_base) {
            cu.loclists_base = std::get<uint64_t>(attr.attr_value);
        } else if (name == DW_AT_rnglists_base || name == DW_AT_GNU_ranges_base) {
            cu.rnglists_base = std::get<uint64_t>(attr.attr_value);
        } else if (name == DW_AT_str_offsets_base) {
            cu.str_offsets_base = std::get<uint64_t>(attr.attr_value);
        }
        return true; // continue forEachAttribute
    });
    return cu;
}
764
765
// Finds the Compilation Unit starting at offset.
766
0
Dwarf::CompilationUnit Dwarf::findCompilationUnit(uint64_t targetOffset) const {
767
    // SAFE_CHECK(targetOffset < info_.size(), "unexpected target address");
768
0
    uint64_t offset = 0;
769
0
    while (offset < info_.size()) {
770
0
        std::string_view chunk(info_);
771
0
        chunk.remove_prefix(offset);
772
773
0
        auto initial_length = read<uint32_t>(chunk);
774
0
        auto is64_bit = (initial_length == static_cast<uint32_t>(-1));
775
0
        auto size = is64_bit ? read<uint64_t>(chunk) : initial_length;
776
0
        SAFE_CHECK(size <= chunk.size(), "invalid chunk size");
777
0
        size += is64_bit ? 12 : 4;
778
779
0
        if (offset + size > targetOffset) {
780
0
            break;
781
0
        }
782
0
        offset += size;
783
0
    }
784
0
    return getCompilationUnit(offset);
785
0
}
786
787
8.94k
// Returns the abbreviation declaration whose code equals `code`.
//
// The lookup is a linear scan of .debug_abbrev that begins `offset` bytes
// into the section and continues until readAbbreviation() reports that no
// further declaration could be read.
//
// A missing code is reported through SAFE_CHECK. The trailing return only
// exists so that every control path of this non-void function yields a
// value, even if SAFE_CHECK ever hands control back to the caller.
Dwarf::DIEAbbreviation Dwarf::getAbbreviation(uint64_t code, uint64_t offset) const {
    std::string_view section = abbrev_;
    section.remove_prefix(offset);

    Dwarf::DIEAbbreviation abbr;
    while (readAbbreviation(section, abbr)) {
        if (abbr.code == code) {
            return abbr;
        }
    }

    SAFE_CHECK(false, "could not find abbreviation code");
    // Not expected to be reached; avoids flowing off the end of the function.
    return Dwarf::DIEAbbreviation {};
}
801
802
// Decodes one attribute value encoded as `form` (a DW_FORM_* constant) from
// the front of `sp`. The helpers used here take `sp` by reference and are
// expected to advance it past the bytes they consume.
//
// Numeric forms are returned as uint64_t; block and string forms are returned
// as whatever readBytes() / readNullTerminated() /
// getStringFromStringSection() produce. `is64_bit` is forwarded to
// readOffset() for the offset-sized forms.
Dwarf::AttributeValue Dwarf::readAttributeValue(std::string_view& sp, uint64_t form,
                                                bool is64_bit) const {
    switch (form) {
    // --- machine address ---------------------------------------------------
    case DW_FORM_addr:
        return static_cast<uint64_t>(read<uintptr_t>(sp));

    // --- blocks: a length prefix followed by that many bytes ---------------
    case DW_FORM_block1: {
        const auto length = read<uint8_t>(sp);
        return readBytes(sp, length);
    }
    case DW_FORM_block2: {
        const auto length = read<uint16_t>(sp);
        return readBytes(sp, length);
    }
    case DW_FORM_block4: {
        const auto length = read<uint32_t>(sp);
        return readBytes(sp, length);
    }
    case DW_FORM_block:
    case DW_FORM_exprloc: {
        const auto length = readULEB(sp);
        return readBytes(sp, length);
    }

    // --- fixed-width constants, references and flags -----------------------
    case DW_FORM_data1:
    case DW_FORM_ref1:
    case DW_FORM_flag:
        return static_cast<uint64_t>(read<uint8_t>(sp));
    case DW_FORM_data2:
    case DW_FORM_ref2:
        return static_cast<uint64_t>(read<uint16_t>(sp));
    case DW_FORM_data4:
    case DW_FORM_ref4:
        return static_cast<uint64_t>(read<uint32_t>(sp));
    case DW_FORM_data8:
    case DW_FORM_ref8:
        return read<uint64_t>(sp);

    // --- LEB128-encoded constants and references ---------------------------
    case DW_FORM_sdata:
        return static_cast<uint64_t>(readSLEB(sp));
    case DW_FORM_udata:
    case DW_FORM_ref_udata:
        return readULEB(sp);

    // --- flag whose mere presence means "true"; consumes no bytes ----------
    case DW_FORM_flag_present:
        return static_cast<uint64_t>(1);

    // --- offsets whose width depends on the 32/64-bit format ---------------
    case DW_FORM_sec_offset:
    case DW_FORM_ref_addr:
        return readOffset(sp, is64_bit);

    // --- strings -----------------------------------------------------------
    case DW_FORM_string:
        return readNullTerminated(sp);
    case DW_FORM_strp:
        return getStringFromStringSection(str_, readOffset(sp, is64_bit));

    // --- the actual form is stored inline, ahead of the value --------------
    case DW_FORM_indirect: {
        const auto actual_form = readULEB(sp);
        return readAttributeValue(sp, actual_form, is64_bit);
    }

    default:
        SAFE_CHECK(false, "invalid attribute form");
        return static_cast<uint64_t>(1);
    }
}
859
860
/**
861
 * Find @address in .debug_aranges and return the offset in
862
 * .debug_info for compilation unit to which this address belongs.
863
 */
864
9.78k
bool Dwarf::findDebugInfoOffset(uintptr_t address, std::string_view aranges, uint64_t& offset) {
865
9.78k
    Section aranges_section(aranges);
866
9.78k
    std::string_view chunk;
867
10.8M
    while (aranges_section.next(chunk)) {
868
10.8M
        auto version = read<uint16_t>(chunk);
869
10.8M
        SAFE_CHECK(version == 2, "invalid aranges version");
870
871
10.8M
        offset = readOffset(chunk, aranges_section.is64Bit());
872
10.8M
        auto address_size = read<uint8_t>(chunk);
873
10.8M
        SAFE_CHECK(address_size == sizeof(uintptr_t), "invalid address size");
874
10.8M
        auto segment_size = read<uint8_t>(chunk);
875
10.8M
        SAFE_CHECK(segment_size == 0, "segmented architecture not supported");
876
877
        // Padded to a multiple of 2 addresses.
878
        // Strangely enough, this is the only place in the DWARF spec that requires
879
        // padding.
880
10.8M
        skipPadding(chunk, aranges.data(), 2 * sizeof(uintptr_t));
881
24.8G
        for (;;) {
882
24.8G
            auto start = read<uintptr_t>(chunk);
883
24.8G
            auto length = read<uintptr_t>(chunk);
884
885
24.8G
            if (start == 0 && length == 0) {
886
10.8M
                break;
887
10.8M
            }
888
889
            // Is our address in this range?
890
24.7G
            if (address >= start && address < start + length) {
891
4.47k
                return true;
892
4.47k
            }
893
24.7G
        }
894
10.8M
    }
895
5.31k
    return false;
896
9.78k
}
897
898
8.94k
// Builds the Die header for the debugging information entry located at
// `offset` (an offset into .debug_info) inside compilation unit `cu`.
//
// Only the abbreviation code is decoded here. For a non-zero code the
// matching abbreviation is attached, taken from cu.abbr_cache when that
// cache is populated and the code is below kMaxAbbreviationEntries, and
// from a scan via getAbbreviation() otherwise. A zero code returns the Die
// with only offset, is64Bit and code filled in.
Dwarf::Die Dwarf::getDieAtOffset(const CompilationUnit& cu, uint64_t offset) const {
    SAFE_CHECK(offset < info_.size(), "unexpected offset {}, info size {}", offset, info_.size());

    Die result;
    result.offset = offset;
    result.is64Bit = cu.is64Bit;

    // View from the DIE's first byte up to the end of its compilation unit.
    std::string_view cursor {info_.data() + offset, cu.offset + cu.size - offset};
    const auto abbrev_code = readULEB(cursor);
    result.code = abbrev_code;
    if (abbrev_code == 0) {
        return result;
    }

    // Attributes start right after the ULEB-encoded code.
    result.attr_offset = cursor.data() - info_.data() - offset;

    const bool cache_usable = !cu.abbr_cache.empty() && result.code < kMaxAbbreviationEntries;
    if (cache_usable) {
        // Cache slots are indexed by code - 1.
        result.abbr = cu.abbr_cache[result.code - 1];
    } else {
        result.abbr = getAbbreviation(result.code, cu.abbrev_offset);
    }
    return result;
}
916
917
/**
918
 * Find the @locationInfo for @address in the compilation unit represented
919
 * by the @sp .debug_info entry.
920
 * Returns whether the address was found.
921
 * Advances @sp to the next entry in .debug_info.
922
 */
923
bool Dwarf::findLocation(uintptr_t address, const LocationInfoMode mode, CompilationUnit& cu,
924
4.47k
                         LocationInfo& info, std::vector<SymbolizedFrame>& inline_frames) const {
925
4.47k
    Die die = getDieAtOffset(cu, cu.first_die);
926
    // Partial compilation unit (DW_TAG_partial_unit) is not supported.
927
4.47k
    SAFE_CHECK(die.abbr.tag == DW_TAG_compile_unit, "expecting compile unit entry");
928
929
    // Offset in .debug_line for the line number VM program for this CU
930
4.47k
    std::optional<uint64_t> line_offset = 0;
931
4.47k
    std::string_view compilation_directory;
932
4.47k
    std::optional<std::string_view> main_file_name;
933
4.47k
    std::optional<uint64_t> base_addr_cu;
934
935
48.6k
    forEachAttribute(cu, die, [&](const Attribute& attr) {
936
48.6k
        switch (attr.spec.name) {
937
4.47k
        case DW_AT_stmt_list:
938
            // Offset in .debug_line for the line number VM program for this
939
            // compilation unit
940
4.47k
            line_offset = std::get<uint64_t>(attr.attr_value);
941
4.47k
            break;
942
4.47k
        case DW_AT_comp_dir:
943
            // Compilation directory
944
4.47k
            compilation_directory = std::get<std::string_view>(attr.attr_value);
945
4.47k
            break;
946
4.47k
        case DW_AT_name:
947
            // File name of main file being compiled
948
4.47k
            main_file_name = std::get<std::string_view>(attr.attr_value);
949
4.47k
            break;
950
4.47k
        case DW_AT_low_pc:
951
4.47k
        case DW_AT_entry_pc:
952
            // 2.17.1: historically DW_AT_low_pc was used. DW_AT_entry_pc was
953
            // introduced in DWARF3. Support either to determine the base address of
954
            // the CU.
955
4.47k
            base_addr_cu = std::get<uint64_t>(attr.attr_value);
956
4.47k
            break;
957
48.6k
        }
958
        // Iterate through all attributes until find all above.
959
48.6k
        return true;
960
48.6k
    });
961
962
4.47k
    if (main_file_name) {
963
4.47k
        info.has_main_file = true;
964
4.47k
        info.main_file = Path(compilation_directory, "", *main_file_name);
965
4.47k
    }
966
967
4.47k
    if (!line_offset) {
968
0
        return false;
969
0
    }
970
971
4.47k
    std::string_view line_section(line_);
972
4.47k
    line_section.remove_prefix(*line_offset);
973
4.47k
    LineNumberVM line_vm(line_section, compilation_directory, str_, line_str_);
974
975
    // Execute line number VM program to find file and line
976
4.47k
    info.has_file_and_line = line_vm.findAddress(address, info.file, info.line);
977
978
4.47k
    bool check_inline = (mode == LocationInfoMode::FULL_WITH_INLINE);
979
980
4.47k
    if (info.has_file_and_line && check_inline) {
981
        // Re-get the compilation unit with abbreviation cached.
982
0
        cu.abbr_cache.clear();
983
0
        cu.abbr_cache.resize(kMaxAbbreviationEntries);
984
0
        readCompilationUnitAbbrs(abbrev_, cu);
985
986
        // Find the subprogram that matches the given address.
987
0
        Die subprogram;
988
0
        findSubProgramDieForAddress(cu, die, address, base_addr_cu, subprogram);
989
990
        // Subprogram is the DIE of caller function.
991
0
        if (/*check_inline &&*/ subprogram.abbr.has_children) {
992
            // Use an extra location and get its call file and call line, so that
993
            // they can be used for the second last location when we don't have
994
            // enough inline frames for all inline functions call stack.
995
0
            const size_t max_size = Dwarf::kMaxInlineLocationInfoPerFrame + 1;
996
0
            std::vector<CallLocation> call_locations;
997
0
            call_locations.reserve(Dwarf::kMaxInlineLocationInfoPerFrame + 1);
998
999
0
            findInlinedSubroutineDieForAddress(cu, subprogram, line_vm, address, base_addr_cu,
1000
0
                                               call_locations, max_size);
1001
0
            size_t num_found = call_locations.size();
1002
1003
0
            if (num_found > 0) {
1004
0
                const auto inner_most_file = info.file;
1005
0
                const auto inner_most_line = info.line;
1006
1007
                // Earlier we filled in locationInfo:
1008
                // - mainFile: the path to the CU -- the file where the non-inlined
1009
                //   call is made from.
1010
                // - file + line: the location of the inner-most inlined call.
1011
                // Here we already find inlined info so mainFile would be redundant.
1012
0
                info.has_main_file = false;
1013
0
                info.main_file = Path {};
1014
                // @findInlinedSubroutineDieForAddress fills inlineLocations[0] with the
1015
                // file+line of the non-inlined outer function making the call.
1016
                // locationInfo.name is already set by the caller by looking up the
1017
                // non-inlined function @address belongs to.
1018
0
                info.has_file_and_line = true;
1019
0
                info.file = call_locations[0].file;
1020
0
                info.line = call_locations[0].line;
1021
1022
                // The next inlined subroutine's call file and call line is the current
1023
                // caller's location.
1024
0
                for (size_t i = 0; i < num_found - 1; ++i) {
1025
0
                    call_locations[i].file = call_locations[i + 1].file;
1026
0
                    call_locations[i].line = call_locations[i + 1].line;
1027
0
                }
1028
                // CallLocation for the inner-most inlined function:
1029
                // - will be computed if enough space was available in the passed
1030
                //   buffer.
1031
                // - will have a .name, but no !.file && !.line
1032
                // - its corresponding file+line is the one returned by LineVM based
1033
                //   on @address.
1034
                // Use the inner-most inlined file+line info we got from the LineVM.
1035
0
                call_locations[num_found - 1].file = inner_most_file;
1036
0
                call_locations[num_found - 1].line = inner_most_line;
1037
1038
                // Fill in inline frames in reverse order (as expected by the caller).
1039
0
                std::reverse(call_locations.begin(), call_locations.end());
1040
0
                for (const auto& call_location : call_locations) {
1041
0
                    SymbolizedFrame inline_frame;
1042
0
                    inline_frame.found = true;
1043
0
                    inline_frame.addr = address;
1044
0
                    if (!call_location.name.empty()) {
1045
0
                        inline_frame.name = call_location.name.data();
1046
0
                    } else {
1047
0
                        inline_frame.name = nullptr;
1048
0
                    }
1049
0
                    inline_frame.location.has_file_and_line = true;
1050
0
                    inline_frame.location.file = call_location.file;
1051
0
                    inline_frame.location.line = call_location.line;
1052
0
                    inline_frames.push_back(inline_frame);
1053
0
                }
1054
0
            }
1055
0
        }
1056
0
    }
1057
1058
4.47k
    return info.has_file_and_line;
1059
4.47k
}
1060
1061
void Dwarf::findSubProgramDieForAddress(const CompilationUnit& cu, const Die& die, uint64_t address,
1062
                                        std::optional<uint64_t> base_addr_cu,
1063
0
                                        Die& subprogram) const {
1064
0
    forEachChild(cu, die, [&](const Die& child_die) {
1065
0
        if (child_die.abbr.tag == DW_TAG_subprogram) {
1066
0
            std::optional<uint64_t> low_pc;
1067
0
            std::optional<uint64_t> high_pc;
1068
0
            std::optional<bool> is_high_pc_addr;
1069
0
            std::optional<uint64_t> range_offset;
1070
0
            forEachAttribute(cu, child_die, [&](const Attribute& attr) {
1071
0
                switch (attr.spec.name) {
1072
0
                case DW_AT_ranges:
1073
0
                    range_offset = std::get<uint64_t>(attr.attr_value);
1074
0
                    break;
1075
0
                case DW_AT_low_pc:
1076
0
                    low_pc = std::get<uint64_t>(attr.attr_value);
1077
0
                    break;
1078
0
                case DW_AT_high_pc:
1079
                    // The value of the DW_AT_high_pc attribute can be
1080
                    // an address (DW_FORM_addr*) or an offset (DW_FORM_data*).
1081
0
                    is_high_pc_addr = attr.spec.form == DW_FORM_addr ||   //
1082
0
                                      attr.spec.form == DW_FORM_addrx ||  //
1083
0
                                      attr.spec.form == DW_FORM_addrx1 || //
1084
0
                                      attr.spec.form == DW_FORM_addrx2 || //
1085
0
                                      attr.spec.form == DW_FORM_addrx3 || //
1086
0
                                      attr.spec.form == DW_FORM_addrx4;
1087
0
                    high_pc = std::get<uint64_t>(attr.attr_value);
1088
0
                    break;
1089
0
                }
1090
                // Iterate through all attributes until find all above.
1091
0
                return true;
1092
0
            });
1093
0
            bool pc_match = low_pc && high_pc && is_high_pc_addr && address >= *low_pc &&
1094
0
                            (address < (*is_high_pc_addr ? *high_pc : *low_pc + *high_pc));
1095
0
            bool range_match =
1096
0
                    range_offset && isAddrInRangeList(cu, address, base_addr_cu,
1097
0
                                                      range_offset.value(), cu.addr_size);
1098
0
            if (pc_match || range_match) {
1099
0
                subprogram = child_die;
1100
0
                return false;
1101
0
            }
1102
0
        }
1103
1104
0
        findSubProgramDieForAddress(cu, child_die, address, base_addr_cu, subprogram);
1105
1106
        // Iterates through children until find the inline subprogram.
1107
0
        return true;
1108
0
    });
1109
0
}
1110
1111
/**
1112
 * Find DW_TAG_inlined_subroutine child DIEs that contain @address and
1113
 * then extract:
1114
 * - Where was it called from (DW_AT_call_file & DW_AT_call_line):
1115
 *   the statement or expression that caused the inline expansion.
1116
 * - The inlined function's name. As a function may be inlined multiple
1117
 *   times, common attributes like DW_AT_linkage_name or DW_AT_name
1118
 *   are only stored in its "concrete out-of-line instance" (a
1119
 *   DW_TAG_subprogram) which we find using DW_AT_abstract_origin.
1120
 */
1121
void Dwarf::findInlinedSubroutineDieForAddress(const CompilationUnit& cu, const Die& die,
1122
                                               const LineNumberVM& line_vm, uint64_t address,
1123
                                               std::optional<uint64_t> base_addr_cu,
1124
                                               std::vector<CallLocation>& locations,
1125
0
                                               const size_t max_size) const {
1126
0
    if (locations.size() >= max_size) {
1127
0
        return;
1128
0
    }
1129
1130
0
    forEachChild(cu, die, [&](const Die& child_die) {
1131
        // Between a DW_TAG_subprogram and and DW_TAG_inlined_subroutine we might
1132
        // have arbitrary intermediary "nodes", including DW_TAG_common_block,
1133
        // DW_TAG_lexical_block, DW_TAG_try_block, DW_TAG_catch_block and
1134
        // DW_TAG_with_stmt, etc.
1135
        // We can't filter with locationhere since its range may be not specified.
1136
        // See section 2.6.2: A location list containing only an end of list entry
1137
        // describes an object that exists in the source code but not in the
1138
        // executable program.
1139
0
        if (child_die.abbr.tag == DW_TAG_try_block || child_die.abbr.tag == DW_TAG_catch_block ||
1140
0
            child_die.abbr.tag == DW_TAG_entry_point || child_die.abbr.tag == DW_TAG_common_block ||
1141
0
            child_die.abbr.tag == DW_TAG_lexical_block) {
1142
0
            findInlinedSubroutineDieForAddress(cu, child_die, line_vm, address, base_addr_cu,
1143
0
                                               locations, max_size);
1144
0
            return true;
1145
0
        }
1146
1147
0
        std::optional<uint64_t> low_pc;
1148
0
        std::optional<uint64_t> high_pc;
1149
0
        std::optional<bool> is_high_pc_addr;
1150
0
        std::optional<uint64_t> abstract_origin;
1151
0
        std::optional<uint64_t> abstract_origin_ref_type;
1152
0
        std::optional<uint64_t> call_file;
1153
0
        std::optional<uint64_t> call_line;
1154
0
        std::optional<uint64_t> range_offset;
1155
0
        forEachAttribute(cu, child_die, [&](const Attribute& attr) {
1156
0
            switch (attr.spec.name) {
1157
0
            case DW_AT_ranges:
1158
0
                range_offset = std::get<uint64_t>(attr.attr_value);
1159
0
                break;
1160
0
            case DW_AT_low_pc:
1161
0
                low_pc = std::get<uint64_t>(attr.attr_value);
1162
0
                break;
1163
0
            case DW_AT_high_pc:
1164
                // The value of the DW_AT_high_pc attribute can be
1165
                // an address (DW_FORM_addr*) or an offset (DW_FORM_data*).
1166
0
                is_high_pc_addr = attr.spec.form == DW_FORM_addr ||   //
1167
0
                                  attr.spec.form == DW_FORM_addrx ||  //
1168
0
                                  attr.spec.form == DW_FORM_addrx1 || //
1169
0
                                  attr.spec.form == DW_FORM_addrx2 || //
1170
0
                                  attr.spec.form == DW_FORM_addrx3 || //
1171
0
                                  attr.spec.form == DW_FORM_addrx4;
1172
0
                high_pc = std::get<uint64_t>(attr.attr_value);
1173
0
                break;
1174
0
            case DW_AT_abstract_origin:
1175
0
                abstract_origin_ref_type = attr.spec.form;
1176
0
                abstract_origin = std::get<uint64_t>(attr.attr_value);
1177
0
                break;
1178
0
            case DW_AT_call_line:
1179
0
                call_line = std::get<uint64_t>(attr.attr_value);
1180
0
                break;
1181
0
            case DW_AT_call_file:
1182
0
                call_file = std::get<uint64_t>(attr.attr_value);
1183
0
                break;
1184
0
            }
1185
            // Iterate through all until find all above attributes.
1186
0
            return true;
1187
0
        });
1188
1189
        // 2.17 Code Addresses and Ranges
1190
        // Any debugging information entry describing an entity that has a
1191
        // machine code address or range of machine code addresses,
1192
        // which includes compilation units, module initialization, subroutines,
1193
        // ordinary blocks, try/catch blocks, labels and the like, may have
1194
        //  - A DW_AT_low_pc attribute for a single address,
1195
        //  - A DW_AT_low_pc and DW_AT_high_pc pair of attributes for a
1196
        //    single contiguous range of addresses, or
1197
        //  - A DW_AT_ranges attribute for a non-contiguous range of addresses.
1198
        // TODO: Support DW_TAG_entry_point and DW_TAG_common_block that don't
1199
        // have DW_AT_low_pc/DW_AT_high_pc pairs and DW_AT_ranges.
1200
        // TODO: Support relocated address which requires lookup in relocation map.
1201
0
        bool pc_match = low_pc && high_pc && is_high_pc_addr && address >= *low_pc &&
1202
0
                        (address < (*is_high_pc_addr ? *high_pc : *low_pc + *high_pc));
1203
0
        bool range_match = range_offset && isAddrInRangeList(cu, address, base_addr_cu,
1204
0
                                                             range_offset.value(), cu.addr_size);
1205
0
        if (!pc_match && !range_match) {
1206
            // Address doesn't match. Keep searching other children.
1207
0
            return true;
1208
0
        }
1209
1210
0
        if (!abstract_origin || !abstract_origin_ref_type || !call_line || !call_file) {
1211
            // We expect a single sibling DIE to match on addr, but it's missing
1212
            // required fields. Stop searching for other DIEs.
1213
0
            return false;
1214
0
        }
1215
1216
0
        CallLocation location;
1217
0
        location.file = line_vm.getFullFileName(*call_file);
1218
0
        location.line = *call_line;
1219
1220
        /// Something wrong with receiving debug info about inline.
1221
        /// If set to true we stop parsing DWARF.
1222
0
        bool die_for_inline_broken = false;
1223
1224
0
        auto get_function_name = [&](const CompilationUnit& srcu, uint64_t die_offset) {
1225
0
            Die decl_die = getDieAtOffset(srcu, die_offset);
1226
0
            auto& die_to_look_for_name = decl_die;
1227
1228
0
            Die def_die;
1229
            // Jump to the actual function definition instead of declaration for name
1230
            // and line info.
1231
            // DW_AT_specification: Incomplete, non-defining, or separate declaration
1232
            // corresponding to a declaration
1233
0
            auto offset = getAttribute<uint64_t>(srcu, decl_die, DW_AT_specification);
1234
0
            if (offset) {
1235
                /// FIXME: actually it's a bug in our DWARF parser.
1236
                ///
1237
                /// Most of the times compilation unit offset (srcu.offset) is some big number inside .debug_info (like 434782255).
1238
                /// Offset of DIE definition is some small relative number to srcu.offset (like 3518).
1239
                /// However in some unknown cases offset looks like global, non relative number (like 434672579) and in this
1240
                /// case we obviously doing something wrong parsing DWARF.
1241
                ///
1242
                /// What is important -- this bug? reproduces only with -flto=thin in release mode.
1243
                /// Also llvm-dwarfdump --verify ./clickhouse says that our DWARF is ok, so it's another prove
1244
                /// that we just doing something wrong.
1245
                ///
1246
                /// FIXME: Currently we just give up parsing DWARF for inlines when we got into this situation.
1247
0
                if (srcu.offset + offset.value() >= info_.size()) {
1248
0
                    die_for_inline_broken = true;
1249
0
                } else {
1250
0
                    def_die = getDieAtOffset(srcu, srcu.offset + offset.value());
1251
0
                    die_to_look_for_name = def_die;
1252
0
                }
1253
0
            }
1254
1255
0
            std::string_view name;
1256
1257
0
            if (die_for_inline_broken) {
1258
0
                return name;
1259
0
            }
1260
1261
            // The file and line will be set in the next inline subroutine based on
1262
            // its DW_AT_call_file and DW_AT_call_line.
1263
0
            forEachAttribute(srcu, die_to_look_for_name, [&](const Attribute& attr) {
1264
0
                switch (attr.spec.name) {
1265
0
                case DW_AT_linkage_name:
1266
0
                    name = std::get<std::string_view>(attr.attr_value);
1267
0
                    break;
1268
0
                case DW_AT_name:
1269
                    // NOTE: when DW_AT_linkage_name and DW_AT_name match, dwarf
1270
                    // emitters omit DW_AT_linkage_name (to save space). If present
1271
                    // DW_AT_linkage_name should always be preferred (mangled C++ name
1272
                    // vs just the function name).
1273
0
                    if (name.empty()) {
1274
0
                        name = std::get<std::string_view>(attr.attr_value);
1275
0
                    }
1276
0
                    break;
1277
0
                }
1278
0
                return true;
1279
0
            });
1280
0
            return name;
1281
0
        };
1282
1283
        // DW_AT_abstract_origin is a reference. There a 3 types of references:
1284
        // - the reference can identify any debugging information entry within the
1285
        //   compilation unit (DW_FORM_ref1, DW_FORM_ref2, DW_FORM_ref4,
1286
        //   DW_FORM_ref8, DW_FORM_ref_udata). This type of reference is an offset
1287
        //   from the first byte of the compilation header for the compilation unit
1288
        //   containing the reference.
1289
        // - the reference can identify any debugging information entry within a
1290
        //   .debug_info section; in particular, it may refer to an entry in a
1291
        //   different compilation unit (DW_FORM_ref_addr)
1292
        // - the reference can identify any debugging information type entry that
1293
        //   has been placed in its own type unit.
1294
        //   Not applicable for DW_AT_abstract_origin.
1295
0
        location.name = (*abstract_origin_ref_type != DW_FORM_ref_addr)
1296
0
                                ? get_function_name(cu, cu.offset + *abstract_origin)
1297
0
                                : get_function_name(findCompilationUnit(*abstract_origin),
1298
0
                                                    *abstract_origin);
1299
1300
        /// FIXME: see comment above
1301
0
        if (die_for_inline_broken) {
1302
0
            return false;
1303
0
        }
1304
1305
0
        locations.push_back(location);
1306
1307
0
        findInlinedSubroutineDieForAddress(cu, child_die, line_vm, address, base_addr_cu, locations,
1308
0
                                           max_size);
1309
1310
0
        return false;
1311
0
    });
1312
0
}
1313
1314
bool Dwarf::findAddress(uintptr_t address, LocationInfo& locationInfo, LocationInfoMode mode,
1315
10.3k
                        std::vector<SymbolizedFrame>& inline_frames) const {
1316
10.3k
    locationInfo = LocationInfo();
1317
1318
10.3k
    if (mode == LocationInfoMode::DISABLED) {
1319
0
        return false;
1320
0
    }
1321
1322
10.3k
    if (!elf_) { // No file.
1323
533
        return false;
1324
533
    }
1325
1326
9.78k
    if (!aranges_.empty()) {
1327
        // Fast path: find the right .debug_info entry by looking up the
1328
        // address in .debug_aranges.
1329
9.78k
        uint64_t offset = 0;
1330
9.78k
        if (findDebugInfoOffset(address, aranges_, offset)) {
1331
            // Read compilation unit header from .debug_info
1332
4.47k
            auto unit = getCompilationUnit(offset);
1333
4.47k
            if (unit.unit_type != DW_UT_compile && unit.unit_type != DW_UT_skeleton) {
1334
0
                return false;
1335
0
            }
1336
4.47k
            findLocation(address, mode, unit, locationInfo, inline_frames);
1337
4.47k
            return locationInfo.has_file_and_line;
1338
5.31k
        } else if (mode == LocationInfoMode::FAST) {
1339
            // NOTE: Clang (when using -gdwarf-aranges) doesn't generate entries
1340
            // in .debug_aranges for some functions, but always generates
1341
            // .debug_info entries.  Scanning .debug_info is slow, so fall back to
1342
            // it only if such behavior is requested via LocationInfoMode.
1343
5.31k
            return false;
1344
5.31k
        } else {
1345
0
            SAFE_CHECK(mode == LocationInfoMode::FULL || mode == LocationInfoMode::FULL_WITH_INLINE,
1346
0
                       "unexpected mode");
1347
            // Fall back to the linear scan.
1348
0
        }
1349
9.78k
    }
1350
1351
    // Slow path (linear scan): Iterate over all .debug_info entries
1352
    // and look for the address in each compilation unit.
1353
0
    uint64_t offset = 0;
1354
0
    while (offset < info_.size() && !locationInfo.has_file_and_line) {
1355
0
        auto unit = getCompilationUnit(offset);
1356
0
        offset += unit.size;
1357
0
        if (unit.unit_type != DW_UT_compile && unit.unit_type != DW_UT_skeleton) {
1358
0
            continue;
1359
0
        }
1360
0
        findLocation(address, mode, unit, locationInfo, inline_frames);
1361
0
    }
1362
1363
0
    return locationInfo.has_file_and_line;
1364
9.78k
}
1365
1366
bool Dwarf::isAddrInRangeList(const CompilationUnit& cu, uint64_t address,
1367
                              std::optional<uint64_t> base_addr, size_t offset,
1368
0
                              uint8_t addr_size) const {
1369
0
    SAFE_CHECK(addr_size == 4 || addr_size == 8, "wrong address size");
1370
0
    if (cu.version <= 4 && !ranges_.empty()) {
1371
0
        const bool is64_bit_addr = addr_size == 8;
1372
0
        std::string_view sp = ranges_;
1373
0
        sp.remove_prefix(offset);
1374
0
        const uint64_t max_addr = is64_bit_addr ? std::numeric_limits<uint64_t>::max()
1375
0
                                                : std::numeric_limits<uint32_t>::max();
1376
0
        while (!sp.empty()) {
1377
0
            uint64_t begin = readOffset(sp, is64_bit_addr);
1378
0
            uint64_t end = readOffset(sp, is64_bit_addr);
1379
            // The range list entry is a base address selection entry.
1380
0
            if (begin == max_addr) {
1381
0
                base_addr = end;
1382
0
                continue;
1383
0
            }
1384
            // The range list entry is an end of list entry.
1385
0
            if (begin == 0 && end == 0) {
1386
0
                break;
1387
0
            }
1388
1389
            // Check if the given address falls in the range list entry.
1390
            // 2.17.3 Non-Contiguous Address Ranges
1391
            // The applicable base address of a range list entry is determined by the
1392
            // closest preceding base address selection entry (see below) in the same
1393
            // range list. If there is no such selection entry, then the applicable
1394
            // base address defaults to the base address of the compilation unit.
1395
0
            if (base_addr && address >= begin + *base_addr && address < end + *base_addr) {
1396
0
                return true;
1397
0
            }
1398
0
        }
1399
0
    }
1400
1401
0
    if (cu.version == 5 && !rnglists_.empty() && cu.addr_base.has_value()) {
1402
0
        auto rnglists = rnglists_;
1403
0
        rnglists.remove_prefix(offset);
1404
1405
0
        while (!rnglists.empty()) {
1406
0
            auto kind = read<uint8_t>(rnglists);
1407
0
            switch (kind) {
1408
0
            case DW_RLE_end_of_list:
1409
0
                return false;
1410
0
            case DW_RLE_base_addressx: {
1411
0
                auto index = readULEB(rnglists);
1412
0
                auto sp = addr_.substr(*cu.addr_base + index * sizeof(uint64_t));
1413
0
                base_addr = read<uint64_t>(sp);
1414
0
            } break;
1415
1416
0
            case DW_RLE_startx_endx: {
1417
0
                auto index_start = readULEB(rnglists);
1418
0
                auto index_end = readULEB(rnglists);
1419
0
                auto sp_start = addr_.substr(*cu.addr_base + index_start * sizeof(uint64_t));
1420
0
                auto start = read<uint64_t>(sp_start);
1421
1422
0
                auto sp_end = addr_.substr(*cu.addr_base + index_end * sizeof(uint64_t));
1423
0
                auto end = read<uint64_t>(sp_end);
1424
0
                if (address >= start && address < end) {
1425
0
                    return true;
1426
0
                }
1427
0
            } break;
1428
1429
0
            case DW_RLE_startx_length: {
1430
0
                auto index_start = readULEB(rnglists);
1431
0
                auto length = readULEB(rnglists);
1432
0
                auto sp_start = addr_.substr(*cu.addr_base + index_start * sizeof(uint64_t));
1433
0
                auto start = read<uint64_t>(sp_start);
1434
1435
0
                auto sp_end = addr_.substr(*cu.addr_base + index_start * sizeof(uint64_t) + length);
1436
0
                auto end = read<uint64_t>(sp_end);
1437
0
                if (start != end && address >= start && address < end) {
1438
0
                    return true;
1439
0
                }
1440
0
            } break;
1441
1442
0
            case DW_RLE_offset_pair: {
1443
0
                auto offset_start = readULEB(rnglists);
1444
0
                auto offset_end = readULEB(rnglists);
1445
0
                if (base_addr && address >= (*base_addr + offset_start) &&
1446
0
                    address < (*base_addr + offset_end)) {
1447
0
                    return true;
1448
0
                }
1449
0
            } break;
1450
1451
0
            case DW_RLE_base_address:
1452
0
                base_addr = read<uint64_t>(rnglists);
1453
0
                break;
1454
1455
0
            case DW_RLE_start_end: {
1456
0
                uint64_t start = read<uint64_t>(rnglists);
1457
0
                uint64_t end = read<uint64_t>(rnglists);
1458
0
                if (address >= start && address < end) {
1459
0
                    return true;
1460
0
                }
1461
0
            } break;
1462
1463
0
            case DW_RLE_start_length: {
1464
0
                uint64_t start = read<uint64_t>(rnglists);
1465
0
                uint64_t end = start + readULEB(rnglists);
1466
0
                if (address >= start && address < end) {
1467
0
                    return true;
1468
0
                }
1469
0
            } break;
1470
1471
0
            default:
1472
0
                SAFE_CHECK(false, "Unexpected debug_rnglists entry kind");
1473
0
            }
1474
0
        }
1475
0
    }
1476
0
    return false;
1477
0
}
1478
1479
// Sets up a line number VM over the first unit found in `data`.
//
// The three string views are stored as-is; `debugStr` and `debugLineStr` are
// later handed to readLineNumberAttribute() to resolve string-offset forms.
// After the unit has been split off into data_, the header is parsed by
// init() and the state machine registers are put into their start state by
// reset().
Dwarf::LineNumberVM::LineNumberVM(std::string_view data, std::string_view compilationDirectory,
                                  std::string_view debugStr, std::string_view debugLineStr)
        : compilationDirectory_(compilationDirectory),
          debugStr_(debugStr),
          debugLineStr_(debugLineStr) {
    Section line_section(data);

    // next() stores the unit's bytes in data_; without a unit there is
    // nothing to run the VM on.
    const bool has_unit = line_section.next(data_);
    SAFE_CHECK(has_unit, "invalid line number VM");

    is64Bit_ = line_section.is64Bit();
    init();
    reset();
}
1490
1491
2.50M
void Dwarf::LineNumberVM::reset() {
1492
2.50M
    address_ = 0;
1493
2.50M
    file_ = 1;
1494
2.50M
    line_ = 1;
1495
2.50M
    column_ = 0;
1496
2.50M
    isStmt_ = defaultIsStmt_;
1497
2.50M
    basicBlock_ = false;
1498
2.50M
    endSequence_ = false;
1499
2.50M
    prologueEnd_ = false;
1500
2.50M
    epilogueBegin_ = false;
1501
2.50M
    isa_ = 0;
1502
2.50M
    discriminator_ = 0;
1503
2.50M
}
1504
1505
struct LineNumberAttribute {
1506
    uint64_t content_type_code;
1507
    uint64_t form_code;
1508
    std::variant<uint64_t, std::string_view> attr_value;
1509
};
1510
1511
LineNumberAttribute readLineNumberAttribute(bool is64_bit, std::string_view& format,
1512
                                            std::string_view& entries, std::string_view debugStr,
1513
3.74M
                                            std::string_view debugLineStr) {
1514
3.74M
    uint64_t content_type_code = readULEB(format);
1515
3.74M
    uint64_t form_code = readULEB(format);
1516
3.74M
    std::variant<uint64_t, std::string_view> attr_value;
1517
1518
3.74M
    switch (content_type_code) {
1519
1.97M
    case DW_LNCT_path: {
1520
1.97M
        switch (form_code) {
1521
0
        case DW_FORM_string:
1522
0
            attr_value = readNullTerminated(entries);
1523
0
            break;
1524
1.97M
        case DW_FORM_line_strp: {
1525
1.97M
            auto off = readOffset(entries, is64_bit);
1526
1.97M
            attr_value = getStringFromStringSection(debugLineStr, off);
1527
1.97M
        } break;
1528
0
        case DW_FORM_strp:
1529
0
            attr_value = getStringFromStringSection(debugStr, readOffset(entries, is64_bit));
1530
0
            break;
1531
0
        case DW_FORM_strp_sup:
1532
0
            SAFE_CHECK(false, "Unexpected DW_FORM_strp_sup");
1533
0
            break;
1534
0
        default:
1535
0
            SAFE_CHECK(false, "Unexpected form for DW_LNCT_path");
1536
0
            break;
1537
1.97M
        }
1538
1.97M
    } break;
1539
1540
1.97M
    case DW_LNCT_directory_index: {
1541
1.77M
        switch (form_code) {
1542
0
        case DW_FORM_data1:
1543
0
            attr_value = read<uint8_t>(entries);
1544
0
            break;
1545
0
        case DW_FORM_data2:
1546
0
            attr_value = read<uint16_t>(entries);
1547
0
            break;
1548
1.77M
        case DW_FORM_udata:
1549
1.77M
            attr_value = readULEB(entries);
1550
1.77M
            break;
1551
0
        default:
1552
0
            SAFE_CHECK(false, "Unexpected form for DW_LNCT_directory_index");
1553
0
            break;
1554
1.77M
        }
1555
1.77M
    } break;
1556
1557
1.77M
    case DW_LNCT_timestamp: {
1558
0
        switch (form_code) {
1559
0
        case DW_FORM_udata:
1560
0
            attr_value = readULEB(entries);
1561
0
            break;
1562
0
        case DW_FORM_data4:
1563
0
            attr_value = read<uint32_t>(entries);
1564
0
            break;
1565
0
        case DW_FORM_data8:
1566
0
            attr_value = read<uint64_t>(entries);
1567
0
            break;
1568
0
        case DW_FORM_block:
1569
0
            attr_value = readBytes(entries, readULEB(entries));
1570
0
            break;
1571
0
        default:
1572
0
            SAFE_CHECK(false, "Unexpected form for DW_LNCT_timestamp");
1573
0
        }
1574
0
    } break;
1575
1576
0
    case DW_LNCT_size: {
1577
0
        switch (form_code) {
1578
0
        case DW_FORM_udata:
1579
0
            attr_value = readULEB(entries);
1580
0
            break;
1581
0
        case DW_FORM_data1:
1582
0
            attr_value = read<uint8_t>(entries);
1583
0
            break;
1584
0
        case DW_FORM_data2:
1585
0
            attr_value = read<uint16_t>(entries);
1586
0
            break;
1587
0
        case DW_FORM_data4:
1588
0
            attr_value = read<uint32_t>(entries);
1589
0
            break;
1590
0
        case DW_FORM_data8:
1591
0
            attr_value = read<uint64_t>(entries);
1592
0
            break;
1593
0
        default:
1594
0
            SAFE_CHECK(false, "Unexpected form for DW_LNCT_size");
1595
0
            break;
1596
0
        }
1597
0
    } break;
1598
1599
0
    case DW_LNCT_MD5: {
1600
0
        switch (form_code) {
1601
0
        case DW_FORM_data16:
1602
0
            attr_value = readBytes(entries, 16);
1603
0
            break;
1604
0
        default:
1605
0
            SAFE_CHECK(false, "Unexpected form for DW_LNCT_MD5");
1606
0
            break;
1607
0
        }
1608
0
    } break;
1609
1610
0
    default:
1611
        // TODO: skip over vendor data as specified by the form instead.
1612
0
        SAFE_CHECK(false, "Unexpected vendor content type code");
1613
0
        break;
1614
3.74M
    }
1615
3.74M
    return {
1616
3.74M
            .content_type_code = content_type_code,
1617
3.74M
            .form_code = form_code,
1618
3.74M
            .attr_value = attr_value,
1619
3.74M
    };
1620
3.74M
}
1621
1622
4.47k
void Dwarf::LineNumberVM::init() {
1623
4.47k
    version_ = read<uint16_t>(data_);
1624
4.47k
    SAFE_CHECK(version_ >= 2 && version_ <= 5, "invalid version in line number VM: {}", version_);
1625
4.47k
    if (version_ == 5) {
1626
4.47k
        auto address_size = read<uint8_t>(data_);
1627
4.47k
        SAFE_CHECK(address_size == sizeof(uintptr_t), "Unexpected Line Number Table address_size");
1628
4.47k
        auto segment_selector_size = read<uint8_t>(data_);
1629
4.47k
        SAFE_CHECK(segment_selector_size == 0, "Segments not supported");
1630
4.47k
    }
1631
4.47k
    uint64_t header_length = readOffset(data_, is64Bit_);
1632
4.47k
    SAFE_CHECK(header_length <= data_.size(), "invalid line number VM header length");
1633
4.47k
    std::string_view header(data_.data(), header_length);
1634
4.47k
    data_ = std::string_view(header.end(), data_.end() - header.end());
1635
1636
4.47k
    minLength_ = read<uint8_t>(header);
1637
4.47k
    if (version_ >= 4) { // Version 2 and 3 records don't have this
1638
4.47k
        uint8_t max_ops_per_instruction = read<uint8_t>(header);
1639
4.47k
        SAFE_CHECK(max_ops_per_instruction == 1, "VLIW not supported");
1640
4.47k
    }
1641
4.47k
    defaultIsStmt_ = read<uint8_t>(header);
1642
4.47k
    lineBase_ = read<int8_t>(header); // yes, signed
1643
4.47k
    lineRange_ = read<uint8_t>(header);
1644
4.47k
    opcodeBase_ = read<uint8_t>(header);
1645
4.47k
    SAFE_CHECK(opcodeBase_ != 0, "invalid opcode base");
1646
4.47k
    standardOpcodeLengths_ = reinterpret_cast<const uint8_t*>(header.data());
1647
4.47k
    header.remove_prefix(opcodeBase_ - 1);
1648
1649
4.47k
    if (version_ <= 4) {
1650
        // We don't want to use heap, so we don't keep an unbounded amount of state.
1651
        // We'll just skip over include directories and file names here, and
1652
        // we'll loop again when we actually need to retrieve one.
1653
0
        std::string_view sp;
1654
0
        const char* tmp = header.data();
1655
0
        v4_.includeDirectoryCount = 0;
1656
0
        while (!(sp = readNullTerminated(header)).empty()) {
1657
0
            ++v4_.includeDirectoryCount;
1658
0
        }
1659
0
        v4_.includeDirectories = {tmp, header.data()};
1660
1661
0
        tmp = header.data();
1662
0
        FileName fn;
1663
0
        v4_.fileNameCount = 0;
1664
0
        while (readFileName(header, fn)) {
1665
0
            ++v4_.fileNameCount;
1666
0
        }
1667
0
        v4_.fileNames = {tmp, header.data()};
1668
4.47k
    } else if (version_ == 5) {
1669
4.47k
        v5_.directoryEntryFormatCount = read<uint8_t>(header);
1670
4.47k
        const char* tmp = header.data();
1671
8.94k
        for (uint8_t i = 0; i < v5_.directoryEntryFormatCount; i++) {
1672
            // A sequence of directory entry format descriptions. Each description
1673
            // consists of a pair of ULEB128 values:
1674
4.47k
            readULEB(header); // A content type code
1675
4.47k
            readULEB(header); // A form code using the attribute form codes
1676
4.47k
        }
1677
4.47k
        v5_.directoryEntryFormat = {tmp, header.data()};
1678
4.47k
        v5_.directoriesCount = readULEB(header);
1679
4.47k
        tmp = header.data();
1680
182k
        for (uint64_t i = 0; i < v5_.directoriesCount; i++) {
1681
177k
            std::string_view format = v5_.directoryEntryFormat;
1682
355k
            for (uint8_t f = 0; f < v5_.directoryEntryFormatCount; f++) {
1683
177k
                readLineNumberAttribute(is64Bit_, format, header, debugStr_, debugLineStr_);
1684
177k
            }
1685
177k
        }
1686
4.47k
        v5_.directories = {tmp, header.data()};
1687
1688
4.47k
        v5_.fileNameEntryFormatCount = read<uint8_t>(header);
1689
4.47k
        tmp = header.data();
1690
13.4k
        for (uint8_t i = 0; i < v5_.fileNameEntryFormatCount; i++) {
1691
            // A sequence of file entry format descriptions. Each description
1692
            // consists of a pair of ULEB128 values:
1693
8.94k
            readULEB(header); // A content type code
1694
8.94k
            readULEB(header); // A form code using the attribute form codes
1695
8.94k
        }
1696
4.47k
        v5_.fileNameEntryFormat = {tmp, header.data()};
1697
4.47k
        v5_.fileNamesCount = readULEB(header);
1698
4.47k
        tmp = header.data();
1699
890k
        for (uint64_t i = 0; i < v5_.fileNamesCount; i++) {
1700
886k
            std::string_view format = v5_.fileNameEntryFormat;
1701
2.65M
            for (uint8_t f = 0; f < v5_.fileNameEntryFormatCount; f++) {
1702
1.77M
                readLineNumberAttribute(is64Bit_, format, header, debugStr_, debugLineStr_);
1703
1.77M
            }
1704
886k
        }
1705
4.47k
        v5_.fileNames = {tmp, header.data()};
1706
4.47k
    }
1707
4.47k
}
1708
1709
80.9M
bool Dwarf::LineNumberVM::next(std::string_view& program) {
1710
80.9M
    Dwarf::LineNumberVM::StepResult ret;
1711
218M
    do {
1712
218M
        ret = step(program);
1713
218M
    } while (ret == CONTINUE);
1714
1715
80.9M
    return (ret == COMMIT);
1716
80.9M
}
1717
1718
4.47k
// Returns the file-name entry for file register value `index`.
//
// DWARF <= 4: indices start at 1. Indices up to v4_.fileNameCount are looked
// up in the header's file-name table; larger ones are resolved by scanning
// the line number program for DW_LNE_define_file instructions.
//
// DWARF 5: indices start at 0 and must be below v5_.fileNamesCount. Entries
// are variable-length, so the table is decoded from its start up to and
// including the requested entry.
//
// An out-of-range index fails a SAFE_CHECK.
Dwarf::LineNumberVM::FileName Dwarf::LineNumberVM::getFileName(uint64_t index) const {
    if (version_ <= 4) {
        SAFE_CHECK(index != 0, "invalid file index 0");
        FileName fn;
        if (index <= v4_.fileNameCount) {
            std::string_view file_names = v4_.fileNames;
            for (; index; --index) {
                if (!readFileName(file_names, fn)) {
                    abort();
                }
            }
            return fn;
        }

        index -= v4_.fileNameCount;

        // The file was not declared in the header; look for the matching
        // DW_LNE_define_file instruction in the program.
        std::string_view program = data_;
        for (; index; --index) {
            SAFE_CHECK(nextDefineFile(program, fn), "invalid file index");
        }

        return fn;
    } else {
        FileName fn;
        SAFE_CHECK(index < v5_.fileNamesCount, "invalid file index");
        std::string_view file_names = v5_.fileNames;
        // Earlier entries must still be decoded to find where entry `index`
        // begins, but nothing after it is needed, so stop there instead of
        // walking the whole table on every lookup.
        for (uint64_t i = 0; i <= index; i++) {
            std::string_view format = v5_.fileNameEntryFormat;
            for (uint8_t f = 0; f < v5_.fileNameEntryFormatCount; f++) {
                auto attr = readLineNumberAttribute(is64Bit_, format, file_names, debugStr_,
                                                    debugLineStr_);
                if (i != index) {
                    continue;
                }
                switch (attr.content_type_code) {
                case DW_LNCT_path:
                    fn.relativeName = std::get<std::string_view>(attr.attr_value);
                    break;
                case DW_LNCT_directory_index:
                    fn.directoryIndex = std::get<uint64_t>(attr.attr_value);
                    break;
                }
            }
        }
        return fn;
    }
}
1764
1765
4.47k
std::string_view Dwarf::LineNumberVM::getIncludeDirectory(uint64_t index) const {
1766
4.47k
    if (version_ <= 4) {
1767
0
        if (index == 0) {
1768
            // In DWARF <= 4 the current directory is not represented in the
1769
            // directories field and a directory index of 0 implicitly referred to
1770
            // that directory as found in the DW_AT_comp_dir attribute of the
1771
            // compilation unit debugging information entry.
1772
0
            return {};
1773
0
        }
1774
1775
0
        SAFE_CHECK(index <= v4_.includeDirectoryCount, "invalid include directory");
1776
1777
0
        std::string_view include_directories = v4_.includeDirectories;
1778
0
        std::string_view dir;
1779
0
        for (; index; --index) {
1780
0
            dir = readNullTerminated(include_directories);
1781
0
            if (dir.empty()) {
1782
0
                abort(); // BUG
1783
0
            }
1784
0
        }
1785
1786
0
        return dir;
1787
4.47k
    } else {
1788
4.47k
        SAFE_CHECK(index < v5_.directoriesCount, "invalid file index");
1789
4.47k
        std::string_view directories = v5_.directories;
1790
22.6k
        for (uint64_t i = 0; i < v5_.directoriesCount; i++) {
1791
22.6k
            std::string_view format = v5_.directoryEntryFormat;
1792
40.8k
            for (uint8_t f = 0; f < v5_.directoryEntryFormatCount; f++) {
1793
22.6k
                auto attr = readLineNumberAttribute(is64Bit_, format, directories, debugStr_,
1794
22.6k
                                                    debugLineStr_);
1795
22.6k
                if (i == index && attr.content_type_code == DW_LNCT_path) {
1796
4.47k
                    return std::get<std::string_view>(attr.attr_value);
1797
4.47k
                }
1798
22.6k
            }
1799
22.6k
        }
1800
        // This could only happen if DWARF5's directory_entry_format doesn't contain
1801
        // a DW_LNCT_path. Highly unlikely, but we shouldn't crash.
1802
0
        return std::string_view("<directory not found>");
1803
4.47k
    }
1804
4.47k
}
1805
1806
0
bool Dwarf::LineNumberVM::readFileName(std::string_view& program, FileName& fn) {
1807
0
    fn.relativeName = readNullTerminated(program);
1808
0
    if (fn.relativeName.empty()) {
1809
0
        return false;
1810
0
    }
1811
0
    fn.directoryIndex = readULEB(program);
1812
    // Skip over file size and last modified time
1813
0
    readULEB(program);
1814
0
    readULEB(program);
1815
0
    return true;
1816
0
}
1817
1818
0
bool Dwarf::LineNumberVM::nextDefineFile(std::string_view& program, FileName& fn) const {
1819
0
    while (!program.empty()) {
1820
0
        auto opcode = read<uint8_t>(program);
1821
1822
0
        if (opcode >= opcodeBase_) { // special opcode
1823
0
            continue;
1824
0
        }
1825
1826
0
        if (opcode != 0) { // standard opcode
1827
            // Skip, slurp the appropriate number of LEB arguments
1828
0
            uint8_t arg_count = standardOpcodeLengths_[opcode - 1];
1829
0
            while (arg_count--) {
1830
0
                readULEB(program);
1831
0
            }
1832
0
            continue;
1833
0
        }
1834
1835
        // Extended opcode
1836
0
        auto length = readULEB(program);
1837
        // the opcode itself should be included in the length, so length >= 1
1838
0
        SAFE_CHECK(length != 0, "invalid extended opcode length");
1839
0
        read<uint8_t>(program); // extended opcode
1840
0
        --length;
1841
1842
0
        if (opcode == DW_LNE_define_file) {
1843
0
            SAFE_CHECK(version_ < 5, "DW_LNE_define_file deprecated in DWARF5");
1844
0
            SAFE_CHECK(readFileName(program, fn), "invalid empty file in DW_LNE_define_file");
1845
0
            return true;
1846
0
        }
1847
1848
0
        program.remove_prefix(length);
1849
0
    }
1850
1851
0
    return false;
1852
0
}
1853
1854
218M
// Executes exactly one line number program instruction taken from the front
// of `program` and updates the state machine registers accordingly.
//
// Returns:
//   COMMIT   - the instruction produced a row (special opcode or DW_LNS_copy);
//   END      - the instruction was DW_LNE_end_sequence;
//   CONTINUE - anything else, including opcodes that are skipped.
//
// Malformed extended opcodes fail a SAFE_CHECK.
Dwarf::LineNumberVM::StepResult Dwarf::LineNumberVM::step(std::string_view& program) {
    auto opcode = read<uint8_t>(program);

    if (opcode >= opcodeBase_) { // special opcode
        // A special opcode advances both the address and the line in one byte.
        uint8_t adjusted_opcode = opcode - opcodeBase_;
        uint8_t op_advance = adjusted_opcode / lineRange_;

        address_ += minLength_ * op_advance;
        line_ += lineBase_ + adjusted_opcode % lineRange_;

        basicBlock_ = false;
        prologueEnd_ = false;
        epilogueBegin_ = false;
        discriminator_ = 0;
        return COMMIT;
    }

    if (opcode != 0) { // standard opcode
        // Only interpret opcodes that are recognized by the version we're parsing;
        // the others are vendor extensions and we should ignore them.
        switch (opcode) {
        case DW_LNS_copy:
            basicBlock_ = false;
            prologueEnd_ = false;
            epilogueBegin_ = false;
            discriminator_ = 0;
            return COMMIT;
        case DW_LNS_advance_pc:
            address_ += minLength_ * readULEB(program);
            return CONTINUE;
        case DW_LNS_advance_line:
            line_ += readSLEB(program);
            return CONTINUE;
        case DW_LNS_set_file:
            file_ = readULEB(program);
            return CONTINUE;
        case DW_LNS_set_column:
            column_ = readULEB(program);
            return CONTINUE;
        case DW_LNS_negate_stmt:
            isStmt_ = !isStmt_;
            return CONTINUE;
        case DW_LNS_set_basic_block:
            basicBlock_ = true;
            return CONTINUE;
        case DW_LNS_const_add_pc:
            // Same address advance as special opcode 255, without touching the line.
            address_ += minLength_ * ((255 - opcodeBase_) / lineRange_);
            return CONTINUE;
        case DW_LNS_fixed_advance_pc:
            // The operand is a fixed-size 16-bit value, not a LEB128.
            address_ += read<uint16_t>(program);
            return CONTINUE;
        case DW_LNS_set_prologue_end:
            if (version_ == 2) {
                break; // not supported in version 2
            }
            prologueEnd_ = true;
            return CONTINUE;
        case DW_LNS_set_epilogue_begin:
            if (version_ == 2) {
                break; // not supported in version 2
            }
            epilogueBegin_ = true;
            return CONTINUE;
        case DW_LNS_set_isa:
            if (version_ == 2) {
                break; // not supported in version 2
            }
            isa_ = readULEB(program);
            return CONTINUE;
        }

        // Unrecognized standard opcode, slurp the appropriate number of LEB
        // arguments.
        uint8_t arg_count = standardOpcodeLengths_[opcode - 1];
        while (arg_count--) {
            readULEB(program);
        }
        return CONTINUE;
    }

    // Extended opcode
    auto length = readULEB(program);
    // the opcode itself should be included in the length, so length >= 1
    SAFE_CHECK(length != 0, "invalid extended opcode length");
    auto extended_opcode = read<uint8_t>(program);
    --length;

    switch (extended_opcode) {
    case DW_LNE_end_sequence:
        return END;
    case DW_LNE_set_address:
        address_ = read<uintptr_t>(program);
        return CONTINUE;
    case DW_LNE_define_file:
        SAFE_CHECK(version_ < 5, "DW_LNE_define_file deprecated in DWARF5");
        // We can't process DW_LNE_define_file here, as it would require us to
        // use unbounded amounts of state (ie. use the heap).  We'll do a second
        // pass (using nextDefineFile()) if necessary.
        break;
    case DW_LNE_set_discriminator:
        discriminator_ = readULEB(program);
        return CONTINUE;
    }

    // Unrecognized extended opcode: skip its operands. remove_prefix() beyond
    // the end of the view is undefined behaviour, so validate the length first.
    SAFE_CHECK(length <= program.size(), "invalid extended opcode length");
    program.remove_prefix(length);
    return CONTINUE;
}
1962
1963
4.47k
// Builds the Path of the file with the given file register value from three
// parts: a base directory, the file's include directory and its relative name.
Dwarf::Path Dwarf::LineNumberVM::getFullFileName(uint64_t index) const {
    // DWARF 5: the current directory is explicitly present in the directory
    // table, so no separate base directory is used.
    // DWARF <= 4: the current dir is not represented in the CU's Line Number
    // Program Header and relies on the CU's DW_AT_comp_dir.
    std::string_view base_dir = "";
    if (version_ != 5) {
        base_dir = compilationDirectory_;
    }

    const auto entry = getFileName(index);
    return Path(base_dir, getIncludeDirectory(entry.directoryIndex), entry.relativeName);
}
1971
1972
4.47k
bool Dwarf::LineNumberVM::findAddress(uintptr_t target, Path& file, uint64_t& line) {
1973
4.47k
    std::string_view program = data_;
1974
1975
    // Within each sequence of instructions, the address may only increase.
1976
    // Unfortunately, within the same compilation unit, sequences may appear
1977
    // in any order.  So any sequence is a candidate if it starts at an address
1978
    // <= the target address, and we know we've found the target address if
1979
    // a candidate crosses the target address.
1980
4.47k
    enum State {
1981
4.47k
        START,
1982
4.47k
        LOW_SEQ, // candidate
1983
4.47k
        HIGH_SEQ
1984
4.47k
    };
1985
4.47k
    State state = START;
1986
4.47k
    reset();
1987
1988
4.47k
    uint64_t prev_file = 0;
1989
4.47k
    uint64_t prev_line = 0;
1990
80.9M
    while (!program.empty()) {
1991
80.9M
        bool seq_end = !next(program);
1992
1993
80.9M
        if (state == START) {
1994
2.50M
            if (!seq_end) {
1995
2.50M
                state = address_ <= target ? LOW_SEQ : HIGH_SEQ;
1996
2.50M
            }
1997
2.50M
        }
1998
1999
80.9M
        if (state == LOW_SEQ) {
2000
77.3M
            if (address_ > target) {
2001
                // Found it!  Note that ">" is indeed correct (not ">="), as each
2002
                // sequence is guaranteed to have one entry past-the-end (emitted by
2003
                // DW_LNE_end_sequence)
2004
                //
2005
                // NOTE: In DWARF <= 4 the file register is non-zero.
2006
                //   See DWARF 4: 6.2.4 The Line Number Program Header
2007
                //   "The line number program assigns numbers to each of the file
2008
                //   entries in order, beginning with 1, and uses those numbers instead
2009
                //   of file names in the file register."
2010
                // DWARF 5 has a different include directory/file header and 0 is valid.
2011
4.47k
                if (version_ <= 4 && prev_file == 0) {
2012
0
                    return false;
2013
0
                }
2014
4.47k
                file = getFullFileName(prev_file);
2015
4.47k
                line = prev_line;
2016
4.47k
                return true;
2017
4.47k
            }
2018
77.3M
            prev_file = file_;
2019
77.3M
            prev_line = line_;
2020
77.3M
        }
2021
2022
80.9M
        if (seq_end) {
2023
2.49M
            state = START;
2024
2.49M
            reset();
2025
2.49M
        }
2026
80.9M
    }
2027
2028
0
    return false;
2029
4.47k
}
2030
2031
} // namespace doris
2032
2033
#endif