/root/doris/be/src/common/dwarf.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // This file is copied from |
18 | | // https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/Dwarf.cpp |
19 | | // and modified by Doris |
20 | | |
21 | | #if defined(__ELF__) && !defined(__FreeBSD__) |
22 | | |
23 | | /* |
24 | | * Copyright 2012-present Facebook, Inc. |
25 | | * |
26 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
27 | | * you may not use this file except in compliance with the License. |
28 | | * You may obtain a copy of the License at |
29 | | * |
30 | | * http://www.apache.org/licenses/LICENSE-2.0 |
31 | | * |
32 | | * Unless required by applicable law or agreed to in writing, software |
33 | | * distributed under the License is distributed on an "AS IS" BASIS, |
34 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
35 | | * See the License for the specific language governing permissions and |
36 | | * limitations under the License. |
37 | | */ |
38 | | |
39 | | /** This file was edited for ClickHouse. |
40 | | */ |
41 | | |
42 | | #include "common/dwarf.h" |
43 | | |
44 | | #include <cstring> |
45 | | |
46 | | #include "common/elf.h" |
47 | | #include "common/logging.h" |
48 | | |
49 | 258 | #define DW_CHILDREN_no 0 |
50 | | |
51 | 258 | #define DW_FORM_addr 1 |
52 | 0 | #define DW_FORM_block1 0x0a |
53 | 0 | #define DW_FORM_block2 3 |
54 | 0 | #define DW_FORM_block4 4 |
55 | 0 | #define DW_FORM_block 9 |
56 | 0 | #define DW_FORM_exprloc 0x18 |
57 | 0 | #define DW_FORM_data1 0x0b |
58 | 0 | #define DW_FORM_ref1 0x11 |
59 | 258 | #define DW_FORM_data2 0x05 |
60 | 258 | #define DW_FORM_ref2 0x12 |
61 | 0 | #define DW_FORM_data4 0x06 |
62 | 0 | #define DW_FORM_ref4 0x13 |
63 | 0 | #define DW_FORM_data8 0x07 |
64 | 0 | #define DW_FORM_ref8 0x14 |
65 | 0 | #define DW_FORM_ref_sig8 0x20 |
66 | 0 | #define DW_FORM_sdata 0x0d |
67 | 42.6k | #define DW_FORM_udata 0x0f |
68 | 0 | #define DW_FORM_ref_udata 0x15 |
69 | 0 | #define DW_FORM_flag 0x0c |
70 | 0 | #define DW_FORM_flag_present 0x19 |
71 | 1.24k | #define DW_FORM_sec_offset 0x17 |
72 | 1.24k | #define DW_FORM_ref_addr 0x10 |
73 | 0 | #define DW_FORM_string 0x08 |
74 | 0 | #define DW_FORM_strp 0x0e |
75 | 0 | #define DW_FORM_indirect 0x16 |
76 | 0 | #define DW_FORM_strx 0x1a |
77 | 0 | #define DW_FORM_addrx 0x1b |
78 | 0 | #define DW_FORM_ref_sup4 0x1c |
79 | 0 | #define DW_FORM_strp_sup 0x1d |
80 | 0 | #define DW_FORM_data16 0x1e |
81 | 47.8k | #define DW_FORM_line_strp 0x1f |
82 | 6.09k | #define DW_FORM_implicit_const 0x21 |
83 | 258 | #define DW_FORM_rnglistx 0x23 |
84 | 0 | #define DW_FORM_loclistx 0x22 |
85 | 0 | #define DW_FORM_ref_sup8 0x24 |
86 | 774 | #define DW_FORM_strx1 0x25 |
87 | 0 | #define DW_FORM_strx2 0x26 |
88 | 0 | #define DW_FORM_strx3 0x27 |
89 | 0 | #define DW_FORM_strx4 0x28 |
90 | 0 | #define DW_FORM_addrx1 0x29 |
91 | 0 | #define DW_FORM_addrx2 0x2a |
92 | 0 | #define DW_FORM_addrx3 0x2b |
93 | 0 | #define DW_FORM_addrx4 0x2c |
94 | | |
95 | 129 | #define DW_TAG_compile_unit 0x11 |
96 | 0 | #define DW_TAG_subprogram 0x2e |
97 | 0 | #define DW_TAG_try_block 0x32 |
98 | 0 | #define DW_TAG_catch_block 0x25 |
99 | 0 | #define DW_TAG_entry_point 0x03 |
100 | 0 | #define DW_TAG_common_block 0x1a |
101 | 0 | #define DW_TAG_lexical_block 0x0b |
102 | | |
103 | 129 | #define DW_AT_stmt_list 0x10 |
104 | 129 | #define DW_AT_comp_dir 0x1b |
105 | 129 | #define DW_AT_name 0x03 |
106 | 0 | #define DW_AT_high_pc 0x12 |
107 | 129 | #define DW_AT_low_pc 0x11 |
108 | 129 | #define DW_AT_entry_pc 0x52 |
109 | 0 | #define DW_AT_ranges 0x55 |
110 | 0 | #define DW_AT_abstract_origin 0x31 |
111 | 0 | #define DW_AT_call_line 0x59 |
112 | 0 | #define DW_AT_call_file 0x58 |
113 | 0 | #define DW_AT_linkage_name 0x6e |
114 | 0 | #define DW_AT_specification 0x47 |
115 | 129 | #define DW_AT_str_offsets_base 0x72 |
116 | 129 | #define DW_AT_addr_base 0x73 |
117 | 129 | #define DW_AT_rnglists_base 0x74 |
118 | 104 | #define DW_AT_loclists_base 0x8c |
119 | 129 | #define DW_AT_GNU_ranges_base 0x2132 |
120 | 129 | #define DW_AT_GNU_addr_base 0x2133 |
121 | | |
122 | 0 | #define DW_LNE_define_file 0x03 |
123 | 68.2k | #define DW_LNS_copy 0x01 |
124 | 82.0k | #define DW_LNS_advance_pc 0x02 |
125 | 371k | #define DW_LNS_advance_line 0x03 |
126 | 43.8k | #define DW_LNS_set_file 0x04 |
127 | 139k | #define DW_LNS_set_column 0x05 |
128 | 90.6k | #define DW_LNS_negate_stmt 0x06 |
129 | 0 | #define DW_LNS_set_basic_block 0x07 |
130 | 87.4k | #define DW_LNS_const_add_pc 0x08 |
131 | 0 | #define DW_LNS_fixed_advance_pc 0x09 |
132 | 15.0k | #define DW_LNS_set_prologue_end 0x0a |
133 | 14.8k | #define DW_LNS_set_epilogue_begin 0x0b |
134 | 0 | #define DW_LNS_set_isa 0x0c |
135 | 8.87k | #define DW_LNE_end_sequence 0x01 |
136 | 8.98k | #define DW_LNE_set_address 0x02 |
137 | 0 | #define DW_LNE_set_discriminator 0x04 |
138 | | |
139 | 48.0k | #define DW_LNCT_path 0x1 |
140 | 42.8k | #define DW_LNCT_directory_index 0x2 |
141 | 0 | #define DW_LNCT_timestamp 0x3 |
142 | 0 | #define DW_LNCT_size 0x4 |
143 | 0 | #define DW_LNCT_MD5 0x5 |
144 | | |
145 | 0 | #define DW_RLE_end_of_list 0x0 |
146 | 0 | #define DW_RLE_base_addressx 0x1 |
147 | 0 | #define DW_RLE_startx_endx 0x2 |
148 | 0 | #define DW_RLE_startx_length 0x3 |
149 | 0 | #define DW_RLE_offset_pair 0x4 |
150 | 0 | #define DW_RLE_base_address 0x5 |
151 | 0 | #define DW_RLE_start_end 0x6 |
152 | 0 | #define DW_RLE_start_length 0x7 |
153 | | |
154 | | namespace doris { |
155 | | |
156 | | Dwarf::Dwarf(const std::shared_ptr<Elf>& elf) |
157 | | : elf_(elf), |
158 | | abbrev_(getSection(".debug_abbrev")), |
159 | | addr_(getSection(".debug_addr")), |
160 | | aranges_(getSection(".debug_aranges")), |
161 | | info_(getSection(".debug_info")), |
162 | | line_(getSection(".debug_line")), |
163 | | line_str_(getSection(".debug_line_str")), |
164 | | loclists_(getSection(".debug_loclists")), |
165 | | ranges_(getSection(".debug_ranges")), |
166 | | rnglists_(getSection(".debug_rnglists")), |
167 | | str_(getSection(".debug_str")), |
168 | 38 | str_offsets_(getSection(".debug_str_offsets")) { |
169 | | // Optional sections: |
170 | | // - debugAranges_: for fast address range lookup. |
171 | | // If missing .debug_info can be used - but it's much slower (linear |
172 | | // scan). |
173 | | // - debugRanges_ (DWARF 4) / debugRnglists_ (DWARF 5): non-contiguous |
174 | | // address ranges of debugging information entries. |
175 | | // Used for inline function address lookup. |
176 | 38 | if (info_.empty() || abbrev_.empty() || line_.empty() || str_.empty()) { |
177 | 19 | elf_ = nullptr; |
178 | 19 | } |
179 | 38 | } |
180 | | |
181 | 448 | Dwarf::Section::Section(std::string_view d) : is64_bit(false), data(d) {} |
182 | | |
183 | | #define SAFE_CHECK(cond, ...) \ |
184 | 1.39G | do { \ |
185 | 1.39G | if (!(cond)) LOG(FATAL) << fmt::format(__VA_ARGS__); \ |
186 | 1.39G | } while (false) |
187 | | |
188 | | namespace { |
189 | | // Maximum number of DIEAbbreviation to cache in a compilation unit. Used to |
190 | | // speed up inline function lookup. |
191 | | const uint32_t kMaxAbbreviationEntries = 1000; |
192 | | |
193 | | // All following read* functions read from a std::string_view, advancing the |
194 | | // std::string_view, and aborting if there's not enough room. |
195 | | |
196 | | // Read (bitwise) one object of type T |
197 | | template <typename T> |
198 | | requires std::is_trivial_v<T> && std::is_standard_layout_v<T> |
199 | 1.39G | T read(std::string_view& sp) { |
200 | 1.39G | SAFE_CHECK(sp.size() >= sizeof(T), "underflow: expected bytes {}, got bytes {}", sizeof(T), |
201 | 1.39G | sp.size()); |
202 | 1.39G | T x; |
203 | 1.39G | memcpy(&x, sp.data(), sizeof(T)); |
204 | 1.39G | sp.remove_prefix(sizeof(T)); |
205 | 1.39G | return x; |
206 | 1.39G | } dwarf.cpp:_ZN5doris12_GLOBAL__N_14readIhEET_RSt17basic_string_viewIcSt11char_traitsIcEE Line | Count | Source | 199 | 3.21M | T read(std::string_view& sp) { | 200 | 3.21M | SAFE_CHECK(sp.size() >= sizeof(T), "underflow: expected bytes {}, got bytes {}", sizeof(T), | 201 | 3.21M | sp.size()); | 202 | 3.21M | T x; | 203 | 3.21M | memcpy(&x, sp.data(), sizeof(T)); | 204 | 3.21M | sp.remove_prefix(sizeof(T)); | 205 | 3.21M | return x; | 206 | 3.21M | } |
dwarf.cpp:_ZN5doris12_GLOBAL__N_14readImEET_RSt17basic_string_viewIcSt11char_traitsIcEE Line | Count | Source | 199 | 1.38G | T read(std::string_view& sp) { | 200 | 1.38G | SAFE_CHECK(sp.size() >= sizeof(T), "underflow: expected bytes {}, got bytes {}", sizeof(T), | 201 | 1.38G | sp.size()); | 202 | 1.38G | T x; | 203 | 1.38G | memcpy(&x, sp.data(), sizeof(T)); | 204 | 1.38G | sp.remove_prefix(sizeof(T)); | 205 | 1.38G | return x; | 206 | 1.38G | } |
dwarf.cpp:_ZN5doris12_GLOBAL__N_14readIjEET_RSt17basic_string_viewIcSt11char_traitsIcEE Line | Count | Source | 199 | 700k | T read(std::string_view& sp) { | 200 | 700k | SAFE_CHECK(sp.size() >= sizeof(T), "underflow: expected bytes {}, got bytes {}", sizeof(T), | 201 | 700k | sp.size()); | 202 | 700k | T x; | 203 | 700k | memcpy(&x, sp.data(), sizeof(T)); | 204 | 700k | sp.remove_prefix(sizeof(T)); | 205 | 700k | return x; | 206 | 700k | } |
dwarf.cpp:_ZN5doris12_GLOBAL__N_14readItEET_RSt17basic_string_viewIcSt11char_traitsIcEE Line | Count | Source | 199 | 325k | T read(std::string_view& sp) { | 200 | 325k | SAFE_CHECK(sp.size() >= sizeof(T), "underflow: expected bytes {}, got bytes {}", sizeof(T), | 201 | 325k | sp.size()); | 202 | 325k | T x; | 203 | 325k | memcpy(&x, sp.data(), sizeof(T)); | 204 | 325k | sp.remove_prefix(sizeof(T)); | 205 | 325k | return x; | 206 | 325k | } |
dwarf.cpp:_ZN5doris12_GLOBAL__N_14readIaEET_RSt17basic_string_viewIcSt11char_traitsIcEE Line | Count | Source | 199 | 129 | T read(std::string_view& sp) { | 200 | 129 | SAFE_CHECK(sp.size() >= sizeof(T), "underflow: expected bytes {}, got bytes {}", sizeof(T), | 201 | 129 | sp.size()); | 202 | 129 | T x; | 203 | 129 | memcpy(&x, sp.data(), sizeof(T)); | 204 | 129 | sp.remove_prefix(sizeof(T)); | 205 | 129 | return x; | 206 | 129 | } |
|
207 | | |
208 | | // Read (bitwise) an unsigned number of N bytes (N in 1, 2, 3, 4). |
209 | | template <size_t N> |
210 | 774 | uint64_t readU64(std::string_view& sp) { |
211 | 774 | SAFE_CHECK(sp.size() >= N, "underflow"); |
212 | 774 | uint64_t x = 0; |
213 | 774 | memcpy(&x, sp.data(), N); |
214 | 774 | sp.remove_prefix(N); |
215 | 774 | return x; |
216 | 774 | } dwarf.cpp:_ZN5doris12_GLOBAL__N_17readU64ILm1EEEmRSt17basic_string_viewIcSt11char_traitsIcEE Line | Count | Source | 210 | 774 | uint64_t readU64(std::string_view& sp) { | 211 | 774 | SAFE_CHECK(sp.size() >= N, "underflow"); | 212 | 774 | uint64_t x = 0; | 213 | 774 | memcpy(&x, sp.data(), N); | 214 | 774 | sp.remove_prefix(N); | 215 | 774 | return x; | 216 | 774 | } |
Unexecuted instantiation: dwarf.cpp:_ZN5doris12_GLOBAL__N_17readU64ILm2EEEmRSt17basic_string_viewIcSt11char_traitsIcEE Unexecuted instantiation: dwarf.cpp:_ZN5doris12_GLOBAL__N_17readU64ILm3EEEmRSt17basic_string_viewIcSt11char_traitsIcEE Unexecuted instantiation: dwarf.cpp:_ZN5doris12_GLOBAL__N_17readU64ILm4EEEmRSt17basic_string_viewIcSt11char_traitsIcEE |
217 | | |
218 | | // Read ULEB (unsigned) varint value; algorithm from the DWARF spec |
219 | 892k | uint64_t readULEB(std::string_view& sp, uint8_t& shift, uint8_t& val) { |
220 | 892k | uint64_t r = 0; |
221 | 892k | shift = 0; |
222 | 1.20M | do { |
223 | 1.20M | val = read<uint8_t>(sp); |
224 | 1.20M | r |= (uint64_t(val & 0x7f) << shift); |
225 | 1.20M | shift += 7; |
226 | 1.20M | } while (val & 0x80); |
227 | 892k | return r; |
228 | 892k | } |
229 | | |
230 | 521k | uint64_t readULEB(std::string_view& sp) { |
231 | 521k | uint8_t shift; |
232 | 521k | uint8_t val; |
233 | 521k | return readULEB(sp, shift, val); |
234 | 521k | } |
235 | | |
236 | | // Read SLEB (signed) varint value; algorithm from the DWARF spec |
237 | 371k | int64_t readSLEB(std::string_view& sp) { |
238 | 371k | uint8_t shift; |
239 | 371k | uint8_t val; |
240 | 371k | uint64_t r = readULEB(sp, shift, val); |
241 | | |
242 | 371k | if (shift < 64 && (val & 0x40)) { |
243 | 185k | r |= -(1ULL << shift); // sign extend |
244 | 185k | } |
245 | | |
246 | 371k | return r; |
247 | 371k | } |
248 | | |
249 | | // Read a value of "section offset" type, which may be 4 or 8 bytes |
250 | 374k | uint64_t readOffset(std::string_view& sp, bool is64_bit) { |
251 | 374k | return is64_bit ? read<uint64_t>(sp) : read<uint32_t>(sp); |
252 | 374k | } |
253 | | |
254 | | // Read "len" bytes |
255 | 0 | std::string_view readBytes(std::string_view& sp, uint64_t len) { |
256 | 0 | SAFE_CHECK(len <= sp.size(), "invalid string length: {} vs. {}", len, sp.size()); |
257 | 0 | std::string_view ret(sp.data(), len); |
258 | 0 | sp.remove_prefix(len); |
259 | 0 | return ret; |
260 | 0 | } |
261 | | |
262 | | // Read a null-terminated string |
263 | 48.3k | std::string_view readNullTerminated(std::string_view& sp) { |
264 | 48.3k | const char* p = static_cast<const char*>(memchr(sp.data(), 0, sp.size())); |
265 | 48.3k | SAFE_CHECK(p, "invalid null-terminated string"); |
266 | 48.3k | std::string_view ret(sp.data(), p - sp.data()); |
267 | 48.3k | sp = std::string_view(p + 1, sp.size()); |
268 | 48.3k | return ret; |
269 | 48.3k | } |
270 | | |
271 | | // Get a string from the section |
272 | 48.3k | std::string_view getStringFromStringSection(std::string_view section, uint64_t offset) { |
273 | 48.3k | SAFE_CHECK(offset < section.size(), "invalid section offset"); |
274 | 48.3k | std::string_view sp(section); |
275 | 48.3k | sp.remove_prefix(offset); |
276 | 48.3k | return readNullTerminated(sp); |
277 | 48.3k | } |
278 | | |
279 | | // Skip over padding until sp.data() - start is a multiple of alignment |
280 | 324k | void skipPadding(std::string_view& sp, const char* start, size_t alignment) { |
281 | 324k | size_t remainder = (sp.data() - start) % alignment; |
282 | 324k | if (remainder) { |
283 | 324k | SAFE_CHECK(alignment - remainder <= sp.size(), "invalid padding"); |
284 | 324k | sp.remove_prefix(alignment - remainder); |
285 | 324k | } |
286 | 324k | } |
287 | | |
288 | | } // namespace |
289 | | |
290 | | Dwarf::Path::Path(std::string_view baseDir, std::string_view subDir, std::string_view file) |
291 | 258 | : baseDir_(baseDir), subDir_(subDir), file_(file) { |
292 | 258 | using std::swap; |
293 | | |
294 | | // Normalize |
295 | 258 | if (file_.empty()) { |
296 | 0 | baseDir_ = {}; |
297 | 0 | subDir_ = {}; |
298 | 0 | return; |
299 | 0 | } |
300 | | |
301 | 258 | if (file_[0] == '/') { |
302 | | // file_ is absolute |
303 | 129 | baseDir_ = {}; |
304 | 129 | subDir_ = {}; |
305 | 129 | } |
306 | | |
307 | 258 | if (!subDir_.empty() && subDir_[0] == '/') { |
308 | 129 | baseDir_ = {}; // subDir_ is absolute |
309 | 129 | } |
310 | | |
311 | | // Make sure it's never the case that baseDir_ is empty, but subDir_ isn't. |
312 | 258 | if (baseDir_.empty()) { |
313 | 258 | swap(baseDir_, subDir_); |
314 | 258 | } |
315 | 258 | } |
316 | | |
317 | 258 | size_t Dwarf::Path::size() const { |
318 | 258 | size_t size = 0; |
319 | 258 | bool needs_slash = false; |
320 | | |
321 | 258 | if (!baseDir_.empty()) { |
322 | 258 | size += baseDir_.size(); |
323 | 258 | needs_slash = baseDir_.back() != '/'; |
324 | 258 | } |
325 | | |
326 | 258 | if (!subDir_.empty()) { |
327 | 0 | size += needs_slash; |
328 | 0 | size += subDir_.size(); |
329 | 0 | needs_slash = subDir_.back() != '/'; |
330 | 0 | } |
331 | | |
332 | 258 | if (!file_.empty()) { |
333 | 258 | size += needs_slash; |
334 | 258 | size += file_.size(); |
335 | 258 | } |
336 | | |
337 | 258 | return size; |
338 | 258 | } |
339 | | |
340 | 0 | size_t Dwarf::Path::toBuffer(char* buf, size_t bufSize) const { |
341 | 0 | size_t total_size = 0; |
342 | 0 | bool needs_slash = false; |
343 | |
|
344 | 0 | auto append = [&](std::string_view sp) { |
345 | 0 | if (bufSize >= 2) { |
346 | 0 | size_t to_copy = std::min(sp.size(), bufSize - 1); |
347 | 0 | memcpy(buf, sp.data(), to_copy); |
348 | 0 | buf += to_copy; |
349 | 0 | bufSize -= to_copy; |
350 | 0 | } |
351 | 0 | total_size += sp.size(); |
352 | 0 | }; |
353 | |
|
354 | 0 | if (!baseDir_.empty()) { |
355 | 0 | append(baseDir_); |
356 | 0 | needs_slash = baseDir_.back() != '/'; |
357 | 0 | } |
358 | 0 | if (!subDir_.empty()) { |
359 | 0 | if (needs_slash) { |
360 | 0 | append("/"); |
361 | 0 | } |
362 | 0 | append(subDir_); |
363 | 0 | needs_slash = subDir_.back() != '/'; |
364 | 0 | } |
365 | 0 | if (!file_.empty()) { |
366 | 0 | if (needs_slash) { |
367 | 0 | append("/"); |
368 | 0 | } |
369 | 0 | append(file_); |
370 | 0 | } |
371 | 0 | if (bufSize) { |
372 | 0 | *buf = '\0'; |
373 | 0 | } |
374 | |
|
375 | 0 | SAFE_CHECK(total_size == size(), "Size mismatch"); |
376 | 0 | return total_size; |
377 | 0 | } |
378 | | |
379 | 129 | void Dwarf::Path::toString(std::string& dest) const { |
380 | 129 | size_t initial_size = dest.size(); |
381 | 129 | dest.reserve(initial_size + size()); |
382 | 129 | if (!baseDir_.empty()) { |
383 | 129 | dest.append(baseDir_.begin(), baseDir_.end()); |
384 | 129 | } |
385 | 129 | if (!subDir_.empty()) { |
386 | 0 | if (!dest.empty() && dest.back() != '/') { |
387 | 0 | dest.push_back('/'); |
388 | 0 | } |
389 | 0 | dest.append(subDir_.begin(), subDir_.end()); |
390 | 0 | } |
391 | 129 | if (!file_.empty()) { |
392 | 129 | if (!dest.empty() && dest.back() != '/') { |
393 | 129 | dest.push_back('/'); |
394 | 129 | } |
395 | 129 | dest.append(file_.begin(), file_.end()); |
396 | 129 | } |
397 | 129 | SAFE_CHECK(dest.size() == initial_size + size(), "Size mismatch"); |
398 | 129 | } |
399 | | |
400 | | // Next chunk in section |
401 | 325k | bool Dwarf::Section::next(std::string_view& chunk) { |
402 | 325k | chunk = data; |
403 | 325k | if (chunk.empty()) { |
404 | 190 | return false; |
405 | 190 | } |
406 | | |
407 | | // Initial length is a uint32_t value for a 32-bit section, and |
408 | | // a 96-bit value (0xffffffff followed by the 64-bit length) for a 64-bit |
409 | | // section. |
410 | 325k | auto initial_length = read<uint32_t>(chunk); |
411 | 325k | is64_bit = (initial_length == uint32_t(-1)); |
412 | 325k | auto length = is64_bit ? read<uint64_t>(chunk) : initial_length; |
413 | 325k | SAFE_CHECK(length <= chunk.size(), "invalid DWARF section"); |
414 | 325k | chunk = std::string_view(chunk.data(), length); |
415 | 325k | data = std::string_view(chunk.end(), data.end() - chunk.end()); |
416 | 325k | return true; |
417 | 325k | } |
418 | | |
419 | 418 | std::string_view Dwarf::getSection(const char* name) const { |
420 | 418 | std::optional<Elf::Section> elf_section = elf_->findSectionByName(name); |
421 | 418 | if (!elf_section) { |
422 | 209 | return {}; |
423 | 209 | } |
424 | | |
425 | 209 | #ifdef SHF_COMPRESSED |
426 | 209 | if (elf_section->header.sh_flags & SHF_COMPRESSED) { |
427 | 0 | return {}; |
428 | 0 | } |
429 | 209 | #endif |
430 | | |
431 | 209 | return {elf_section->begin(), elf_section->size()}; |
432 | 209 | } |
433 | | |
434 | | // static |
435 | 258 | bool Dwarf::readAbbreviation(std::string_view& section, DIEAbbreviation& abbr) { |
436 | | // abbreviation code |
437 | 258 | abbr.code = readULEB(section); |
438 | 258 | if (abbr.code == 0) { |
439 | 0 | return false; |
440 | 0 | } |
441 | | |
442 | | // abbreviation tag |
443 | 258 | abbr.tag = readULEB(section); |
444 | | |
445 | | // does this entry have children? |
446 | 258 | abbr.has_children = (read<uint8_t>(section) != DW_CHILDREN_no); |
447 | | |
448 | | // attributes |
449 | 258 | const char* attribute_begin = section.data(); |
450 | 3.04k | for (;;) { |
451 | 3.04k | SAFE_CHECK(!section.empty(), "invalid attribute section"); |
452 | 3.04k | auto attr = readAttributeSpec(section); |
453 | 3.04k | if (attr.name == 0 && attr.form == 0) { |
454 | 258 | break; |
455 | 258 | } |
456 | 3.04k | } |
457 | | |
458 | 258 | abbr.attributes = std::string_view(attribute_begin, section.data() - attribute_begin); |
459 | 258 | return true; |
460 | 258 | } |
461 | | |
462 | | // static |
463 | 0 | void Dwarf::readCompilationUnitAbbrs(std::string_view abbrev, CompilationUnit& cu) { |
464 | 0 | abbrev.remove_prefix(cu.abbrev_offset); |
465 | |
|
466 | 0 | DIEAbbreviation abbr; |
467 | 0 | while (readAbbreviation(abbrev, abbr)) { |
468 | | // Abbreviation code 0 is reserved for null debugging information entries. |
469 | 0 | if (abbr.code != 0 && abbr.code <= kMaxAbbreviationEntries) { |
470 | 0 | cu.abbr_cache[abbr.code - 1] = abbr; |
471 | 0 | } |
472 | 0 | } |
473 | 0 | } |
474 | | |
475 | | size_t Dwarf::forEachChild(const CompilationUnit& cu, const Die& die, |
476 | 0 | std::function<bool(const Die& die)> f) const { |
477 | 0 | size_t next_die_offset = forEachAttribute(cu, die, [&](const Attribute&) { return true; }); |
478 | 0 | if (!die.abbr.has_children) { |
479 | 0 | return next_die_offset; |
480 | 0 | } |
481 | | |
482 | 0 | auto child_die = getDieAtOffset(cu, next_die_offset); |
483 | 0 | while (child_die.code != 0) { |
484 | 0 | if (!f(child_die)) { |
485 | 0 | return child_die.offset; |
486 | 0 | } |
487 | | |
488 | | // NOTE: Don't run `f` over grandchildren, just skip over them. |
489 | 0 | size_t sibling_offset = forEachChild(cu, child_die, [](const Die&) { return true; }); |
490 | 0 | child_die = getDieAtOffset(cu, sibling_offset); |
491 | 0 | } |
492 | | |
493 | | // childDie is now a dummy die whose offset is to the code 0 marking the |
494 | | // end of the children. Need to add one to get the offset of the next die. |
495 | 0 | return child_die.offset + 1; |
496 | 0 | } |
497 | | |
498 | | /* |
499 | | * Iterate over all attributes of the given DIE, calling the given callable |
500 | | * for each. Iteration is stopped early if any of the calls return false. |
501 | | */ |
502 | | size_t Dwarf::forEachAttribute(const CompilationUnit& cu, const Die& die, |
503 | 258 | std::function<bool(const Attribute& die)> f) const { |
504 | 258 | auto attrs = die.abbr.attributes; |
505 | 258 | auto values = std::string_view {info_.data() + die.offset + die.attr_offset, |
506 | 258 | cu.offset + cu.size - die.offset - die.attr_offset}; |
507 | 3.04k | while (auto spec = readAttributeSpec(attrs)) { |
508 | 2.78k | auto attr = readAttribute(cu, die, spec, values); |
509 | 2.78k | if (!f(attr)) { |
510 | 0 | return static_cast<size_t>(-1); |
511 | 0 | } |
512 | 2.78k | } |
513 | 258 | return values.data() - info_.data(); |
514 | 258 | } |
515 | | |
516 | | Dwarf::Attribute Dwarf::readAttribute(const CompilationUnit& cu, const Die& die, AttributeSpec spec, |
517 | 2.78k | std::string_view& info) const { |
518 | | // DWARF 5 introduces new FORMs whose values are relative to some base attrs: |
519 | | // DW_AT_str_offsets_base, DW_AT_rnglists_base, DW_AT_addr_base. |
520 | | // Debug Fission DWARF 4 uses GNU DW_AT_GNU_ranges_base & DW_AT_GNU_addr_base. |
521 | | // |
522 | | // The order in which attributes appear in a CU is not defined. |
523 | | // The DW_AT_*_base attrs may appear after attributes that need them. |
524 | | // The DW_AT_*_base attrs are CU specific; so we read them just after |
525 | | // reading the CU header. During this first pass return empty values |
526 | | // when encountering a FORM that depends on DW_AT_*_base. |
527 | 2.78k | auto get_string_using_offset_table = [&](uint64_t index) { |
528 | 774 | if (!cu.str_offsets_base.has_value()) { |
529 | 258 | return std::string_view(); |
530 | 258 | } |
531 | | // DWARF 5: 7.26 String Offsets Table |
532 | | // The DW_AT_str_offsets_base attribute points to the first entry following |
533 | | // the header. The entries are indexed sequentially from this base entry, |
534 | | // starting from 0. |
535 | 516 | auto sp = str_offsets_.substr(*cu.str_offsets_base + |
536 | 516 | index * (cu.is64Bit ? sizeof(uint64_t) : sizeof(uint32_t))); |
537 | 516 | uint64_t str_offset = readOffset(sp, cu.is64Bit); |
538 | 516 | return getStringFromStringSection(str_, str_offset); |
539 | 774 | }; |
540 | | |
541 | 2.78k | auto read_debug_addr = [&](uint64_t index) { |
542 | 0 | if (!cu.addr_base.has_value()) { |
543 | 0 | return uint64_t(0); |
544 | 0 | } |
545 | | // DWARF 5: 7.27 Address Table |
546 | | // The DW_AT_addr_base attribute points to the first entry following the |
547 | | // header. The entries are indexed sequentially from this base entry, |
548 | | // starting from 0. |
549 | 0 | auto sp = addr_.substr(*cu.addr_base + index * sizeof(uint64_t)); |
550 | 0 | return read<uint64_t>(sp); |
551 | 0 | }; |
552 | | |
553 | 2.78k | switch (spec.form) { |
554 | 258 | case DW_FORM_addr: |
555 | 258 | return {spec, die, read<uintptr_t>(info)}; |
556 | 0 | case DW_FORM_block1: |
557 | 0 | return {spec, die, readBytes(info, read<uint8_t>(info))}; |
558 | 0 | case DW_FORM_block2: |
559 | 0 | return {spec, die, readBytes(info, read<uint16_t>(info))}; |
560 | 0 | case DW_FORM_block4: |
561 | 0 | return {spec, die, readBytes(info, read<uint32_t>(info))}; |
562 | 0 | case DW_FORM_block: |
563 | 0 | [[fallthrough]]; |
564 | 0 | case DW_FORM_exprloc: |
565 | 0 | return {spec, die, readBytes(info, readULEB(info))}; |
566 | 0 | case DW_FORM_data1: |
567 | 0 | [[fallthrough]]; |
568 | 0 | case DW_FORM_ref1: |
569 | 0 | return {spec, die, read<uint8_t>(info)}; |
570 | 258 | case DW_FORM_data2: |
571 | 258 | [[fallthrough]]; |
572 | 258 | case DW_FORM_ref2: |
573 | 258 | return {spec, die, read<uint16_t>(info)}; |
574 | 0 | case DW_FORM_data4: |
575 | 0 | [[fallthrough]]; |
576 | 0 | case DW_FORM_ref4: |
577 | 0 | return {spec, die, read<uint32_t>(info)}; |
578 | 0 | case DW_FORM_data8: |
579 | 0 | [[fallthrough]]; |
580 | 0 | case DW_FORM_ref8: |
581 | 0 | [[fallthrough]]; |
582 | 0 | case DW_FORM_ref_sig8: |
583 | 0 | return {spec, die, read<uint64_t>(info)}; |
584 | 0 | case DW_FORM_sdata: |
585 | 0 | return {spec, die, static_cast<uint64_t>(readSLEB(info))}; |
586 | 0 | case DW_FORM_udata: |
587 | 0 | [[fallthrough]]; |
588 | 0 | case DW_FORM_ref_udata: |
589 | 0 | return {spec, die, readULEB(info)}; |
590 | 0 | case DW_FORM_flag: |
591 | 0 | return {spec, die, read<uint8_t>(info)}; |
592 | 0 | case DW_FORM_flag_present: |
593 | 0 | return {spec, die, 1ULL}; |
594 | 1.24k | case DW_FORM_sec_offset: |
595 | 1.24k | [[fallthrough]]; |
596 | 1.24k | case DW_FORM_ref_addr: |
597 | 1.24k | return {spec, die, readOffset(info, die.is64Bit)}; |
598 | 0 | case DW_FORM_string: |
599 | 0 | return {spec, die, readNullTerminated(info)}; |
600 | 0 | case DW_FORM_strp: |
601 | 0 | return {spec, die, getStringFromStringSection(str_, readOffset(info, die.is64Bit))}; |
602 | 0 | case DW_FORM_indirect: // form is explicitly specified |
603 | | // Update spec with the actual FORM. |
604 | 0 | spec.form = readULEB(info); |
605 | 0 | return readAttribute(cu, die, spec, info); |
606 | | |
607 | | // DWARF 5: |
608 | 0 | case DW_FORM_implicit_const: // form is explicitly specified |
609 | | // For attributes with this form, the attribute specification contains a |
610 | | // third part, which is a signed LEB128 number. The value of this number |
611 | | // is used as the value of the attribute, and no value is stored in the |
612 | | // .debug_info section. |
613 | 0 | return {spec, die, static_cast<uint64_t>(spec.implicitConst)}; |
614 | | |
615 | 0 | case DW_FORM_addrx: |
616 | 0 | return {spec, die, read_debug_addr(readULEB(info))}; |
617 | 0 | case DW_FORM_addrx1: |
618 | 0 | return {spec, die, read_debug_addr(readU64<1>(info))}; |
619 | 0 | case DW_FORM_addrx2: |
620 | 0 | return {spec, die, read_debug_addr(readU64<2>(info))}; |
621 | 0 | case DW_FORM_addrx3: |
622 | 0 | return {spec, die, read_debug_addr(readU64<3>(info))}; |
623 | 0 | case DW_FORM_addrx4: |
624 | 0 | return {spec, die, read_debug_addr(readU64<4>(info))}; |
625 | | |
626 | 0 | case DW_FORM_line_strp: |
627 | 0 | return {spec, die, getStringFromStringSection(line_str_, readOffset(info, die.is64Bit))}; |
628 | | |
629 | 0 | case DW_FORM_strx: |
630 | 0 | return {spec, die, get_string_using_offset_table(readULEB(info))}; |
631 | 774 | case DW_FORM_strx1: |
632 | 774 | return {spec, die, get_string_using_offset_table(readU64<1>(info))}; |
633 | 0 | case DW_FORM_strx2: |
634 | 0 | return {spec, die, get_string_using_offset_table(readU64<2>(info))}; |
635 | 0 | case DW_FORM_strx3: |
636 | 0 | return {spec, die, get_string_using_offset_table(readU64<3>(info))}; |
637 | 0 | case DW_FORM_strx4: |
638 | 0 | return {spec, die, get_string_using_offset_table(readU64<4>(info))}; |
639 | | |
640 | 258 | case DW_FORM_rnglistx: { |
641 | 258 | auto index = readULEB(info); |
642 | 258 | if (!cu.rnglists_base.has_value()) { |
643 | 129 | return {spec, die, 0ULL}; |
644 | 129 | } |
645 | 129 | const uint64_t offset_size = cu.is64Bit ? sizeof(uint64_t) : sizeof(uint32_t); |
646 | 129 | auto sp = rnglists_.substr(*cu.rnglists_base + index * offset_size); |
647 | 129 | auto offset = readOffset(sp, cu.is64Bit); |
648 | 129 | return {spec, die, *cu.rnglists_base + offset}; |
649 | 258 | } |
650 | | |
651 | 0 | case DW_FORM_loclistx: { |
652 | 0 | auto index = readULEB(info); |
653 | 0 | if (!cu.loclists_base.has_value()) { |
654 | 0 | return {spec, die, 0ULL}; |
655 | 0 | } |
656 | 0 | const uint64_t offset_size = cu.is64Bit ? sizeof(uint64_t) : sizeof(uint32_t); |
657 | 0 | auto sp = loclists_.substr(*cu.loclists_base + index * offset_size); |
658 | 0 | auto offset = readOffset(sp, cu.is64Bit); |
659 | 0 | return {spec, die, *cu.loclists_base + offset}; |
660 | 0 | } |
661 | | |
662 | 0 | case DW_FORM_data16: |
663 | 0 | return {spec, die, readBytes(info, 16)}; |
664 | | |
665 | 0 | case DW_FORM_ref_sup4: |
666 | 0 | case DW_FORM_ref_sup8: |
667 | 0 | case DW_FORM_strp_sup: |
668 | 0 | SAFE_CHECK(false, "Unexpected DWARF5 supplimentary object files"); |
669 | |
|
670 | 0 | default: |
671 | 0 | SAFE_CHECK(false, "invalid attribute form"); |
672 | 2.78k | } |
673 | 0 | return {spec, die, 0ULL}; |
674 | 2.78k | } |
675 | | |
676 | | // static |
677 | 6.09k | Dwarf::AttributeSpec Dwarf::readAttributeSpec(std::string_view& sp) { |
678 | 6.09k | Dwarf::AttributeSpec spec; |
679 | 6.09k | spec.name = readULEB(sp); |
680 | 6.09k | spec.form = readULEB(sp); |
681 | 6.09k | if (spec.form == DW_FORM_implicit_const) { |
682 | 0 | spec.implicitConst = readSLEB(sp); |
683 | 0 | } |
684 | 6.09k | return spec; |
685 | 6.09k | } |
686 | | |
687 | 129 | Dwarf::CompilationUnit Dwarf::getCompilationUnit(uint64_t offset) const { |
688 | | // SAFE_CHECK(offset < info_.size(), "unexpected offset"); |
689 | 129 | CompilationUnit cu; |
690 | 129 | std::string_view chunk(info_); |
691 | 129 | cu.offset = offset; |
692 | 129 | chunk.remove_prefix(offset); |
693 | | |
694 | | // 1) unit_length |
695 | 129 | auto initial_length = read<uint32_t>(chunk); |
696 | 129 | cu.is64Bit = (initial_length == uint32_t(-1)); |
697 | 129 | cu.size = cu.is64Bit ? read<uint64_t>(chunk) : initial_length; |
698 | 129 | SAFE_CHECK(cu.size <= chunk.size(), "invalid chunk size"); |
699 | 129 | cu.size += cu.is64Bit ? 12 : 4; |
700 | | |
701 | | // 2) version |
702 | 129 | cu.version = read<uint16_t>(chunk); |
703 | 129 | SAFE_CHECK(cu.version >= 2 && cu.version <= 5, "invalid info version"); |
704 | | |
705 | 129 | if (cu.version == 5) { |
706 | | // DWARF5: 7.5.1.1 Full and Partial Compilation Unit Headers |
707 | | // 3) unit_type (new DWARF 5) |
708 | 129 | cu.unit_type = read<uint8_t>(chunk); |
709 | 129 | if (cu.unit_type != DW_UT_compile && cu.unit_type != DW_UT_skeleton) { |
710 | 0 | return cu; |
711 | 0 | } |
712 | | // 4) address_size |
713 | 129 | cu.addr_size = read<uint8_t>(chunk); |
714 | 129 | SAFE_CHECK(cu.addr_size == sizeof(uintptr_t), "invalid address size"); |
715 | | |
716 | | // 5) debug_abbrev_offset |
717 | 129 | cu.abbrev_offset = readOffset(chunk, cu.is64Bit); |
718 | | |
719 | 129 | if (cu.unit_type == DW_UT_skeleton) { |
720 | | // 6) dwo_id |
721 | 0 | read<uint64_t>(chunk); |
722 | 0 | } |
723 | 129 | } else { |
724 | | // DWARF4 has a single type of unit in .debug_info |
725 | 0 | cu.unit_type = DW_UT_compile; |
726 | | // 3) debug_abbrev_offset |
727 | 0 | cu.abbrev_offset = readOffset(chunk, cu.is64Bit); |
728 | | // 4) address_size |
729 | 0 | cu.addr_size = read<uint8_t>(chunk); |
730 | 0 | SAFE_CHECK(cu.addr_size == sizeof(uintptr_t), "invalid address size"); |
731 | 0 | } |
732 | 129 | cu.first_die = chunk.data() - info_.data(); |
733 | 129 | if (cu.version < 5) { |
734 | 0 | return cu; |
735 | 0 | } |
736 | | |
737 | 129 | Die die = getDieAtOffset(cu, cu.first_die); |
738 | 129 | if (die.abbr.tag != DW_TAG_compile_unit) { |
739 | 0 | return cu; |
740 | 0 | } |
741 | | |
742 | | // Read the DW_AT_*_base attributes. |
743 | | // Attributes which use FORMs relative to these base attrs |
744 | | // will not have valid values during this first pass! |
745 | 1.39k | forEachAttribute(cu, die, [&](const Attribute& attr) { |
746 | 1.39k | switch (attr.spec.name) { |
747 | 129 | case DW_AT_addr_base: |
748 | 129 | case DW_AT_GNU_addr_base: |
749 | 129 | cu.addr_base = std::get<uint64_t>(attr.attr_value); |
750 | 129 | break; |
751 | 104 | case DW_AT_loclists_base: |
752 | 104 | cu.loclists_base = std::get<uint64_t>(attr.attr_value); |
753 | 104 | break; |
754 | 129 | case DW_AT_rnglists_base: |
755 | 129 | case DW_AT_GNU_ranges_base: |
756 | 129 | cu.rnglists_base = std::get<uint64_t>(attr.attr_value); |
757 | 129 | break; |
758 | 129 | case DW_AT_str_offsets_base: |
759 | 129 | cu.str_offsets_base = std::get<uint64_t>(attr.attr_value); |
760 | 129 | break; |
761 | 1.39k | } |
762 | 1.39k | return true; // continue forEachAttribute |
763 | 1.39k | }); |
764 | 129 | return cu; |
765 | 129 | } |
766 | | |
767 | | // Finds the Compilation Unit starting at offset. |
768 | 0 | Dwarf::CompilationUnit Dwarf::findCompilationUnit(uint64_t targetOffset) const { |
769 | | // SAFE_CHECK(targetOffset < info_.size(), "unexpected target address"); |
770 | 0 | uint64_t offset = 0; |
771 | 0 | while (offset < info_.size()) { |
772 | 0 | std::string_view chunk(info_); |
773 | 0 | chunk.remove_prefix(offset); |
774 | |
|
775 | 0 | auto initial_length = read<uint32_t>(chunk); |
776 | 0 | auto is64_bit = (initial_length == static_cast<uint32_t>(-1)); |
777 | 0 | auto size = is64_bit ? read<uint64_t>(chunk) : initial_length; |
778 | 0 | SAFE_CHECK(size <= chunk.size(), "invalid chunk size"); |
779 | 0 | size += is64_bit ? 12 : 4; |
780 | |
|
781 | 0 | if (offset + size > targetOffset) { |
782 | 0 | break; |
783 | 0 | } |
784 | 0 | offset += size; |
785 | 0 | } |
786 | 0 | return getCompilationUnit(offset); |
787 | 0 | } |
788 | | |
789 | 258 | Dwarf::DIEAbbreviation Dwarf::getAbbreviation(uint64_t code, uint64_t offset) const { |
790 | | // Linear search in the .debug_abbrev section, starting at offset |
791 | 258 | std::string_view section = abbrev_; |
792 | 258 | section.remove_prefix(offset); |
793 | | |
794 | 258 | Dwarf::DIEAbbreviation abbr; |
795 | 258 | while (readAbbreviation(section, abbr)) { |
796 | 258 | if (abbr.code == code) { |
797 | 258 | return abbr; |
798 | 258 | } |
799 | 258 | } |
800 | | |
801 | 0 | SAFE_CHECK(false, "could not find abbreviation code"); |
802 | 0 | } |
803 | | |
804 | | Dwarf::AttributeValue Dwarf::readAttributeValue(std::string_view& sp, uint64_t form, |
805 | 0 | bool is64_bit) const { |
806 | 0 | switch (form) { |
807 | 0 | case DW_FORM_addr: |
808 | 0 | return uint64_t(read<uintptr_t>(sp)); |
809 | 0 | case DW_FORM_block1: |
810 | 0 | return readBytes(sp, read<uint8_t>(sp)); |
811 | 0 | case DW_FORM_block2: |
812 | 0 | return readBytes(sp, read<uint16_t>(sp)); |
813 | 0 | case DW_FORM_block4: |
814 | 0 | return readBytes(sp, read<uint32_t>(sp)); |
815 | 0 | case DW_FORM_block: |
816 | 0 | [[fallthrough]]; |
817 | 0 | case DW_FORM_exprloc: |
818 | 0 | return readBytes(sp, readULEB(sp)); |
819 | 0 | case DW_FORM_data1: |
820 | 0 | [[fallthrough]]; |
821 | 0 | case DW_FORM_ref1: |
822 | 0 | return uint64_t(read<uint8_t>(sp)); |
823 | 0 | case DW_FORM_data2: |
824 | 0 | [[fallthrough]]; |
825 | 0 | case DW_FORM_ref2: |
826 | 0 | return uint64_t(read<uint16_t>(sp)); |
827 | 0 | case DW_FORM_data4: |
828 | 0 | [[fallthrough]]; |
829 | 0 | case DW_FORM_ref4: |
830 | 0 | return uint64_t(read<uint32_t>(sp)); |
831 | 0 | case DW_FORM_data8: |
832 | 0 | [[fallthrough]]; |
833 | 0 | case DW_FORM_ref8: |
834 | 0 | return read<uint64_t>(sp); |
835 | 0 | case DW_FORM_sdata: |
836 | 0 | return uint64_t(readSLEB(sp)); |
837 | 0 | case DW_FORM_udata: |
838 | 0 | [[fallthrough]]; |
839 | 0 | case DW_FORM_ref_udata: |
840 | 0 | return readULEB(sp); |
841 | 0 | case DW_FORM_flag: |
842 | 0 | return uint64_t(read<uint8_t>(sp)); |
843 | 0 | case DW_FORM_flag_present: |
844 | 0 | return uint64_t(1); |
845 | 0 | case DW_FORM_sec_offset: |
846 | 0 | [[fallthrough]]; |
847 | 0 | case DW_FORM_ref_addr: |
848 | 0 | return readOffset(sp, is64_bit); |
849 | 0 | case DW_FORM_string: |
850 | 0 | return readNullTerminated(sp); |
851 | 0 | case DW_FORM_strp: |
852 | 0 | return getStringFromStringSection(str_, readOffset(sp, is64_bit)); |
853 | 0 | case DW_FORM_indirect: // form is explicitly specified |
854 | 0 | return readAttributeValue(sp, readULEB(sp), is64_bit); |
855 | 0 | default: |
856 | 0 | SAFE_CHECK(false, "invalid attribute form"); |
857 | 0 | return uint64_t(1); |
858 | 0 | ; |
859 | 0 | } |
860 | 0 | } |
861 | | |
862 | | /** |
863 | | * Find @address in .debug_aranges and return the offset in |
864 | | * .debug_info for compilation unit to which this address belongs. |
865 | | */ |
866 | 319 | bool Dwarf::findDebugInfoOffset(uintptr_t address, std::string_view aranges, uint64_t& offset) { |
867 | 319 | Section aranges_section(aranges); |
868 | 319 | std::string_view chunk; |
869 | 325k | while (aranges_section.next(chunk)) { |
870 | 324k | auto version = read<uint16_t>(chunk); |
871 | 324k | SAFE_CHECK(version == 2, "invalid aranges version"); |
872 | | |
873 | 324k | offset = readOffset(chunk, aranges_section.is64Bit()); |
874 | 324k | auto address_size = read<uint8_t>(chunk); |
875 | 324k | SAFE_CHECK(address_size == sizeof(uintptr_t), "invalid address size"); |
876 | 324k | auto segment_size = read<uint8_t>(chunk); |
877 | 324k | SAFE_CHECK(segment_size == 0, "segmented architecture not supported"); |
878 | | |
879 | | // Padded to a multiple of 2 addresses. |
880 | | // Strangely enough, this is the only place in the DWARF spec that requires |
881 | | // padding. |
882 | 324k | skipPadding(chunk, aranges.data(), 2 * sizeof(uintptr_t)); |
883 | 694M | for (;;) { |
884 | 694M | auto start = read<uintptr_t>(chunk); |
885 | 694M | auto length = read<uintptr_t>(chunk); |
886 | | |
887 | 694M | if (start == 0 && length == 0) { |
888 | 324k | break; |
889 | 324k | } |
890 | | |
891 | | // Is our address in this range? |
892 | 694M | if (address >= start && address < start + length) { |
893 | 129 | return true; |
894 | 129 | } |
895 | 694M | } |
896 | 324k | } |
897 | 190 | return false; |
898 | 319 | } |
899 | | |
900 | 258 | Dwarf::Die Dwarf::getDieAtOffset(const CompilationUnit& cu, uint64_t offset) const { |
901 | 258 | SAFE_CHECK(offset < info_.size(), "unexpected offset {}, info size {}", offset, info_.size()); |
902 | 258 | Die die; |
903 | 258 | std::string_view sp {info_.data() + offset, cu.offset + cu.size - offset}; |
904 | 258 | die.offset = offset; |
905 | 258 | die.is64Bit = cu.is64Bit; |
906 | 258 | auto code = readULEB(sp); |
907 | 258 | die.code = code; |
908 | 258 | if (code == 0) { |
909 | 0 | return die; |
910 | 0 | } |
911 | 258 | die.attr_offset = sp.data() - info_.data() - offset; |
912 | 258 | die.abbr = !cu.abbr_cache.empty() && die.code < kMaxAbbreviationEntries |
913 | 258 | ? cu.abbr_cache[die.code - 1] |
914 | 258 | : getAbbreviation(die.code, cu.abbrev_offset); |
915 | | |
916 | 258 | return die; |
917 | 258 | } |
918 | | |
919 | | /** |
920 | | * Find the @locationInfo for @address in the compilation unit represented |
921 | | * by the @sp .debug_info entry. |
922 | | * Returns whether the address was found. |
923 | | * Advances @sp to the next entry in .debug_info. |
924 | | */ |
925 | | bool Dwarf::findLocation(uintptr_t address, const LocationInfoMode mode, CompilationUnit& cu, |
926 | 129 | LocationInfo& info, std::vector<SymbolizedFrame>& inline_frames) const { |
927 | 129 | Die die = getDieAtOffset(cu, cu.first_die); |
928 | | // Partial compilation unit (DW_TAG_partial_unit) is not supported. |
929 | 129 | SAFE_CHECK(die.abbr.tag == DW_TAG_compile_unit, "expecting compile unit entry"); |
930 | | |
931 | | // Offset in .debug_line for the line number VM program for this CU |
932 | 129 | std::optional<uint64_t> line_offset = 0; |
933 | 129 | std::string_view compilation_directory; |
934 | 129 | std::optional<std::string_view> main_file_name; |
935 | 129 | std::optional<uint64_t> base_addr_cu; |
936 | | |
937 | 1.39k | forEachAttribute(cu, die, [&](const Attribute& attr) { |
938 | 1.39k | switch (attr.spec.name) { |
939 | 129 | case DW_AT_stmt_list: |
940 | | // Offset in .debug_line for the line number VM program for this |
941 | | // compilation unit |
942 | 129 | line_offset = std::get<uint64_t>(attr.attr_value); |
943 | 129 | break; |
944 | 129 | case DW_AT_comp_dir: |
945 | | // Compilation directory |
946 | 129 | compilation_directory = std::get<std::string_view>(attr.attr_value); |
947 | 129 | break; |
948 | 129 | case DW_AT_name: |
949 | | // File name of main file being compiled |
950 | 129 | main_file_name = std::get<std::string_view>(attr.attr_value); |
951 | 129 | break; |
952 | 129 | case DW_AT_low_pc: |
953 | 129 | case DW_AT_entry_pc: |
954 | | // 2.17.1: historically DW_AT_low_pc was used. DW_AT_entry_pc was |
955 | | // introduced in DWARF3. Support either to determine the base address of |
956 | | // the CU. |
957 | 129 | base_addr_cu = std::get<uint64_t>(attr.attr_value); |
958 | 129 | break; |
959 | 1.39k | } |
960 | | // Iterate through all attributes until find all above. |
961 | 1.39k | return true; |
962 | 1.39k | }); |
963 | | |
964 | 129 | if (main_file_name) { |
965 | 129 | info.has_main_file = true; |
966 | 129 | info.main_file = Path(compilation_directory, "", *main_file_name); |
967 | 129 | } |
968 | | |
969 | 129 | if (!line_offset) { |
970 | 0 | return false; |
971 | 0 | } |
972 | | |
973 | 129 | std::string_view line_section(line_); |
974 | 129 | line_section.remove_prefix(*line_offset); |
975 | 129 | LineNumberVM line_vm(line_section, compilation_directory, str_, line_str_); |
976 | | |
977 | | // Execute line number VM program to find file and line |
978 | 129 | info.has_file_and_line = line_vm.findAddress(address, info.file, info.line); |
979 | | |
980 | 129 | bool check_inline = (mode == LocationInfoMode::FULL_WITH_INLINE); |
981 | | |
982 | 129 | if (info.has_file_and_line && check_inline) { |
983 | | // Re-get the compilation unit with abbreviation cached. |
984 | 0 | cu.abbr_cache.clear(); |
985 | 0 | cu.abbr_cache.resize(kMaxAbbreviationEntries); |
986 | 0 | readCompilationUnitAbbrs(abbrev_, cu); |
987 | | |
988 | | // Find the subprogram that matches the given address. |
989 | 0 | Die subprogram; |
990 | 0 | findSubProgramDieForAddress(cu, die, address, base_addr_cu, subprogram); |
991 | | |
992 | | // Subprogram is the DIE of caller function. |
993 | 0 | if (/*check_inline &&*/ subprogram.abbr.has_children) { |
994 | | // Use an extra location and get its call file and call line, so that |
995 | | // they can be used for the second last location when we don't have |
996 | | // enough inline frames for all inline functions call stack. |
997 | 0 | const size_t max_size = Dwarf::kMaxInlineLocationInfoPerFrame + 1; |
998 | 0 | std::vector<CallLocation> call_locations; |
999 | 0 | call_locations.reserve(Dwarf::kMaxInlineLocationInfoPerFrame + 1); |
1000 | |
|
1001 | 0 | findInlinedSubroutineDieForAddress(cu, subprogram, line_vm, address, base_addr_cu, |
1002 | 0 | call_locations, max_size); |
1003 | 0 | size_t num_found = call_locations.size(); |
1004 | |
|
1005 | 0 | if (num_found > 0) { |
1006 | 0 | const auto inner_most_file = info.file; |
1007 | 0 | const auto inner_most_line = info.line; |
1008 | | |
1009 | | // Earlier we filled in locationInfo: |
1010 | | // - mainFile: the path to the CU -- the file where the non-inlined |
1011 | | // call is made from. |
1012 | | // - file + line: the location of the inner-most inlined call. |
1013 | | // Here we already find inlined info so mainFile would be redundant. |
1014 | 0 | info.has_main_file = false; |
1015 | 0 | info.main_file = Path {}; |
1016 | | // @findInlinedSubroutineDieForAddress fills inlineLocations[0] with the |
1017 | | // file+line of the non-inlined outer function making the call. |
1018 | | // locationInfo.name is already set by the caller by looking up the |
1019 | | // non-inlined function @address belongs to. |
1020 | 0 | info.has_file_and_line = true; |
1021 | 0 | info.file = call_locations[0].file; |
1022 | 0 | info.line = call_locations[0].line; |
1023 | | |
1024 | | // The next inlined subroutine's call file and call line is the current |
1025 | | // caller's location. |
1026 | 0 | for (size_t i = 0; i < num_found - 1; ++i) { |
1027 | 0 | call_locations[i].file = call_locations[i + 1].file; |
1028 | 0 | call_locations[i].line = call_locations[i + 1].line; |
1029 | 0 | } |
1030 | | // CallLocation for the inner-most inlined function: |
1031 | | // - will be computed if enough space was available in the passed |
1032 | | // buffer. |
1033 | | // - will have a .name, but no !.file && !.line |
1034 | | // - its corresponding file+line is the one returned by LineVM based |
1035 | | // on @address. |
1036 | | // Use the inner-most inlined file+line info we got from the LineVM. |
1037 | 0 | call_locations[num_found - 1].file = inner_most_file; |
1038 | 0 | call_locations[num_found - 1].line = inner_most_line; |
1039 | | |
1040 | | // Fill in inline frames in reverse order (as expected by the caller). |
1041 | 0 | std::reverse(call_locations.begin(), call_locations.end()); |
1042 | 0 | for (const auto& call_location : call_locations) { |
1043 | 0 | SymbolizedFrame inline_frame; |
1044 | 0 | inline_frame.found = true; |
1045 | 0 | inline_frame.addr = address; |
1046 | 0 | if (!call_location.name.empty()) { |
1047 | 0 | inline_frame.name = call_location.name.data(); |
1048 | 0 | } else { |
1049 | 0 | inline_frame.name = nullptr; |
1050 | 0 | } |
1051 | 0 | inline_frame.location.has_file_and_line = true; |
1052 | 0 | inline_frame.location.file = call_location.file; |
1053 | 0 | inline_frame.location.line = call_location.line; |
1054 | 0 | inline_frames.push_back(inline_frame); |
1055 | 0 | } |
1056 | 0 | } |
1057 | 0 | } |
1058 | 0 | } |
1059 | | |
1060 | 129 | return info.has_file_and_line; |
1061 | 129 | } |
1062 | | |
1063 | | void Dwarf::findSubProgramDieForAddress(const CompilationUnit& cu, const Die& die, uint64_t address, |
1064 | | std::optional<uint64_t> base_addr_cu, |
1065 | 0 | Die& subprogram) const { |
1066 | 0 | forEachChild(cu, die, [&](const Die& child_die) { |
1067 | 0 | if (child_die.abbr.tag == DW_TAG_subprogram) { |
1068 | 0 | std::optional<uint64_t> low_pc; |
1069 | 0 | std::optional<uint64_t> high_pc; |
1070 | 0 | std::optional<bool> is_high_pc_addr; |
1071 | 0 | std::optional<uint64_t> range_offset; |
1072 | 0 | forEachAttribute(cu, child_die, [&](const Attribute& attr) { |
1073 | 0 | switch (attr.spec.name) { |
1074 | 0 | case DW_AT_ranges: |
1075 | 0 | range_offset = std::get<uint64_t>(attr.attr_value); |
1076 | 0 | break; |
1077 | 0 | case DW_AT_low_pc: |
1078 | 0 | low_pc = std::get<uint64_t>(attr.attr_value); |
1079 | 0 | break; |
1080 | 0 | case DW_AT_high_pc: |
1081 | | // The value of the DW_AT_high_pc attribute can be |
1082 | | // an address (DW_FORM_addr*) or an offset (DW_FORM_data*). |
1083 | 0 | is_high_pc_addr = attr.spec.form == DW_FORM_addr || // |
1084 | 0 | attr.spec.form == DW_FORM_addrx || // |
1085 | 0 | attr.spec.form == DW_FORM_addrx1 || // |
1086 | 0 | attr.spec.form == DW_FORM_addrx2 || // |
1087 | 0 | attr.spec.form == DW_FORM_addrx3 || // |
1088 | 0 | attr.spec.form == DW_FORM_addrx4; |
1089 | 0 | high_pc = std::get<uint64_t>(attr.attr_value); |
1090 | 0 | break; |
1091 | 0 | } |
1092 | | // Iterate through all attributes until find all above. |
1093 | 0 | return true; |
1094 | 0 | }); |
1095 | 0 | bool pc_match = low_pc && high_pc && is_high_pc_addr && address >= *low_pc && |
1096 | 0 | (address < (*is_high_pc_addr ? *high_pc : *low_pc + *high_pc)); |
1097 | 0 | bool range_match = |
1098 | 0 | range_offset && isAddrInRangeList(cu, address, base_addr_cu, |
1099 | 0 | range_offset.value(), cu.addr_size); |
1100 | 0 | if (pc_match || range_match) { |
1101 | 0 | subprogram = child_die; |
1102 | 0 | return false; |
1103 | 0 | } |
1104 | 0 | } |
1105 | | |
1106 | 0 | findSubProgramDieForAddress(cu, child_die, address, base_addr_cu, subprogram); |
1107 | | |
1108 | | // Iterates through children until find the inline subprogram. |
1109 | 0 | return true; |
1110 | 0 | }); |
1111 | 0 | } |
1112 | | |
1113 | | /** |
1114 | | * Find DW_TAG_inlined_subroutine child DIEs that contain @address and |
1115 | | * then extract: |
1116 | | * - Where was it called from (DW_AT_call_file & DW_AT_call_line): |
1117 | | * the statement or expression that caused the inline expansion. |
1118 | | * - The inlined function's name. As a function may be inlined multiple |
1119 | | * times, common attributes like DW_AT_linkage_name or DW_AT_name |
1120 | | * are only stored in its "concrete out-of-line instance" (a |
1121 | | * DW_TAG_subprogram) which we find using DW_AT_abstract_origin. |
1122 | | */ |
1123 | | void Dwarf::findInlinedSubroutineDieForAddress(const CompilationUnit& cu, const Die& die, |
1124 | | const LineNumberVM& line_vm, uint64_t address, |
1125 | | std::optional<uint64_t> base_addr_cu, |
1126 | | std::vector<CallLocation>& locations, |
1127 | 0 | const size_t max_size) const { |
1128 | 0 | if (locations.size() >= max_size) { |
1129 | 0 | return; |
1130 | 0 | } |
1131 | | |
1132 | 0 | forEachChild(cu, die, [&](const Die& child_die) { |
1133 | | // Between a DW_TAG_subprogram and and DW_TAG_inlined_subroutine we might |
1134 | | // have arbitrary intermediary "nodes", including DW_TAG_common_block, |
1135 | | // DW_TAG_lexical_block, DW_TAG_try_block, DW_TAG_catch_block and |
1136 | | // DW_TAG_with_stmt, etc. |
1137 | | // We can't filter with locationhere since its range may be not specified. |
1138 | | // See section 2.6.2: A location list containing only an end of list entry |
1139 | | // describes an object that exists in the source code but not in the |
1140 | | // executable program. |
1141 | 0 | if (child_die.abbr.tag == DW_TAG_try_block || child_die.abbr.tag == DW_TAG_catch_block || |
1142 | 0 | child_die.abbr.tag == DW_TAG_entry_point || child_die.abbr.tag == DW_TAG_common_block || |
1143 | 0 | child_die.abbr.tag == DW_TAG_lexical_block) { |
1144 | 0 | findInlinedSubroutineDieForAddress(cu, child_die, line_vm, address, base_addr_cu, |
1145 | 0 | locations, max_size); |
1146 | 0 | return true; |
1147 | 0 | } |
1148 | | |
1149 | 0 | std::optional<uint64_t> low_pc; |
1150 | 0 | std::optional<uint64_t> high_pc; |
1151 | 0 | std::optional<bool> is_high_pc_addr; |
1152 | 0 | std::optional<uint64_t> abstract_origin; |
1153 | 0 | std::optional<uint64_t> abstract_origin_ref_type; |
1154 | 0 | std::optional<uint64_t> call_file; |
1155 | 0 | std::optional<uint64_t> call_line; |
1156 | 0 | std::optional<uint64_t> range_offset; |
1157 | 0 | forEachAttribute(cu, child_die, [&](const Attribute& attr) { |
1158 | 0 | switch (attr.spec.name) { |
1159 | 0 | case DW_AT_ranges: |
1160 | 0 | range_offset = std::get<uint64_t>(attr.attr_value); |
1161 | 0 | break; |
1162 | 0 | case DW_AT_low_pc: |
1163 | 0 | low_pc = std::get<uint64_t>(attr.attr_value); |
1164 | 0 | break; |
1165 | 0 | case DW_AT_high_pc: |
1166 | | // The value of the DW_AT_high_pc attribute can be |
1167 | | // an address (DW_FORM_addr*) or an offset (DW_FORM_data*). |
1168 | 0 | is_high_pc_addr = attr.spec.form == DW_FORM_addr || // |
1169 | 0 | attr.spec.form == DW_FORM_addrx || // |
1170 | 0 | attr.spec.form == DW_FORM_addrx1 || // |
1171 | 0 | attr.spec.form == DW_FORM_addrx2 || // |
1172 | 0 | attr.spec.form == DW_FORM_addrx3 || // |
1173 | 0 | attr.spec.form == DW_FORM_addrx4; |
1174 | 0 | high_pc = std::get<uint64_t>(attr.attr_value); |
1175 | 0 | break; |
1176 | 0 | case DW_AT_abstract_origin: |
1177 | 0 | abstract_origin_ref_type = attr.spec.form; |
1178 | 0 | abstract_origin = std::get<uint64_t>(attr.attr_value); |
1179 | 0 | break; |
1180 | 0 | case DW_AT_call_line: |
1181 | 0 | call_line = std::get<uint64_t>(attr.attr_value); |
1182 | 0 | break; |
1183 | 0 | case DW_AT_call_file: |
1184 | 0 | call_file = std::get<uint64_t>(attr.attr_value); |
1185 | 0 | break; |
1186 | 0 | } |
1187 | | // Iterate through all until find all above attributes. |
1188 | 0 | return true; |
1189 | 0 | }); |
1190 | | |
1191 | | // 2.17 Code Addresses and Ranges |
1192 | | // Any debugging information entry describing an entity that has a |
1193 | | // machine code address or range of machine code addresses, |
1194 | | // which includes compilation units, module initialization, subroutines, |
1195 | | // ordinary blocks, try/catch blocks, labels and the like, may have |
1196 | | // - A DW_AT_low_pc attribute for a single address, |
1197 | | // - A DW_AT_low_pc and DW_AT_high_pc pair of attributes for a |
1198 | | // single contiguous range of addresses, or |
1199 | | // - A DW_AT_ranges attribute for a non-contiguous range of addresses. |
1200 | | // TODO: Support DW_TAG_entry_point and DW_TAG_common_block that don't |
1201 | | // have DW_AT_low_pc/DW_AT_high_pc pairs and DW_AT_ranges. |
1202 | | // TODO: Support relocated address which requires lookup in relocation map. |
1203 | 0 | bool pc_match = low_pc && high_pc && is_high_pc_addr && address >= *low_pc && |
1204 | 0 | (address < (*is_high_pc_addr ? *high_pc : *low_pc + *high_pc)); |
1205 | 0 | bool range_match = range_offset && isAddrInRangeList(cu, address, base_addr_cu, |
1206 | 0 | range_offset.value(), cu.addr_size); |
1207 | 0 | if (!pc_match && !range_match) { |
1208 | | // Address doesn't match. Keep searching other children. |
1209 | 0 | return true; |
1210 | 0 | } |
1211 | | |
1212 | 0 | if (!abstract_origin || !abstract_origin_ref_type || !call_line || !call_file) { |
1213 | | // We expect a single sibling DIE to match on addr, but it's missing |
1214 | | // required fields. Stop searching for other DIEs. |
1215 | 0 | return false; |
1216 | 0 | } |
1217 | | |
1218 | 0 | CallLocation location; |
1219 | 0 | location.file = line_vm.getFullFileName(*call_file); |
1220 | 0 | location.line = *call_line; |
1221 | | |
1222 | | /// Something wrong with receiving debug info about inline. |
1223 | | /// If set to true we stop parsing DWARF. |
1224 | 0 | bool die_for_inline_broken = false; |
1225 | |
|
1226 | 0 | auto get_function_name = [&](const CompilationUnit& srcu, uint64_t die_offset) { |
1227 | 0 | Die decl_die = getDieAtOffset(srcu, die_offset); |
1228 | 0 | auto& die_to_look_for_name = decl_die; |
1229 | |
|
1230 | 0 | Die def_die; |
1231 | | // Jump to the actual function definition instead of declaration for name |
1232 | | // and line info. |
1233 | | // DW_AT_specification: Incomplete, non-defining, or separate declaration |
1234 | | // corresponding to a declaration |
1235 | 0 | auto offset = getAttribute<uint64_t>(srcu, decl_die, DW_AT_specification); |
1236 | 0 | if (offset) { |
1237 | | /// FIXME: actually it's a bug in our DWARF parser. |
1238 | | /// |
1239 | | /// Most of the times compilation unit offset (srcu.offset) is some big number inside .debug_info (like 434782255). |
1240 | | /// Offset of DIE definition is some small relative number to srcu.offset (like 3518). |
1241 | | /// However in some unknown cases offset looks like global, non relative number (like 434672579) and in this |
1242 | | /// case we obviously doing something wrong parsing DWARF. |
1243 | | /// |
1244 | | /// What is important -- this bug? reproduces only with -flto=thin in release mode. |
1245 | | /// Also llvm-dwarfdump --verify ./clickhouse says that our DWARF is ok, so it's another prove |
1246 | | /// that we just doing something wrong. |
1247 | | /// |
1248 | | /// FIXME: Currently we just give up parsing DWARF for inlines when we got into this situation. |
1249 | 0 | if (srcu.offset + offset.value() >= info_.size()) { |
1250 | 0 | die_for_inline_broken = true; |
1251 | 0 | } else { |
1252 | 0 | def_die = getDieAtOffset(srcu, srcu.offset + offset.value()); |
1253 | 0 | die_to_look_for_name = def_die; |
1254 | 0 | } |
1255 | 0 | } |
1256 | |
|
1257 | 0 | std::string_view name; |
1258 | |
|
1259 | 0 | if (die_for_inline_broken) { |
1260 | 0 | return name; |
1261 | 0 | } |
1262 | | |
1263 | | // The file and line will be set in the next inline subroutine based on |
1264 | | // its DW_AT_call_file and DW_AT_call_line. |
1265 | 0 | forEachAttribute(srcu, die_to_look_for_name, [&](const Attribute& attr) { |
1266 | 0 | switch (attr.spec.name) { |
1267 | 0 | case DW_AT_linkage_name: |
1268 | 0 | name = std::get<std::string_view>(attr.attr_value); |
1269 | 0 | break; |
1270 | 0 | case DW_AT_name: |
1271 | | // NOTE: when DW_AT_linkage_name and DW_AT_name match, dwarf |
1272 | | // emitters omit DW_AT_linkage_name (to save space). If present |
1273 | | // DW_AT_linkage_name should always be preferred (mangled C++ name |
1274 | | // vs just the function name). |
1275 | 0 | if (name.empty()) { |
1276 | 0 | name = std::get<std::string_view>(attr.attr_value); |
1277 | 0 | } |
1278 | 0 | break; |
1279 | 0 | } |
1280 | 0 | return true; |
1281 | 0 | }); |
1282 | 0 | return name; |
1283 | 0 | }; |
1284 | | |
1285 | | // DW_AT_abstract_origin is a reference. There a 3 types of references: |
1286 | | // - the reference can identify any debugging information entry within the |
1287 | | // compilation unit (DW_FORM_ref1, DW_FORM_ref2, DW_FORM_ref4, |
1288 | | // DW_FORM_ref8, DW_FORM_ref_udata). This type of reference is an offset |
1289 | | // from the first byte of the compilation header for the compilation unit |
1290 | | // containing the reference. |
1291 | | // - the reference can identify any debugging information entry within a |
1292 | | // .debug_info section; in particular, it may refer to an entry in a |
1293 | | // different compilation unit (DW_FORM_ref_addr) |
1294 | | // - the reference can identify any debugging information type entry that |
1295 | | // has been placed in its own type unit. |
1296 | | // Not applicable for DW_AT_abstract_origin. |
1297 | 0 | location.name = (*abstract_origin_ref_type != DW_FORM_ref_addr) |
1298 | 0 | ? get_function_name(cu, cu.offset + *abstract_origin) |
1299 | 0 | : get_function_name(findCompilationUnit(*abstract_origin), |
1300 | 0 | *abstract_origin); |
1301 | | |
1302 | | /// FIXME: see comment above |
1303 | 0 | if (die_for_inline_broken) { |
1304 | 0 | return false; |
1305 | 0 | } |
1306 | | |
1307 | 0 | locations.push_back(location); |
1308 | |
|
1309 | 0 | findInlinedSubroutineDieForAddress(cu, child_die, line_vm, address, base_addr_cu, locations, |
1310 | 0 | max_size); |
1311 | |
|
1312 | 0 | return false; |
1313 | 0 | }); |
1314 | 0 | } |
1315 | | |
1316 | | bool Dwarf::findAddress(uintptr_t address, LocationInfo& locationInfo, LocationInfoMode mode, |
1317 | 338 | std::vector<SymbolizedFrame>& inline_frames) const { |
1318 | 338 | locationInfo = LocationInfo(); |
1319 | | |
1320 | 338 | if (mode == LocationInfoMode::DISABLED) { |
1321 | 0 | return false; |
1322 | 0 | } |
1323 | | |
1324 | 338 | if (!elf_) { // No file. |
1325 | 19 | return false; |
1326 | 19 | } |
1327 | | |
1328 | 319 | if (!aranges_.empty()) { |
1329 | | // Fast path: find the right .debug_info entry by looking up the |
1330 | | // address in .debug_aranges. |
1331 | 319 | uint64_t offset = 0; |
1332 | 319 | if (findDebugInfoOffset(address, aranges_, offset)) { |
1333 | | // Read compilation unit header from .debug_info |
1334 | 129 | auto unit = getCompilationUnit(offset); |
1335 | 129 | if (unit.unit_type != DW_UT_compile && unit.unit_type != DW_UT_skeleton) { |
1336 | 0 | return false; |
1337 | 0 | } |
1338 | 129 | findLocation(address, mode, unit, locationInfo, inline_frames); |
1339 | 129 | return locationInfo.has_file_and_line; |
1340 | 190 | } else if (mode == LocationInfoMode::FAST) { |
1341 | | // NOTE: Clang (when using -gdwarf-aranges) doesn't generate entries |
1342 | | // in .debug_aranges for some functions, but always generates |
1343 | | // .debug_info entries. Scanning .debug_info is slow, so fall back to |
1344 | | // it only if such behavior is requested via LocationInfoMode. |
1345 | 190 | return false; |
1346 | 190 | } else { |
1347 | 0 | SAFE_CHECK(mode == LocationInfoMode::FULL || mode == LocationInfoMode::FULL_WITH_INLINE, |
1348 | 0 | "unexpected mode"); |
1349 | | // Fall back to the linear scan. |
1350 | 0 | } |
1351 | 319 | } |
1352 | | |
1353 | | // Slow path (linear scan): Iterate over all .debug_info entries |
1354 | | // and look for the address in each compilation unit. |
1355 | 0 | uint64_t offset = 0; |
1356 | 0 | while (offset < info_.size() && !locationInfo.has_file_and_line) { |
1357 | 0 | auto unit = getCompilationUnit(offset); |
1358 | 0 | offset += unit.size; |
1359 | 0 | if (unit.unit_type != DW_UT_compile && unit.unit_type != DW_UT_skeleton) { |
1360 | 0 | continue; |
1361 | 0 | } |
1362 | 0 | findLocation(address, mode, unit, locationInfo, inline_frames); |
1363 | 0 | } |
1364 | |
|
1365 | 0 | return locationInfo.has_file_and_line; |
1366 | 319 | } |
1367 | | |
1368 | | bool Dwarf::isAddrInRangeList(const CompilationUnit& cu, uint64_t address, |
1369 | | std::optional<uint64_t> base_addr, size_t offset, |
1370 | 0 | uint8_t addr_size) const { |
1371 | 0 | SAFE_CHECK(addr_size == 4 || addr_size == 8, "wrong address size"); |
1372 | 0 | if (cu.version <= 4 && !ranges_.empty()) { |
1373 | 0 | const bool is64_bit_addr = addr_size == 8; |
1374 | 0 | std::string_view sp = ranges_; |
1375 | 0 | sp.remove_prefix(offset); |
1376 | 0 | const uint64_t max_addr = is64_bit_addr ? std::numeric_limits<uint64_t>::max() |
1377 | 0 | : std::numeric_limits<uint32_t>::max(); |
1378 | 0 | while (!sp.empty()) { |
1379 | 0 | uint64_t begin = readOffset(sp, is64_bit_addr); |
1380 | 0 | uint64_t end = readOffset(sp, is64_bit_addr); |
1381 | | // The range list entry is a base address selection entry. |
1382 | 0 | if (begin == max_addr) { |
1383 | 0 | base_addr = end; |
1384 | 0 | continue; |
1385 | 0 | } |
1386 | | // The range list entry is an end of list entry. |
1387 | 0 | if (begin == 0 && end == 0) { |
1388 | 0 | break; |
1389 | 0 | } |
1390 | | |
1391 | | // Check if the given address falls in the range list entry. |
1392 | | // 2.17.3 Non-Contiguous Address Ranges |
1393 | | // The applicable base address of a range list entry is determined by the |
1394 | | // closest preceding base address selection entry (see below) in the same |
1395 | | // range list. If there is no such selection entry, then the applicable |
1396 | | // base address defaults to the base address of the compilation unit. |
1397 | 0 | if (base_addr && address >= begin + *base_addr && address < end + *base_addr) { |
1398 | 0 | return true; |
1399 | 0 | } |
1400 | 0 | } |
1401 | 0 | } |
1402 | | |
1403 | 0 | if (cu.version == 5 && !rnglists_.empty() && cu.addr_base.has_value()) { |
1404 | 0 | auto rnglists = rnglists_; |
1405 | 0 | rnglists.remove_prefix(offset); |
1406 | |
|
1407 | 0 | while (!rnglists.empty()) { |
1408 | 0 | auto kind = read<uint8_t>(rnglists); |
1409 | 0 | switch (kind) { |
1410 | 0 | case DW_RLE_end_of_list: |
1411 | 0 | return false; |
1412 | 0 | case DW_RLE_base_addressx: { |
1413 | 0 | auto index = readULEB(rnglists); |
1414 | 0 | auto sp = addr_.substr(*cu.addr_base + index * sizeof(uint64_t)); |
1415 | 0 | base_addr = read<uint64_t>(sp); |
1416 | 0 | } break; |
1417 | | |
1418 | 0 | case DW_RLE_startx_endx: { |
1419 | 0 | auto index_start = readULEB(rnglists); |
1420 | 0 | auto index_end = readULEB(rnglists); |
1421 | 0 | auto sp_start = addr_.substr(*cu.addr_base + index_start * sizeof(uint64_t)); |
1422 | 0 | auto start = read<uint64_t>(sp_start); |
1423 | |
|
1424 | 0 | auto sp_end = addr_.substr(*cu.addr_base + index_end * sizeof(uint64_t)); |
1425 | 0 | auto end = read<uint64_t>(sp_end); |
1426 | 0 | if (address >= start && address < end) { |
1427 | 0 | return true; |
1428 | 0 | } |
1429 | 0 | } break; |
1430 | | |
1431 | 0 | case DW_RLE_startx_length: { |
1432 | 0 | auto index_start = readULEB(rnglists); |
1433 | 0 | auto length = readULEB(rnglists); |
1434 | 0 | auto sp_start = addr_.substr(*cu.addr_base + index_start * sizeof(uint64_t)); |
1435 | 0 | auto start = read<uint64_t>(sp_start); |
1436 | |
|
1437 | 0 | auto sp_end = addr_.substr(*cu.addr_base + index_start * sizeof(uint64_t) + length); |
1438 | 0 | auto end = read<uint64_t>(sp_end); |
1439 | 0 | if (start != end && address >= start && address < end) { |
1440 | 0 | return true; |
1441 | 0 | } |
1442 | 0 | } break; |
1443 | | |
1444 | 0 | case DW_RLE_offset_pair: { |
1445 | 0 | auto offset_start = readULEB(rnglists); |
1446 | 0 | auto offset_end = readULEB(rnglists); |
1447 | 0 | if (base_addr && address >= (*base_addr + offset_start) && |
1448 | 0 | address < (*base_addr + offset_end)) { |
1449 | 0 | return true; |
1450 | 0 | } |
1451 | 0 | } break; |
1452 | | |
1453 | 0 | case DW_RLE_base_address: |
1454 | 0 | base_addr = read<uint64_t>(rnglists); |
1455 | 0 | break; |
1456 | | |
1457 | 0 | case DW_RLE_start_end: { |
1458 | 0 | uint64_t start = read<uint64_t>(rnglists); |
1459 | 0 | uint64_t end = read<uint64_t>(rnglists); |
1460 | 0 | if (address >= start && address < end) { |
1461 | 0 | return true; |
1462 | 0 | } |
1463 | 0 | } break; |
1464 | | |
1465 | 0 | case DW_RLE_start_length: { |
1466 | 0 | uint64_t start = read<uint64_t>(rnglists); |
1467 | 0 | uint64_t end = start + readULEB(rnglists); |
1468 | 0 | if (address >= start && address < end) { |
1469 | 0 | return true; |
1470 | 0 | } |
1471 | 0 | } break; |
1472 | | |
1473 | 0 | default: |
1474 | 0 | SAFE_CHECK(false, "Unexpected debug_rnglists entry kind"); |
1475 | 0 | } |
1476 | 0 | } |
1477 | 0 | } |
1478 | 0 | return false; |
1479 | 0 | } |
1480 | | |
1481 | | Dwarf::LineNumberVM::LineNumberVM(std::string_view data, std::string_view compilationDirectory, |
1482 | | std::string_view debugStr, std::string_view debugLineStr) |
1483 | | : compilationDirectory_(compilationDirectory), |
1484 | | debugStr_(debugStr), |
1485 | 129 | debugLineStr_(debugLineStr) { |
1486 | 129 | Section section(data); |
1487 | 129 | SAFE_CHECK(section.next(data_), "invalid line number VM"); |
1488 | 129 | is64Bit_ = section.is64Bit(); |
1489 | 129 | init(); |
1490 | 129 | reset(); |
1491 | 129 | } |
1492 | | |
1493 | 9.11k | void Dwarf::LineNumberVM::reset() { |
1494 | 9.11k | address_ = 0; |
1495 | 9.11k | file_ = 1; |
1496 | 9.11k | line_ = 1; |
1497 | 9.11k | column_ = 0; |
1498 | 9.11k | isStmt_ = defaultIsStmt_; |
1499 | 9.11k | basicBlock_ = false; |
1500 | 9.11k | endSequence_ = false; |
1501 | 9.11k | prologueEnd_ = false; |
1502 | 9.11k | epilogueBegin_ = false; |
1503 | 9.11k | isa_ = 0; |
1504 | 9.11k | discriminator_ = 0; |
1505 | 9.11k | } |
1506 | | |
1507 | | struct LineNumberAttribute { |
1508 | | uint64_t content_type_code; |
1509 | | uint64_t form_code; |
1510 | | std::variant<uint64_t, std::string_view> attr_value; |
1511 | | }; |
1512 | | |
1513 | | LineNumberAttribute readLineNumberAttribute(bool is64_bit, std::string_view& format, |
1514 | | std::string_view& entries, std::string_view debugStr, |
1515 | 90.5k | std::string_view debugLineStr) { |
1516 | 90.5k | uint64_t content_type_code = readULEB(format); |
1517 | 90.5k | uint64_t form_code = readULEB(format); |
1518 | 90.5k | std::variant<uint64_t, std::string_view> attr_value; |
1519 | | |
1520 | 90.5k | switch (content_type_code) { |
1521 | 47.8k | case DW_LNCT_path: { |
1522 | 47.8k | switch (form_code) { |
1523 | 0 | case DW_FORM_string: |
1524 | 0 | attr_value = readNullTerminated(entries); |
1525 | 0 | break; |
1526 | 47.8k | case DW_FORM_line_strp: { |
1527 | 47.8k | auto off = readOffset(entries, is64_bit); |
1528 | 47.8k | attr_value = getStringFromStringSection(debugLineStr, off); |
1529 | 47.8k | } break; |
1530 | 0 | case DW_FORM_strp: |
1531 | 0 | attr_value = getStringFromStringSection(debugStr, readOffset(entries, is64_bit)); |
1532 | 0 | break; |
1533 | 0 | case DW_FORM_strp_sup: |
1534 | 0 | SAFE_CHECK(false, "Unexpected DW_FORM_strp_sup"); |
1535 | 0 | break; |
1536 | 0 | default: |
1537 | 0 | SAFE_CHECK(false, "Unexpected form for DW_LNCT_path"); |
1538 | 0 | break; |
1539 | 47.8k | } |
1540 | 47.8k | } break; |
1541 | | |
1542 | 47.8k | case DW_LNCT_directory_index: { |
1543 | 42.6k | switch (form_code) { |
1544 | 0 | case DW_FORM_data1: |
1545 | 0 | attr_value = read<uint8_t>(entries); |
1546 | 0 | break; |
1547 | 0 | case DW_FORM_data2: |
1548 | 0 | attr_value = read<uint16_t>(entries); |
1549 | 0 | break; |
1550 | 42.6k | case DW_FORM_udata: |
1551 | 42.6k | attr_value = readULEB(entries); |
1552 | 42.6k | break; |
1553 | 0 | default: |
1554 | 0 | SAFE_CHECK(false, "Unexpected form for DW_LNCT_directory_index"); |
1555 | 0 | break; |
1556 | 42.6k | } |
1557 | 42.6k | } break; |
1558 | | |
1559 | 42.6k | case DW_LNCT_timestamp: { |
1560 | 0 | switch (form_code) { |
1561 | 0 | case DW_FORM_udata: |
1562 | 0 | attr_value = readULEB(entries); |
1563 | 0 | break; |
1564 | 0 | case DW_FORM_data4: |
1565 | 0 | attr_value = read<uint32_t>(entries); |
1566 | 0 | break; |
1567 | 0 | case DW_FORM_data8: |
1568 | 0 | attr_value = read<uint64_t>(entries); |
1569 | 0 | break; |
1570 | 0 | case DW_FORM_block: |
1571 | 0 | attr_value = readBytes(entries, readULEB(entries)); |
1572 | 0 | break; |
1573 | 0 | default: |
1574 | 0 | SAFE_CHECK(false, "Unexpected form for DW_LNCT_timestamp"); |
1575 | 0 | } |
1576 | 0 | } break; |
1577 | | |
1578 | 0 | case DW_LNCT_size: { |
1579 | 0 | switch (form_code) { |
1580 | 0 | case DW_FORM_udata: |
1581 | 0 | attr_value = readULEB(entries); |
1582 | 0 | break; |
1583 | 0 | case DW_FORM_data1: |
1584 | 0 | attr_value = read<uint8_t>(entries); |
1585 | 0 | break; |
1586 | 0 | case DW_FORM_data2: |
1587 | 0 | attr_value = read<uint16_t>(entries); |
1588 | 0 | break; |
1589 | 0 | case DW_FORM_data4: |
1590 | 0 | attr_value = read<uint32_t>(entries); |
1591 | 0 | break; |
1592 | 0 | case DW_FORM_data8: |
1593 | 0 | attr_value = read<uint64_t>(entries); |
1594 | 0 | break; |
1595 | 0 | default: |
1596 | 0 | SAFE_CHECK(false, "Unexpected form for DW_LNCT_size"); |
1597 | 0 | break; |
1598 | 0 | } |
1599 | 0 | } break; |
1600 | | |
1601 | 0 | case DW_LNCT_MD5: { |
1602 | 0 | switch (form_code) { |
1603 | 0 | case DW_FORM_data16: |
1604 | 0 | attr_value = readBytes(entries, 16); |
1605 | 0 | break; |
1606 | 0 | default: |
1607 | 0 | SAFE_CHECK(false, "Unexpected form for DW_LNCT_MD5"); |
1608 | 0 | break; |
1609 | 0 | } |
1610 | 0 | } break; |
1611 | | |
1612 | 0 | default: |
1613 | | // TODO: skip over vendor data as specified by the form instead. |
1614 | 0 | SAFE_CHECK(false, "Unexpected vendor content type code"); |
1615 | 0 | break; |
1616 | 90.5k | } |
1617 | 90.5k | return { |
1618 | 90.5k | .content_type_code = content_type_code, |
1619 | 90.5k | .form_code = form_code, |
1620 | 90.5k | .attr_value = attr_value, |
1621 | 90.5k | }; |
1622 | 90.5k | } |
1623 | | |
1624 | 129 | void Dwarf::LineNumberVM::init() { |
1625 | 129 | version_ = read<uint16_t>(data_); |
1626 | 129 | SAFE_CHECK(version_ >= 2 && version_ <= 5, "invalid version in line number VM: {}", version_); |
1627 | 129 | if (version_ == 5) { |
1628 | 129 | auto address_size = read<uint8_t>(data_); |
1629 | 129 | SAFE_CHECK(address_size == sizeof(uintptr_t), "Unexpected Line Number Table address_size"); |
1630 | 129 | auto segment_selector_size = read<uint8_t>(data_); |
1631 | 129 | SAFE_CHECK(segment_selector_size == 0, "Segments not supported"); |
1632 | 129 | } |
1633 | 129 | uint64_t header_length = readOffset(data_, is64Bit_); |
1634 | 129 | SAFE_CHECK(header_length <= data_.size(), "invalid line number VM header length"); |
1635 | 129 | std::string_view header(data_.data(), header_length); |
1636 | 129 | data_ = std::string_view(header.end(), data_.end() - header.end()); |
1637 | | |
1638 | 129 | minLength_ = read<uint8_t>(header); |
1639 | 129 | if (version_ >= 4) { // Version 2 and 3 records don't have this |
1640 | 129 | uint8_t max_ops_per_instruction = read<uint8_t>(header); |
1641 | 129 | SAFE_CHECK(max_ops_per_instruction == 1, "VLIW not supported"); |
1642 | 129 | } |
1643 | 129 | defaultIsStmt_ = read<uint8_t>(header); |
1644 | 129 | lineBase_ = read<int8_t>(header); // yes, signed |
1645 | 129 | lineRange_ = read<uint8_t>(header); |
1646 | 129 | opcodeBase_ = read<uint8_t>(header); |
1647 | 129 | SAFE_CHECK(opcodeBase_ != 0, "invalid opcode base"); |
1648 | 129 | standardOpcodeLengths_ = reinterpret_cast<const uint8_t*>(header.data()); |
1649 | 129 | header.remove_prefix(opcodeBase_ - 1); |
1650 | | |
1651 | 129 | if (version_ <= 4) { |
1652 | | // We don't want to use heap, so we don't keep an unbounded amount of state. |
1653 | | // We'll just skip over include directories and file names here, and |
1654 | | // we'll loop again when we actually need to retrieve one. |
1655 | 0 | std::string_view sp; |
1656 | 0 | const char* tmp = header.data(); |
1657 | 0 | v4_.includeDirectoryCount = 0; |
1658 | 0 | while (!(sp = readNullTerminated(header)).empty()) { |
1659 | 0 | ++v4_.includeDirectoryCount; |
1660 | 0 | } |
1661 | 0 | v4_.includeDirectories = {tmp, header.data()}; |
1662 | |
|
1663 | 0 | tmp = header.data(); |
1664 | 0 | FileName fn; |
1665 | 0 | v4_.fileNameCount = 0; |
1666 | 0 | while (readFileName(header, fn)) { |
1667 | 0 | ++v4_.fileNameCount; |
1668 | 0 | } |
1669 | 0 | v4_.fileNames = {tmp, header.data()}; |
1670 | 129 | } else if (version_ == 5) { |
1671 | 129 | v5_.directoryEntryFormatCount = read<uint8_t>(header); |
1672 | 129 | const char* tmp = header.data(); |
1673 | 258 | for (uint8_t i = 0; i < v5_.directoryEntryFormatCount; i++) { |
1674 | | // A sequence of directory entry format descriptions. Each description |
1675 | | // consists of a pair of ULEB128 values: |
1676 | 129 | readULEB(header); // A content type code |
1677 | 129 | readULEB(header); // A form code using the attribute form codes |
1678 | 129 | } |
1679 | 129 | v5_.directoryEntryFormat = {tmp, header.data()}; |
1680 | 129 | v5_.directoriesCount = readULEB(header); |
1681 | 129 | tmp = header.data(); |
1682 | 4.55k | for (uint64_t i = 0; i < v5_.directoriesCount; i++) { |
1683 | 4.42k | std::string_view format = v5_.directoryEntryFormat; |
1684 | 8.85k | for (uint8_t f = 0; f < v5_.directoryEntryFormatCount; f++) { |
1685 | 4.42k | readLineNumberAttribute(is64Bit_, format, header, debugStr_, debugLineStr_); |
1686 | 4.42k | } |
1687 | 4.42k | } |
1688 | 129 | v5_.directories = {tmp, header.data()}; |
1689 | | |
1690 | 129 | v5_.fileNameEntryFormatCount = read<uint8_t>(header); |
1691 | 129 | tmp = header.data(); |
1692 | 387 | for (uint8_t i = 0; i < v5_.fileNameEntryFormatCount; i++) { |
1693 | | // A sequence of file entry format descriptions. Each description |
1694 | | // consists of a pair of ULEB128 values: |
1695 | 258 | readULEB(header); // A content type code |
1696 | 258 | readULEB(header); // A form code using the attribute form codes |
1697 | 258 | } |
1698 | 129 | v5_.fileNameEntryFormat = {tmp, header.data()}; |
1699 | 129 | v5_.fileNamesCount = readULEB(header); |
1700 | 129 | tmp = header.data(); |
1701 | 21.4k | for (uint64_t i = 0; i < v5_.fileNamesCount; i++) { |
1702 | 21.3k | std::string_view format = v5_.fileNameEntryFormat; |
1703 | 64.0k | for (uint8_t f = 0; f < v5_.fileNameEntryFormatCount; f++) { |
1704 | 42.6k | readLineNumberAttribute(is64Bit_, format, header, debugStr_, debugLineStr_); |
1705 | 42.6k | } |
1706 | 21.3k | } |
1707 | 129 | v5_.fileNames = {tmp, header.data()}; |
1708 | 129 | } |
1709 | 129 | } |
1710 | | |
1711 | 482k | bool Dwarf::LineNumberVM::next(std::string_view& program) { |
1712 | 482k | Dwarf::LineNumberVM::StepResult ret; |
1713 | 1.33M | do { |
1714 | 1.33M | ret = step(program); |
1715 | 1.33M | } while (ret == CONTINUE); |
1716 | | |
1717 | 482k | return (ret == COMMIT); |
1718 | 482k | } |
1719 | | |
1720 | 129 | Dwarf::LineNumberVM::FileName Dwarf::LineNumberVM::getFileName(uint64_t index) const { |
1721 | 129 | if (version_ <= 4) { |
1722 | 0 | SAFE_CHECK(index != 0, "invalid file index 0"); |
1723 | 0 | FileName fn; |
1724 | 0 | if (index <= v4_.fileNameCount) { |
1725 | 0 | std::string_view file_names = v4_.fileNames; |
1726 | 0 | for (; index; --index) { |
1727 | 0 | if (!readFileName(file_names, fn)) { |
1728 | 0 | abort(); |
1729 | 0 | } |
1730 | 0 | } |
1731 | 0 | return fn; |
1732 | 0 | } |
1733 | | |
1734 | 0 | index -= v4_.fileNameCount; |
1735 | |
|
1736 | 0 | std::string_view program = data_; |
1737 | 0 | for (; index; --index) { |
1738 | 0 | SAFE_CHECK(nextDefineFile(program, fn), "invalid file index"); |
1739 | 0 | } |
1740 | |
|
1741 | 0 | return fn; |
1742 | 129 | } else { |
1743 | 129 | FileName fn; |
1744 | 129 | SAFE_CHECK(index < v5_.fileNamesCount, "invalid file index"); |
1745 | 129 | std::string_view file_names = v5_.fileNames; |
1746 | 21.4k | for (uint64_t i = 0; i < v5_.fileNamesCount; i++) { |
1747 | 21.3k | std::string_view format = v5_.fileNameEntryFormat; |
1748 | 64.0k | for (uint8_t f = 0; f < v5_.fileNameEntryFormatCount; f++) { |
1749 | 42.6k | auto attr = readLineNumberAttribute(is64Bit_, format, file_names, debugStr_, |
1750 | 42.6k | debugLineStr_); |
1751 | 42.6k | if (i == index) { |
1752 | 258 | switch (attr.content_type_code) { |
1753 | 129 | case DW_LNCT_path: |
1754 | 129 | fn.relativeName = std::get<std::string_view>(attr.attr_value); |
1755 | 129 | break; |
1756 | 129 | case DW_LNCT_directory_index: |
1757 | 129 | fn.directoryIndex = std::get<uint64_t>(attr.attr_value); |
1758 | 129 | break; |
1759 | 258 | } |
1760 | 258 | } |
1761 | 42.6k | } |
1762 | 21.3k | } |
1763 | 129 | return fn; |
1764 | 129 | } |
1765 | 129 | } |
1766 | | |
1767 | 129 | std::string_view Dwarf::LineNumberVM::getIncludeDirectory(uint64_t index) const { |
1768 | 129 | if (version_ <= 4) { |
1769 | 0 | if (index == 0) { |
1770 | | // In DWARF <= 4 the current directory is not represented in the |
1771 | | // directories field and a directory index of 0 implicitly referred to |
1772 | | // that directory as found in the DW_AT_comp_dir attribute of the |
1773 | | // compilation unit debugging information entry. |
1774 | 0 | return {}; |
1775 | 0 | } |
1776 | | |
1777 | 0 | SAFE_CHECK(index <= v4_.includeDirectoryCount, "invalid include directory"); |
1778 | |
|
1779 | 0 | std::string_view include_directories = v4_.includeDirectories; |
1780 | 0 | std::string_view dir; |
1781 | 0 | for (; index; --index) { |
1782 | 0 | dir = readNullTerminated(include_directories); |
1783 | 0 | if (dir.empty()) { |
1784 | 0 | abort(); // BUG |
1785 | 0 | } |
1786 | 0 | } |
1787 | | |
1788 | 0 | return dir; |
1789 | 129 | } else { |
1790 | 129 | SAFE_CHECK(index < v5_.directoriesCount, "invalid file index"); |
1791 | 129 | std::string_view directories = v5_.directories; |
1792 | 733 | for (uint64_t i = 0; i < v5_.directoriesCount; i++) { |
1793 | 733 | std::string_view format = v5_.directoryEntryFormat; |
1794 | 1.33k | for (uint8_t f = 0; f < v5_.directoryEntryFormatCount; f++) { |
1795 | 733 | auto attr = readLineNumberAttribute(is64Bit_, format, directories, debugStr_, |
1796 | 733 | debugLineStr_); |
1797 | 733 | if (i == index && attr.content_type_code == DW_LNCT_path) { |
1798 | 129 | return std::get<std::string_view>(attr.attr_value); |
1799 | 129 | } |
1800 | 733 | } |
1801 | 733 | } |
1802 | | // This could only happen if DWARF5's directory_entry_format doesn't contain |
1803 | | // a DW_LNCT_path. Highly unlikely, but we shouldn't crash. |
1804 | 0 | return std::string_view("<directory not found>"); |
1805 | 129 | } |
1806 | 129 | } |
1807 | | |
1808 | 0 | bool Dwarf::LineNumberVM::readFileName(std::string_view& program, FileName& fn) { |
1809 | 0 | fn.relativeName = readNullTerminated(program); |
1810 | 0 | if (fn.relativeName.empty()) { |
1811 | 0 | return false; |
1812 | 0 | } |
1813 | 0 | fn.directoryIndex = readULEB(program); |
1814 | | // Skip over file size and last modified time |
1815 | 0 | readULEB(program); |
1816 | 0 | readULEB(program); |
1817 | 0 | return true; |
1818 | 0 | } |
1819 | | |
1820 | 0 | bool Dwarf::LineNumberVM::nextDefineFile(std::string_view& program, FileName& fn) const { |
1821 | 0 | while (!program.empty()) { |
1822 | 0 | auto opcode = read<uint8_t>(program); |
1823 | |
|
1824 | 0 | if (opcode >= opcodeBase_) { // special opcode |
1825 | 0 | continue; |
1826 | 0 | } |
1827 | | |
1828 | 0 | if (opcode != 0) { // standard opcode |
1829 | | // Skip, slurp the appropriate number of LEB arguments |
1830 | 0 | uint8_t arg_count = standardOpcodeLengths_[opcode - 1]; |
1831 | 0 | while (arg_count--) { |
1832 | 0 | readULEB(program); |
1833 | 0 | } |
1834 | 0 | continue; |
1835 | 0 | } |
1836 | | |
1837 | | // Extended opcode |
1838 | 0 | auto length = readULEB(program); |
1839 | | // the opcode itself should be included in the length, so length >= 1 |
1840 | 0 | SAFE_CHECK(length != 0, "invalid extended opcode length"); |
1841 | 0 | read<uint8_t>(program); // extended opcode |
1842 | 0 | --length; |
1843 | |
|
1844 | 0 | if (opcode == DW_LNE_define_file) { |
1845 | 0 | SAFE_CHECK(version_ < 5, "DW_LNE_define_file deprecated in DWARF5"); |
1846 | 0 | SAFE_CHECK(readFileName(program, fn), "invalid empty file in DW_LNE_define_file"); |
1847 | 0 | return true; |
1848 | 0 | } |
1849 | | |
1850 | 0 | program.remove_prefix(length); |
1851 | 0 | } |
1852 | | |
1853 | 0 | return false; |
1854 | 0 | } |
1855 | | |
1856 | 1.33M | Dwarf::LineNumberVM::StepResult Dwarf::LineNumberVM::step(std::string_view& program) { |
1857 | 1.33M | auto opcode = read<uint8_t>(program); |
1858 | | |
1859 | 1.33M | if (opcode >= opcodeBase_) { // special opcode |
1860 | 405k | uint8_t adjusted_opcode = opcode - opcodeBase_; |
1861 | 405k | uint8_t op_advance = adjusted_opcode / lineRange_; |
1862 | | |
1863 | 405k | address_ += minLength_ * op_advance; |
1864 | 405k | line_ += lineBase_ + adjusted_opcode % lineRange_; |
1865 | | |
1866 | 405k | basicBlock_ = false; |
1867 | 405k | prologueEnd_ = false; |
1868 | 405k | epilogueBegin_ = false; |
1869 | 405k | discriminator_ = 0; |
1870 | 405k | return COMMIT; |
1871 | 405k | } |
1872 | | |
1873 | 930k | if (opcode != 0) { // standard opcode |
1874 | | // Only interpret opcodes that are recognized by the version we're parsing; |
1875 | | // the others are vendor extensions and we should ignore them. |
1876 | 913k | switch (opcode) { |
1877 | 68.2k | case DW_LNS_copy: |
1878 | 68.2k | basicBlock_ = false; |
1879 | 68.2k | prologueEnd_ = false; |
1880 | 68.2k | epilogueBegin_ = false; |
1881 | 68.2k | discriminator_ = 0; |
1882 | 68.2k | return COMMIT; |
1883 | 82.0k | case DW_LNS_advance_pc: |
1884 | 82.0k | address_ += minLength_ * readULEB(program); |
1885 | 82.0k | return CONTINUE; |
1886 | 371k | case DW_LNS_advance_line: |
1887 | 371k | line_ += readSLEB(program); |
1888 | 371k | return CONTINUE; |
1889 | 43.8k | case DW_LNS_set_file: |
1890 | 43.8k | file_ = readULEB(program); |
1891 | 43.8k | return CONTINUE; |
1892 | 139k | case DW_LNS_set_column: |
1893 | 139k | column_ = readULEB(program); |
1894 | 139k | return CONTINUE; |
1895 | 90.6k | case DW_LNS_negate_stmt: |
1896 | 90.6k | isStmt_ = !isStmt_; |
1897 | 90.6k | return CONTINUE; |
1898 | 0 | case DW_LNS_set_basic_block: |
1899 | 0 | basicBlock_ = true; |
1900 | 0 | return CONTINUE; |
1901 | 87.4k | case DW_LNS_const_add_pc: |
1902 | 87.4k | address_ += minLength_ * ((255 - opcodeBase_) / lineRange_); |
1903 | 87.4k | return CONTINUE; |
1904 | 0 | case DW_LNS_fixed_advance_pc: |
1905 | 0 | address_ += read<uint16_t>(program); |
1906 | 0 | return CONTINUE; |
1907 | 15.0k | case DW_LNS_set_prologue_end: |
1908 | 15.0k | if (version_ == 2) { |
1909 | 0 | break; // not supported in version 2 |
1910 | 0 | } |
1911 | 15.0k | prologueEnd_ = true; |
1912 | 15.0k | return CONTINUE; |
1913 | 14.8k | case DW_LNS_set_epilogue_begin: |
1914 | 14.8k | if (version_ == 2) { |
1915 | 0 | break; // not supported in version 2 |
1916 | 0 | } |
1917 | 14.8k | epilogueBegin_ = true; |
1918 | 14.8k | return CONTINUE; |
1919 | 0 | case DW_LNS_set_isa: |
1920 | 0 | if (version_ == 2) { |
1921 | 0 | break; // not supported in version 2 |
1922 | 0 | } |
1923 | 0 | isa_ = readULEB(program); |
1924 | 0 | return CONTINUE; |
1925 | 913k | } |
1926 | | |
1927 | | // Unrecognized standard opcode, slurp the appropriate number of LEB |
1928 | | // arguments. |
1929 | 0 | uint8_t arg_count = standardOpcodeLengths_[opcode - 1]; |
1930 | 0 | while (arg_count--) { |
1931 | 0 | readULEB(program); |
1932 | 0 | } |
1933 | 0 | return CONTINUE; |
1934 | 913k | } |
1935 | | |
1936 | | // Extended opcode |
1937 | 17.8k | auto length = readULEB(program); |
1938 | | // the opcode itself should be included in the length, so length >= 1 |
1939 | 17.8k | SAFE_CHECK(length != 0, "invalid extended opcode length"); |
1940 | 17.8k | auto extended_opcode = read<uint8_t>(program); |
1941 | 17.8k | --length; |
1942 | | |
1943 | 17.8k | switch (extended_opcode) { |
1944 | 8.87k | case DW_LNE_end_sequence: |
1945 | 8.87k | return END; |
1946 | 8.98k | case DW_LNE_set_address: |
1947 | 8.98k | address_ = read<uintptr_t>(program); |
1948 | 8.98k | return CONTINUE; |
1949 | 0 | case DW_LNE_define_file: |
1950 | 0 | SAFE_CHECK(version_ < 5, "DW_LNE_define_file deprecated in DWARF5"); |
1951 | | // We can't process DW_LNE_define_file here, as it would require us to |
1952 | | // use unbounded amounts of state (ie. use the heap). We'll do a second |
1953 | | // pass (using nextDefineFile()) if necessary. |
1954 | 0 | break; |
1955 | 0 | case DW_LNE_set_discriminator: |
1956 | 0 | discriminator_ = readULEB(program); |
1957 | 0 | return CONTINUE; |
1958 | 17.8k | } |
1959 | | |
1960 | | // Unrecognized extended opcode |
1961 | 0 | program.remove_prefix(length); |
1962 | 0 | return CONTINUE; |
1963 | 17.8k | } |
1964 | | |
1965 | 129 | Dwarf::Path Dwarf::LineNumberVM::getFullFileName(uint64_t index) const { |
1966 | 129 | auto fn = getFileName(index); |
1967 | | // DWARF <= 4: the current dir is not represented in the CU's Line Number |
1968 | | // Program Header and relies on the CU's DW_AT_comp_dir. |
1969 | | // DWARF 5: the current directory is explicitly present. |
1970 | 129 | const std::string_view base_dir = version_ == 5 ? "" : compilationDirectory_; |
1971 | 129 | return Path(base_dir, getIncludeDirectory(fn.directoryIndex), fn.relativeName); |
1972 | 129 | } |
1973 | | |
1974 | 129 | bool Dwarf::LineNumberVM::findAddress(uintptr_t target, Path& file, uint64_t& line) { |
1975 | 129 | std::string_view program = data_; |
1976 | | |
1977 | | // Within each sequence of instructions, the address may only increase. |
1978 | | // Unfortunately, within the same compilation unit, sequences may appear |
1979 | | // in any order. So any sequence is a candidate if it starts at an address |
1980 | | // <= the target address, and we know we've found the target address if |
1981 | | // a candidate crosses the target address. |
1982 | 129 | enum State { |
1983 | 129 | START, |
1984 | 129 | LOW_SEQ, // candidate |
1985 | 129 | HIGH_SEQ |
1986 | 129 | }; |
1987 | 129 | State state = START; |
1988 | 129 | reset(); |
1989 | | |
1990 | 129 | uint64_t prev_file = 0; |
1991 | 129 | uint64_t prev_line = 0; |
1992 | 482k | while (!program.empty()) { |
1993 | 482k | bool seq_end = !next(program); |
1994 | | |
1995 | 482k | if (state == START) { |
1996 | 8.98k | if (!seq_end) { |
1997 | 8.98k | state = address_ <= target ? LOW_SEQ : HIGH_SEQ; |
1998 | 8.98k | } |
1999 | 8.98k | } |
2000 | | |
2001 | 482k | if (state == LOW_SEQ) { |
2002 | 386k | if (address_ > target) { |
2003 | | // Found it! Note that ">" is indeed correct (not ">="), as each |
2004 | | // sequence is guaranteed to have one entry past-the-end (emitted by |
2005 | | // DW_LNE_end_sequence) |
2006 | | // |
2007 | | // NOTE: In DWARF <= 4 the file register is non-zero. |
2008 | | // See DWARF 4: 6.2.4 The Line Number Program Header |
2009 | | // "The line number program assigns numbers to each of the file |
2010 | | // entries in order, beginning with 1, and uses those numbers instead |
2011 | | // of file names in the file register." |
2012 | | // DWARF 5 has a different include directory/file header and 0 is valid. |
2013 | 129 | if (version_ <= 4 && prev_file == 0) { |
2014 | 0 | return false; |
2015 | 0 | } |
2016 | 129 | file = getFullFileName(prev_file); |
2017 | 129 | line = prev_line; |
2018 | 129 | return true; |
2019 | 129 | } |
2020 | 386k | prev_file = file_; |
2021 | 386k | prev_line = line_; |
2022 | 386k | } |
2023 | | |
2024 | 482k | if (seq_end) { |
2025 | 8.85k | state = START; |
2026 | 8.85k | reset(); |
2027 | 8.85k | } |
2028 | 482k | } |
2029 | | |
2030 | 0 | return false; |
2031 | 129 | } |
2032 | | |
2033 | | } // namespace doris |
2034 | | |
2035 | | #endif |