/root/doris/be/src/common/dwarf.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // This file is copied from |
18 | | // https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/Dwarf.cpp |
19 | | // and modified by Doris |
20 | | |
21 | | #if defined(__ELF__) && !defined(__FreeBSD__) |
22 | | |
23 | | /* |
24 | | * Copyright 2012-present Facebook, Inc. |
25 | | * |
26 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
27 | | * you may not use this file except in compliance with the License. |
28 | | * You may obtain a copy of the License at |
29 | | * |
30 | | * http://www.apache.org/licenses/LICENSE-2.0 |
31 | | * |
32 | | * Unless required by applicable law or agreed to in writing, software |
33 | | * distributed under the License is distributed on an "AS IS" BASIS, |
34 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
35 | | * See the License for the specific language governing permissions and |
36 | | * limitations under the License. |
37 | | */ |
38 | | |
39 | | /** This file was edited for ClickHouse. |
40 | | */ |
41 | | |
42 | | #include "common/dwarf.h" |
43 | | |
44 | | #include <cstring> |
45 | | |
46 | | #include "common/elf.h" |
47 | | #include "common/logging.h" |
48 | | |
// DWARF constants used by this reader, grouped by family and listed in
// ascending numeric order within each family.

// Value of the "has children" byte of an abbreviation declaration.
#define DW_CHILDREN_no 0x00

// Attribute forms (DW_FORM_*).
#define DW_FORM_addr 0x01
#define DW_FORM_block2 0x03
#define DW_FORM_block4 0x04
#define DW_FORM_data2 0x05
#define DW_FORM_data4 0x06
#define DW_FORM_data8 0x07
#define DW_FORM_string 0x08
#define DW_FORM_block 0x09
#define DW_FORM_block1 0x0a
#define DW_FORM_data1 0x0b
#define DW_FORM_flag 0x0c
#define DW_FORM_sdata 0x0d
#define DW_FORM_strp 0x0e
#define DW_FORM_udata 0x0f
#define DW_FORM_ref_addr 0x10
#define DW_FORM_ref1 0x11
#define DW_FORM_ref2 0x12
#define DW_FORM_ref4 0x13
#define DW_FORM_ref8 0x14
#define DW_FORM_ref_udata 0x15
#define DW_FORM_indirect 0x16
#define DW_FORM_sec_offset 0x17
#define DW_FORM_exprloc 0x18
#define DW_FORM_flag_present 0x19
#define DW_FORM_strx 0x1a
#define DW_FORM_addrx 0x1b
#define DW_FORM_ref_sup4 0x1c
#define DW_FORM_strp_sup 0x1d
#define DW_FORM_data16 0x1e
#define DW_FORM_line_strp 0x1f
#define DW_FORM_ref_sig8 0x20
#define DW_FORM_implicit_const 0x21
#define DW_FORM_loclistx 0x22
#define DW_FORM_rnglistx 0x23
#define DW_FORM_ref_sup8 0x24
#define DW_FORM_strx1 0x25
#define DW_FORM_strx2 0x26
#define DW_FORM_strx3 0x27
#define DW_FORM_strx4 0x28
#define DW_FORM_addrx1 0x29
#define DW_FORM_addrx2 0x2a
#define DW_FORM_addrx3 0x2b
#define DW_FORM_addrx4 0x2c

// DIE tags (DW_TAG_*).
#define DW_TAG_entry_point 0x03
#define DW_TAG_lexical_block 0x0b
#define DW_TAG_compile_unit 0x11
#define DW_TAG_common_block 0x1a
#define DW_TAG_catch_block 0x25
#define DW_TAG_subprogram 0x2e
#define DW_TAG_try_block 0x32

// Attribute names (DW_AT_*).
#define DW_AT_name 0x03
#define DW_AT_stmt_list 0x10
#define DW_AT_low_pc 0x11
#define DW_AT_high_pc 0x12
#define DW_AT_comp_dir 0x1b
#define DW_AT_abstract_origin 0x31
#define DW_AT_specification 0x47
#define DW_AT_entry_pc 0x52
#define DW_AT_ranges 0x55
#define DW_AT_call_file 0x58
#define DW_AT_call_line 0x59
#define DW_AT_linkage_name 0x6e
#define DW_AT_str_offsets_base 0x72
#define DW_AT_addr_base 0x73
#define DW_AT_rnglists_base 0x74
#define DW_AT_loclists_base 0x8c
#define DW_AT_GNU_ranges_base 0x2132
#define DW_AT_GNU_addr_base 0x2133

// Line number program: standard opcodes (DW_LNS_*).
#define DW_LNS_copy 0x01
#define DW_LNS_advance_pc 0x02
#define DW_LNS_advance_line 0x03
#define DW_LNS_set_file 0x04
#define DW_LNS_set_column 0x05
#define DW_LNS_negate_stmt 0x06
#define DW_LNS_set_basic_block 0x07
#define DW_LNS_const_add_pc 0x08
#define DW_LNS_fixed_advance_pc 0x09
#define DW_LNS_set_prologue_end 0x0a
#define DW_LNS_set_epilogue_begin 0x0b
#define DW_LNS_set_isa 0x0c

// Line number program: extended opcodes (DW_LNE_*).
#define DW_LNE_end_sequence 0x01
#define DW_LNE_set_address 0x02
#define DW_LNE_define_file 0x03
#define DW_LNE_set_discriminator 0x04

// Line number header entry content types (DW_LNCT_*).
#define DW_LNCT_path 0x1
#define DW_LNCT_directory_index 0x2
#define DW_LNCT_timestamp 0x3
#define DW_LNCT_size 0x4
#define DW_LNCT_MD5 0x5

// Range list entry kinds (DW_RLE_*).
#define DW_RLE_end_of_list 0x0
#define DW_RLE_base_addressx 0x1
#define DW_RLE_startx_endx 0x2
#define DW_RLE_startx_length 0x3
#define DW_RLE_offset_pair 0x4
#define DW_RLE_base_address 0x5
#define DW_RLE_start_end 0x6
#define DW_RLE_start_length 0x7
154 | | namespace doris { |
155 | | #include "common/compile_check_avoid_begin.h" |
156 | | |
157 | | Dwarf::Dwarf(const std::shared_ptr<Elf>& elf) |
158 | 96.5k | : elf_(elf), |
159 | 96.5k | abbrev_(getSection(".debug_abbrev")), |
160 | 96.5k | addr_(getSection(".debug_addr")), |
161 | 96.5k | aranges_(getSection(".debug_aranges")), |
162 | 96.5k | info_(getSection(".debug_info")), |
163 | 96.5k | line_(getSection(".debug_line")), |
164 | 96.5k | line_str_(getSection(".debug_line_str")), |
165 | 96.5k | loclists_(getSection(".debug_loclists")), |
166 | 96.5k | ranges_(getSection(".debug_ranges")), |
167 | 96.5k | rnglists_(getSection(".debug_rnglists")), |
168 | 96.5k | str_(getSection(".debug_str")), |
169 | 96.5k | str_offsets_(getSection(".debug_str_offsets")) { |
170 | | // Optional sections: |
171 | | // - debugAranges_: for fast address range lookup. |
172 | | // If missing .debug_info can be used - but it's much slower (linear |
173 | | // scan). |
174 | | // - debugRanges_ (DWARF 4) / debugRnglists_ (DWARF 5): non-contiguous |
175 | | // address ranges of debugging information entries. |
176 | | // Used for inline function address lookup. |
177 | 96.5k | if (info_.empty() || abbrev_.empty() || line_.empty() || str_.empty()) { |
178 | 220 | elf_ = nullptr; |
179 | 220 | } |
180 | 96.5k | } |
181 | | |
182 | 1.64k | Dwarf::Section::Section(std::string_view d) : is64_bit(false), data(d) {} |
183 | | |
// Logs the fmt-formatted message at FATAL severity when `cond` does not hold.
// The do/while(false) wrapper makes the expansion behave as one statement.
#define SAFE_CHECK(cond, ...)                       \
    do {                                            \
        if (!(cond)) {                              \
            LOG(FATAL) << fmt::format(__VA_ARGS__); \
        }                                           \
    } while (false)
188 | | |
189 | | namespace { |
// Upper bound on the number of DIEAbbreviation entries cached per compilation
// unit; the cache exists to speed up inline function lookup.
constexpr uint32_t kMaxAbbreviationEntries = 1000;
193 | | |
194 | | // All following read* functions read from a std::string_view, advancing the |
195 | | // std::string_view, and aborting if there's not enough room. |
196 | | |
197 | | // Read (bitwise) one object of type T |
198 | | template <typename T> |
199 | | requires std::is_trivial_v<T> && std::is_standard_layout_v<T> |
200 | 2.53G | T read(std::string_view& sp) { |
201 | 2.53G | SAFE_CHECK(sp.size() >= sizeof(T), "underflow: expected bytes {}, got bytes {}", sizeof(T), |
202 | 2.53G | sp.size()); |
203 | 2.53G | T x; |
204 | 2.53G | memcpy(&x, sp.data(), sizeof(T)); |
205 | 2.53G | sp.remove_prefix(sizeof(T)); |
206 | 2.53G | return x; |
207 | 2.53G | } dwarf.cpp:_ZN5doris12_GLOBAL__N_14readIhQaasr3stdE12is_trivial_vIT_Esr3stdE20is_standard_layout_vIS2_EEES2_RSt17basic_string_viewIcSt11char_traitsIcEE Line | Count | Source | 200 | 89.7M | T read(std::string_view& sp) { | 201 | | SAFE_CHECK(sp.size() >= sizeof(T), "underflow: expected bytes {}, got bytes {}", sizeof(T), | 202 | 89.7M | sp.size()); | 203 | 89.7M | T x; | 204 | 89.7M | memcpy(&x, sp.data(), sizeof(T)); | 205 | 89.7M | sp.remove_prefix(sizeof(T)); | 206 | 89.7M | return x; | 207 | 89.7M | } |
dwarf.cpp:_ZN5doris12_GLOBAL__N_14readImQaasr3stdE12is_trivial_vIT_Esr3stdE20is_standard_layout_vIS2_EEES2_RSt17basic_string_viewIcSt11char_traitsIcEE Line | Count | Source | 200 | 2.44G | T read(std::string_view& sp) { | 201 | | SAFE_CHECK(sp.size() >= sizeof(T), "underflow: expected bytes {}, got bytes {}", sizeof(T), | 202 | 2.44G | sp.size()); | 203 | 2.44G | T x; | 204 | 2.44G | memcpy(&x, sp.data(), sizeof(T)); | 205 | 2.44G | sp.remove_prefix(sizeof(T)); | 206 | 2.44G | return x; | 207 | 2.44G | } |
dwarf.cpp:_ZN5doris12_GLOBAL__N_14readIjQaasr3stdE12is_trivial_vIT_Esr3stdE20is_standard_layout_vIS2_EEES2_RSt17basic_string_viewIcSt11char_traitsIcEE Line | Count | Source | 200 | 2.49M | T read(std::string_view& sp) { | 201 | | SAFE_CHECK(sp.size() >= sizeof(T), "underflow: expected bytes {}, got bytes {}", sizeof(T), | 202 | 2.49M | sp.size()); | 203 | 2.49M | T x; | 204 | 2.49M | memcpy(&x, sp.data(), sizeof(T)); | 205 | 2.49M | sp.remove_prefix(sizeof(T)); | 206 | 2.49M | return x; | 207 | 2.49M | } |
dwarf.cpp:_ZN5doris12_GLOBAL__N_14readItQaasr3stdE12is_trivial_vIT_Esr3stdE20is_standard_layout_vIS2_EEES2_RSt17basic_string_viewIcSt11char_traitsIcEE Line | Count | Source | 200 | 1.02M | T read(std::string_view& sp) { | 201 | | SAFE_CHECK(sp.size() >= sizeof(T), "underflow: expected bytes {}, got bytes {}", sizeof(T), | 202 | 1.02M | sp.size()); | 203 | 1.02M | T x; | 204 | 1.02M | memcpy(&x, sp.data(), sizeof(T)); | 205 | 1.02M | sp.remove_prefix(sizeof(T)); | 206 | 1.02M | return x; | 207 | 1.02M | } |
dwarf.cpp:_ZN5doris12_GLOBAL__N_14readIaQaasr3stdE12is_trivial_vIT_Esr3stdE20is_standard_layout_vIS2_EEES2_RSt17basic_string_viewIcSt11char_traitsIcEE Line | Count | Source | 200 | 681 | T read(std::string_view& sp) { | 201 | | SAFE_CHECK(sp.size() >= sizeof(T), "underflow: expected bytes {}, got bytes {}", sizeof(T), | 202 | 681 | sp.size()); | 203 | 681 | T x; | 204 | 681 | memcpy(&x, sp.data(), sizeof(T)); | 205 | 681 | sp.remove_prefix(sizeof(T)); | 206 | 681 | return x; | 207 | 681 | } |
|
208 | | |
209 | | // Read (bitwise) an unsigned number of N bytes (N in 1, 2, 3, 4). |
210 | | template <size_t N> |
211 | 3.93k | uint64_t readU64(std::string_view& sp) { |
212 | 3.93k | SAFE_CHECK(sp.size() >= N, "underflow"); |
213 | 3.93k | uint64_t x = 0; |
214 | 3.93k | memcpy(&x, sp.data(), N); |
215 | 3.93k | sp.remove_prefix(N); |
216 | 3.93k | return x; |
217 | 3.93k | } dwarf.cpp:_ZN5doris12_GLOBAL__N_17readU64ILm1EEEmRSt17basic_string_viewIcSt11char_traitsIcEE Line | Count | Source | 211 | 3.93k | uint64_t readU64(std::string_view& sp) { | 212 | | SAFE_CHECK(sp.size() >= N, "underflow"); | 213 | 3.93k | uint64_t x = 0; | 214 | 3.93k | memcpy(&x, sp.data(), N); | 215 | 3.93k | sp.remove_prefix(N); | 216 | 3.93k | return x; | 217 | 3.93k | } |
Unexecuted instantiation: dwarf.cpp:_ZN5doris12_GLOBAL__N_17readU64ILm2EEEmRSt17basic_string_viewIcSt11char_traitsIcEE Unexecuted instantiation: dwarf.cpp:_ZN5doris12_GLOBAL__N_17readU64ILm3EEEmRSt17basic_string_viewIcSt11char_traitsIcEE Unexecuted instantiation: dwarf.cpp:_ZN5doris12_GLOBAL__N_17readU64ILm4EEEmRSt17basic_string_viewIcSt11char_traitsIcEE |
218 | | |
219 | | // Read ULEB (unsigned) varint value; algorithm from the DWARF spec |
220 | 26.9M | uint64_t readULEB(std::string_view& sp, uint8_t& shift, uint8_t& val) { |
221 | 26.9M | uint64_t r = 0; |
222 | 26.9M | shift = 0; |
223 | 37.0M | do { |
224 | 37.0M | val = read<uint8_t>(sp); |
225 | 37.0M | r |= (uint64_t(val & 0x7f) << shift); |
226 | 37.0M | shift += 7; |
227 | 37.0M | } while (val & 0x80); |
228 | 26.9M | return r; |
229 | 26.9M | } |
230 | | |
231 | 15.3M | uint64_t readULEB(std::string_view& sp) { |
232 | 15.3M | uint8_t shift; |
233 | 15.3M | uint8_t val; |
234 | 15.3M | return readULEB(sp, shift, val); |
235 | 15.3M | } |
236 | | |
237 | | // Read SLEB (signed) varint value; algorithm from the DWARF spec |
238 | 11.5M | int64_t readSLEB(std::string_view& sp) { |
239 | 11.5M | uint8_t shift; |
240 | 11.5M | uint8_t val; |
241 | 11.5M | uint64_t r = readULEB(sp, shift, val); |
242 | | |
243 | 11.5M | if (shift < 64 && (val & 0x40)) { |
244 | 5.69M | r |= -(1ULL << shift); // sign extend |
245 | 5.69M | } |
246 | | |
247 | 11.5M | return r; |
248 | 11.5M | } |
249 | | |
250 | | // Read a value of "section offset" type, which may be 4 or 8 bytes |
251 | 1.46M | uint64_t readOffset(std::string_view& sp, bool is64_bit) { |
252 | 1.46M | return is64_bit ? read<uint64_t>(sp) : read<uint32_t>(sp); |
253 | 1.46M | } |
254 | | |
255 | | // Read "len" bytes |
256 | 0 | std::string_view readBytes(std::string_view& sp, uint64_t len) { |
257 | 0 | SAFE_CHECK(len <= sp.size(), "invalid string length: {} vs. {}", len, sp.size()); |
258 | 0 | std::string_view ret(sp.data(), len); |
259 | 0 | sp.remove_prefix(len); |
260 | 0 | return ret; |
261 | 0 | } |
262 | | |
263 | | // Read a null-terminated string |
264 | 435k | std::string_view readNullTerminated(std::string_view& sp) { |
265 | 435k | const char* p = static_cast<const char*>(memchr(sp.data(), 0, sp.size())); |
266 | 435k | SAFE_CHECK(p, "invalid null-terminated string"); |
267 | 435k | std::string_view ret(sp.data(), p - sp.data()); |
268 | 435k | sp = std::string_view(p + 1, sp.size()); |
269 | 435k | return ret; |
270 | 435k | } |
271 | | |
272 | | // Get a string from the section |
273 | 435k | std::string_view getStringFromStringSection(std::string_view section, uint64_t offset) { |
274 | 435k | SAFE_CHECK(offset < section.size(), "invalid section offset"); |
275 | 435k | std::string_view sp(section); |
276 | 435k | sp.remove_prefix(offset); |
277 | 435k | return readNullTerminated(sp); |
278 | 435k | } |
279 | | |
280 | | // Skip over padding until sp.data() - start is a multiple of alignment |
281 | 1.02M | void skipPadding(std::string_view& sp, const char* start, size_t alignment) { |
282 | 1.02M | size_t remainder = (sp.data() - start) % alignment; |
283 | 1.02M | if (remainder) { |
284 | 1.02M | SAFE_CHECK(alignment - remainder <= sp.size(), "invalid padding"); |
285 | 1.02M | sp.remove_prefix(alignment - remainder); |
286 | 1.02M | } |
287 | 1.02M | } |
288 | | |
289 | | } // namespace |
290 | | |
291 | | Dwarf::Path::Path(std::string_view baseDir, std::string_view subDir, std::string_view file) |
292 | 1.36k | : baseDir_(baseDir), subDir_(subDir), file_(file) { |
293 | | // Normalize |
294 | 1.36k | if (file_.empty()) { |
295 | 0 | baseDir_ = {}; |
296 | 0 | subDir_ = {}; |
297 | 0 | return; |
298 | 0 | } |
299 | | |
300 | 1.36k | if (file_[0] == '/') { |
301 | | // file_ is absolute |
302 | 681 | baseDir_ = {}; |
303 | 681 | subDir_ = {}; |
304 | 681 | } |
305 | | |
306 | 1.36k | if (!subDir_.empty() && subDir_[0] == '/') { |
307 | 681 | baseDir_ = {}; // subDir_ is absolute |
308 | 681 | } |
309 | | |
310 | | // Make sure it's never the case that baseDir_ is empty, but subDir_ isn't. |
311 | 1.36k | if (baseDir_.empty()) { |
312 | 1.36k | swap(baseDir_, subDir_); |
313 | 1.36k | } |
314 | 1.36k | } |
315 | | |
316 | 1.36k | size_t Dwarf::Path::size() const { |
317 | 1.36k | size_t size = 0; |
318 | 1.36k | bool needs_slash = false; |
319 | | |
320 | 1.36k | if (!baseDir_.empty()) { |
321 | 1.36k | size += baseDir_.size(); |
322 | 1.36k | needs_slash = baseDir_.back() != '/'; |
323 | 1.36k | } |
324 | | |
325 | 1.36k | if (!subDir_.empty()) { |
326 | 0 | size += needs_slash; |
327 | 0 | size += subDir_.size(); |
328 | 0 | needs_slash = subDir_.back() != '/'; |
329 | 0 | } |
330 | | |
331 | 1.36k | if (!file_.empty()) { |
332 | 1.36k | size += needs_slash; |
333 | 1.36k | size += file_.size(); |
334 | 1.36k | } |
335 | | |
336 | 1.36k | return size; |
337 | 1.36k | } |
338 | | |
339 | 0 | size_t Dwarf::Path::toBuffer(char* buf, size_t bufSize) const { |
340 | 0 | size_t total_size = 0; |
341 | 0 | bool needs_slash = false; |
342 | |
|
343 | 0 | auto append = [&](std::string_view sp) { |
344 | 0 | if (bufSize >= 2) { |
345 | 0 | size_t to_copy = std::min(sp.size(), bufSize - 1); |
346 | 0 | memcpy(buf, sp.data(), to_copy); |
347 | 0 | buf += to_copy; |
348 | 0 | bufSize -= to_copy; |
349 | 0 | } |
350 | 0 | total_size += sp.size(); |
351 | 0 | }; |
352 | |
|
353 | 0 | if (!baseDir_.empty()) { |
354 | 0 | append(baseDir_); |
355 | 0 | needs_slash = baseDir_.back() != '/'; |
356 | 0 | } |
357 | 0 | if (!subDir_.empty()) { |
358 | 0 | if (needs_slash) { |
359 | 0 | append("/"); |
360 | 0 | } |
361 | 0 | append(subDir_); |
362 | 0 | needs_slash = subDir_.back() != '/'; |
363 | 0 | } |
364 | 0 | if (!file_.empty()) { |
365 | 0 | if (needs_slash) { |
366 | 0 | append("/"); |
367 | 0 | } |
368 | 0 | append(file_); |
369 | 0 | } |
370 | 0 | if (bufSize) { |
371 | 0 | *buf = '\0'; |
372 | 0 | } |
373 | |
|
374 | 0 | SAFE_CHECK(total_size == size(), "Size mismatch"); |
375 | 0 | return total_size; |
376 | 0 | } |
377 | | |
378 | 681 | void Dwarf::Path::toString(std::string& dest) const { |
379 | 681 | size_t initial_size = dest.size(); |
380 | 681 | dest.reserve(initial_size + size()); |
381 | 681 | if (!baseDir_.empty()) { |
382 | 681 | dest.append(baseDir_.begin(), baseDir_.end()); |
383 | 681 | } |
384 | 681 | if (!subDir_.empty()) { |
385 | 0 | if (!dest.empty() && dest.back() != '/') { |
386 | 0 | dest.push_back('/'); |
387 | 0 | } |
388 | 0 | dest.append(subDir_.begin(), subDir_.end()); |
389 | 0 | } |
390 | 681 | if (!file_.empty()) { |
391 | 681 | if (!dest.empty() && dest.back() != '/') { |
392 | 681 | dest.push_back('/'); |
393 | 681 | } |
394 | 681 | dest.append(file_.begin(), file_.end()); |
395 | 681 | } |
396 | 681 | SAFE_CHECK(dest.size() == initial_size + size(), "Size mismatch"); |
397 | 681 | } |
398 | | |
399 | | // Next chunk in section |
400 | 1.02M | bool Dwarf::Section::next(std::string_view& chunk) { |
401 | 1.02M | chunk = data; |
402 | 1.02M | if (chunk.empty()) { |
403 | 287 | return false; |
404 | 287 | } |
405 | | |
406 | | // Initial length is a uint32_t value for a 32-bit section, and |
407 | | // a 96-bit value (0xffffffff followed by the 64-bit length) for a 64-bit |
408 | | // section. |
409 | 1.02M | auto initial_length = read<uint32_t>(chunk); |
410 | 1.02M | is64_bit = (initial_length == uint32_t(-1)); |
411 | 1.02M | auto length = is64_bit ? read<uint64_t>(chunk) : initial_length; |
412 | 1.02M | SAFE_CHECK(length <= chunk.size(), "invalid DWARF section"); |
413 | 1.02M | chunk = std::string_view(chunk.data(), length); |
414 | 1.02M | data = std::string_view(chunk.end(), data.end() - chunk.end()); |
415 | 1.02M | return true; |
416 | 1.02M | } |
417 | | |
418 | 1.06M | std::string_view Dwarf::getSection(const char* name) const { |
419 | 1.06M | std::optional<Elf::Section> elf_section = elf_->findSectionByName(name); |
420 | 1.06M | if (!elf_section) { |
421 | 2.42k | return {}; |
422 | 2.42k | } |
423 | | |
424 | 1.05M | #ifdef SHF_COMPRESSED |
425 | 1.05M | if (elf_section->header.sh_flags & SHF_COMPRESSED) { |
426 | 0 | return {}; |
427 | 0 | } |
428 | 1.05M | #endif |
429 | | |
430 | 1.05M | return {elf_section->begin(), elf_section->size()}; |
431 | 1.05M | } |
432 | | |
433 | | // static |
434 | 6.50k | bool Dwarf::readAbbreviation(std::string_view& section, DIEAbbreviation& abbr) { |
435 | | // abbreviation code |
436 | 6.50k | abbr.code = readULEB(section); |
437 | 6.50k | if (abbr.code == 0) { |
438 | 0 | return false; |
439 | 0 | } |
440 | | |
441 | | // abbreviation tag |
442 | 6.50k | abbr.tag = readULEB(section); |
443 | | |
444 | | // does this entry have children? |
445 | 6.50k | abbr.has_children = (read<uint8_t>(section) != DW_CHILDREN_no); |
446 | | |
447 | | // attributes |
448 | 6.50k | const char* attribute_begin = section.data(); |
449 | 48.1k | for (;;) { |
450 | 48.1k | SAFE_CHECK(!section.empty(), "invalid attribute section"); |
451 | 48.1k | auto attr = readAttributeSpec(section); |
452 | 48.1k | if (attr.name == 0 && attr.form == 0) { |
453 | 6.50k | break; |
454 | 6.50k | } |
455 | 48.1k | } |
456 | | |
457 | 6.50k | abbr.attributes = std::string_view(attribute_begin, section.data() - attribute_begin); |
458 | 6.50k | return true; |
459 | 6.50k | } |
460 | | |
461 | | // static |
462 | 0 | void Dwarf::readCompilationUnitAbbrs(std::string_view abbrev, CompilationUnit& cu) { |
463 | 0 | abbrev.remove_prefix(cu.abbrev_offset); |
464 | |
|
465 | 0 | DIEAbbreviation abbr; |
466 | 0 | while (readAbbreviation(abbrev, abbr)) { |
467 | | // Abbreviation code 0 is reserved for null debugging information entries. |
468 | 0 | if (abbr.code != 0 && abbr.code <= kMaxAbbreviationEntries) { |
469 | 0 | cu.abbr_cache[abbr.code - 1] = abbr; |
470 | 0 | } |
471 | 0 | } |
472 | 0 | } |
473 | | |
474 | | size_t Dwarf::forEachChild(const CompilationUnit& cu, const Die& die, |
475 | 0 | std::function<bool(const Die& die)> f) const { |
476 | 0 | size_t next_die_offset = forEachAttribute(cu, die, [&](const Attribute&) { return true; }); |
477 | 0 | if (!die.abbr.has_children) { |
478 | 0 | return next_die_offset; |
479 | 0 | } |
480 | | |
481 | 0 | auto child_die = getDieAtOffset(cu, next_die_offset); |
482 | 0 | while (child_die.code != 0) { |
483 | 0 | if (!f(child_die)) { |
484 | 0 | return child_die.offset; |
485 | 0 | } |
486 | | |
487 | | // NOTE: Don't run `f` over grandchildren, just skip over them. |
488 | 0 | size_t sibling_offset = forEachChild(cu, child_die, [](const Die&) { return true; }); |
489 | 0 | child_die = getDieAtOffset(cu, sibling_offset); |
490 | 0 | } |
491 | | |
492 | | // childDie is now a dummy die whose offset is to the code 0 marking the |
493 | | // end of the children. Need to add one to get the offset of the next die. |
494 | 0 | return child_die.offset + 1; |
495 | 0 | } |
496 | | |
497 | | /* |
498 | | * Iterate over all attributes of the given DIE, calling the given callable |
499 | | * for each. Iteration is stopped early if any of the calls return false. |
500 | | */ |
501 | | size_t Dwarf::forEachAttribute(const CompilationUnit& cu, const Die& die, |
502 | 1.36k | std::function<bool(const Attribute& die)> f) const { |
503 | 1.36k | auto attrs = die.abbr.attributes; |
504 | 1.36k | auto values = std::string_view {info_.data() + die.offset + die.attr_offset, |
505 | 1.36k | cu.offset + cu.size - die.offset - die.attr_offset}; |
506 | 16.1k | while (auto spec = readAttributeSpec(attrs)) { |
507 | 14.7k | auto attr = readAttribute(cu, die, spec, values); |
508 | 14.7k | if (!f(attr)) { |
509 | 0 | return static_cast<size_t>(-1); |
510 | 0 | } |
511 | 14.7k | } |
512 | 1.36k | return values.data() - info_.data(); |
513 | 1.36k | } |
514 | | |
515 | | Dwarf::Attribute Dwarf::readAttribute(const CompilationUnit& cu, const Die& die, AttributeSpec spec, |
516 | 14.7k | std::string_view& info) const { |
517 | | // DWARF 5 introduces new FORMs whose values are relative to some base attrs: |
518 | | // DW_AT_str_offsets_base, DW_AT_rnglists_base, DW_AT_addr_base. |
519 | | // Debug Fission DWARF 4 uses GNU DW_AT_GNU_ranges_base & DW_AT_GNU_addr_base. |
520 | | // |
521 | | // The order in which attributes appear in a CU is not defined. |
522 | | // The DW_AT_*_base attrs may appear after attributes that need them. |
523 | | // The DW_AT_*_base attrs are CU specific; so we read them just after |
524 | | // reading the CU header. During this first pass return empty values |
525 | | // when encountering a FORM that depends on DW_AT_*_base. |
526 | 14.7k | auto get_string_using_offset_table = [&](uint64_t index) { |
527 | 3.06k | if (!cu.str_offsets_base.has_value()) { |
528 | 1.02k | return std::string_view(); |
529 | 1.02k | } |
530 | | // DWARF 5: 7.26 String Offsets Table |
531 | | // The DW_AT_str_offsets_base attribute points to the first entry following |
532 | | // the header. The entries are indexed sequentially from this base entry, |
533 | | // starting from 0. |
534 | 2.04k | auto sp = str_offsets_.substr(*cu.str_offsets_base + |
535 | 2.04k | index * (cu.is64Bit ? sizeof(uint64_t) : sizeof(uint32_t))); |
536 | 2.04k | uint64_t str_offset = readOffset(sp, cu.is64Bit); |
537 | 2.04k | return getStringFromStringSection(str_, str_offset); |
538 | 3.06k | }; |
539 | | |
540 | 14.7k | auto read_debug_addr = [&](uint64_t index) { |
541 | 0 | if (!cu.addr_base.has_value()) { |
542 | 0 | return uint64_t(0); |
543 | 0 | } |
544 | | // DWARF 5: 7.27 Address Table |
545 | | // The DW_AT_addr_base attribute points to the first entry following the |
546 | | // header. The entries are indexed sequentially from this base entry, |
547 | | // starting from 0. |
548 | 0 | auto sp = addr_.substr(*cu.addr_base + index * sizeof(uint64_t)); |
549 | 0 | return read<uint64_t>(sp); |
550 | 0 | }; |
551 | | |
552 | 14.7k | switch (spec.form) { |
553 | 1.36k | case DW_FORM_addr: |
554 | 1.36k | return {spec, die, read<uintptr_t>(info)}; |
555 | 0 | case DW_FORM_block1: |
556 | 0 | return {spec, die, readBytes(info, read<uint8_t>(info))}; |
557 | 0 | case DW_FORM_block2: |
558 | 0 | return {spec, die, readBytes(info, read<uint16_t>(info))}; |
559 | 0 | case DW_FORM_block4: |
560 | 0 | return {spec, die, readBytes(info, read<uint32_t>(info))}; |
561 | 0 | case DW_FORM_block: |
562 | 0 | [[fallthrough]]; |
563 | 0 | case DW_FORM_exprloc: |
564 | 0 | return {spec, die, readBytes(info, readULEB(info))}; |
565 | 50 | case DW_FORM_data1: |
566 | 50 | [[fallthrough]]; |
567 | 50 | case DW_FORM_ref1: |
568 | 50 | return {spec, die, read<uint8_t>(info)}; |
569 | 1.31k | case DW_FORM_data2: |
570 | 1.31k | [[fallthrough]]; |
571 | 1.31k | case DW_FORM_ref2: |
572 | 1.31k | return {spec, die, read<uint16_t>(info)}; |
573 | 0 | case DW_FORM_data4: |
574 | 0 | [[fallthrough]]; |
575 | 0 | case DW_FORM_ref4: |
576 | 0 | return {spec, die, read<uint32_t>(info)}; |
577 | 50 | case DW_FORM_data8: |
578 | 50 | [[fallthrough]]; |
579 | 50 | case DW_FORM_ref8: |
580 | 50 | [[fallthrough]]; |
581 | 50 | case DW_FORM_ref_sig8: |
582 | 50 | return {spec, die, read<uint64_t>(info)}; |
583 | 0 | case DW_FORM_sdata: |
584 | 0 | return {spec, die, static_cast<uint64_t>(readSLEB(info))}; |
585 | 0 | case DW_FORM_udata: |
586 | 0 | [[fallthrough]]; |
587 | 0 | case DW_FORM_ref_udata: |
588 | 0 | return {spec, die, readULEB(info)}; |
589 | 0 | case DW_FORM_flag: |
590 | 0 | return {spec, die, read<uint8_t>(info)}; |
591 | 0 | case DW_FORM_flag_present: |
592 | 0 | return {spec, die, 1ULL}; |
593 | 6.56k | case DW_FORM_sec_offset: |
594 | 6.56k | [[fallthrough]]; |
595 | 6.56k | case DW_FORM_ref_addr: |
596 | 6.56k | return {spec, die, readOffset(info, die.is64Bit)}; |
597 | 0 | case DW_FORM_string: |
598 | 0 | return {spec, die, readNullTerminated(info)}; |
599 | 50 | case DW_FORM_strp: |
600 | 50 | return {spec, die, getStringFromStringSection(str_, readOffset(info, die.is64Bit))}; |
601 | 0 | case DW_FORM_indirect: // form is explicitly specified |
602 | | // Update spec with the actual FORM. |
603 | 0 | spec.form = readULEB(info); |
604 | 0 | return readAttribute(cu, die, spec, info); |
605 | | |
606 | | // DWARF 5: |
607 | 0 | case DW_FORM_implicit_const: // form is explicitly specified |
608 | | // For attributes with this form, the attribute specification contains a |
609 | | // third part, which is a signed LEB128 number. The value of this number |
610 | | // is used as the value of the attribute, and no value is stored in the |
611 | | // .debug_info section. |
612 | 0 | return {spec, die, static_cast<uint64_t>(spec.implicitConst)}; |
613 | | |
614 | 0 | case DW_FORM_addrx: |
615 | 0 | return {spec, die, read_debug_addr(readULEB(info))}; |
616 | 0 | case DW_FORM_addrx1: |
617 | 0 | return {spec, die, read_debug_addr(readU64<1>(info))}; |
618 | 0 | case DW_FORM_addrx2: |
619 | 0 | return {spec, die, read_debug_addr(readU64<2>(info))}; |
620 | 0 | case DW_FORM_addrx3: |
621 | 0 | return {spec, die, read_debug_addr(readU64<3>(info))}; |
622 | 0 | case DW_FORM_addrx4: |
623 | 0 | return {spec, die, read_debug_addr(readU64<4>(info))}; |
624 | | |
625 | 100 | case DW_FORM_line_strp: |
626 | 100 | return {spec, die, getStringFromStringSection(line_str_, readOffset(info, die.is64Bit))}; |
627 | | |
628 | 0 | case DW_FORM_strx: |
629 | 0 | return {spec, die, get_string_using_offset_table(readULEB(info))}; |
630 | 3.93k | case DW_FORM_strx1: |
631 | 3.93k | return {spec, die, get_string_using_offset_table(readU64<1>(info))}; |
632 | 0 | case DW_FORM_strx2: |
633 | 0 | return {spec, die, get_string_using_offset_table(readU64<2>(info))}; |
634 | 0 | case DW_FORM_strx3: |
635 | 0 | return {spec, die, get_string_using_offset_table(readU64<3>(info))}; |
636 | 0 | case DW_FORM_strx4: |
637 | 0 | return {spec, die, get_string_using_offset_table(readU64<4>(info))}; |
638 | | |
639 | 1.31k | case DW_FORM_rnglistx: { |
640 | 1.31k | auto index = readULEB(info); |
641 | 1.31k | if (!cu.rnglists_base.has_value()) { |
642 | 656 | return {spec, die, 0ULL}; |
643 | 656 | } |
644 | 656 | const uint64_t offset_size = cu.is64Bit ? sizeof(uint64_t) : sizeof(uint32_t); |
645 | 656 | auto sp = rnglists_.substr(*cu.rnglists_base + index * offset_size); |
646 | 656 | auto offset = readOffset(sp, cu.is64Bit); |
647 | 656 | return {spec, die, *cu.rnglists_base + offset}; |
648 | 1.31k | } |
649 | | |
650 | 0 | case DW_FORM_loclistx: { |
651 | 0 | auto index = readULEB(info); |
652 | 0 | if (!cu.loclists_base.has_value()) { |
653 | 0 | return {spec, die, 0ULL}; |
654 | 0 | } |
655 | 0 | const uint64_t offset_size = cu.is64Bit ? sizeof(uint64_t) : sizeof(uint32_t); |
656 | 0 | auto sp = loclists_.substr(*cu.loclists_base + index * offset_size); |
657 | 0 | auto offset = readOffset(sp, cu.is64Bit); |
658 | 0 | return {spec, die, *cu.loclists_base + offset}; |
659 | 0 | } |
660 | | |
661 | 0 | case DW_FORM_data16: |
662 | 0 | return {spec, die, readBytes(info, 16)}; |
663 | | |
664 | 0 | case DW_FORM_ref_sup4: |
665 | 0 | case DW_FORM_ref_sup8: |
666 | 0 | case DW_FORM_strp_sup: |
667 | 0 | SAFE_CHECK(false, "Unexpected DWARF5 supplimentary object files"); |
668 | |
|
669 | 0 | default: |
670 | 0 | SAFE_CHECK(false, "invalid attribute form"); |
671 | 14.7k | } |
672 | 0 | return {spec, die, 0ULL}; |
673 | 14.7k | } |
674 | | |
675 | | // static |
676 | 64.2k | Dwarf::AttributeSpec Dwarf::readAttributeSpec(std::string_view& sp) { |
677 | 64.2k | Dwarf::AttributeSpec spec; |
678 | 64.2k | spec.name = readULEB(sp); |
679 | 64.2k | spec.form = readULEB(sp); |
680 | 64.2k | if (spec.form == DW_FORM_implicit_const) { |
681 | 3.70k | spec.implicitConst = readSLEB(sp); |
682 | 3.70k | } |
683 | 64.2k | return spec; |
684 | 64.2k | } |
685 | | |
686 | 681 | Dwarf::CompilationUnit Dwarf::getCompilationUnit(uint64_t offset) const { |
687 | | // SAFE_CHECK(offset < info_.size(), "unexpected offset"); |
688 | 681 | CompilationUnit cu; |
689 | 681 | std::string_view chunk(info_); |
690 | 681 | cu.offset = offset; |
691 | 681 | chunk.remove_prefix(offset); |
692 | | |
693 | | // 1) unit_length |
694 | 681 | auto initial_length = read<uint32_t>(chunk); |
695 | 681 | cu.is64Bit = (initial_length == uint32_t(-1)); |
696 | 681 | cu.size = cu.is64Bit ? read<uint64_t>(chunk) : initial_length; |
697 | 681 | SAFE_CHECK(cu.size <= chunk.size(), "invalid chunk size"); |
698 | 681 | cu.size += cu.is64Bit ? 12 : 4; |
699 | | |
700 | | // 2) version |
701 | 681 | cu.version = read<uint16_t>(chunk); |
702 | 681 | SAFE_CHECK(cu.version >= 2 && cu.version <= 5, "invalid info version"); |
703 | | |
704 | 681 | if (cu.version == 5) { |
705 | | // DWARF5: 7.5.1.1 Full and Partial Compilation Unit Headers |
706 | | // 3) unit_type (new DWARF 5) |
707 | 681 | cu.unit_type = read<uint8_t>(chunk); |
708 | 681 | if (cu.unit_type != DW_UT_compile && cu.unit_type != DW_UT_skeleton) { |
709 | 0 | return cu; |
710 | 0 | } |
711 | | // 4) address_size |
712 | 681 | cu.addr_size = read<uint8_t>(chunk); |
713 | 681 | SAFE_CHECK(cu.addr_size == sizeof(uintptr_t), "invalid address size"); |
714 | | |
715 | | // 5) debug_abbrev_offset |
716 | 681 | cu.abbrev_offset = readOffset(chunk, cu.is64Bit); |
717 | | |
718 | 681 | if (cu.unit_type == DW_UT_skeleton) { |
719 | | // 6) dwo_id |
720 | 0 | read<uint64_t>(chunk); |
721 | 0 | } |
722 | 681 | } else { |
723 | | // DWARF4 has a single type of unit in .debug_info |
724 | 0 | cu.unit_type = DW_UT_compile; |
725 | | // 3) debug_abbrev_offset |
726 | 0 | cu.abbrev_offset = readOffset(chunk, cu.is64Bit); |
727 | | // 4) address_size |
728 | 0 | cu.addr_size = read<uint8_t>(chunk); |
729 | 0 | SAFE_CHECK(cu.addr_size == sizeof(uintptr_t), "invalid address size"); |
730 | 0 | } |
731 | 681 | cu.first_die = chunk.data() - info_.data(); |
732 | 681 | if (cu.version < 5) { |
733 | 0 | return cu; |
734 | 0 | } |
735 | | |
736 | 681 | Die die = getDieAtOffset(cu, cu.first_die); |
737 | 681 | if (die.abbr.tag != DW_TAG_compile_unit) { |
738 | 0 | return cu; |
739 | 0 | } |
740 | | |
741 | | // Read the DW_AT_*_base attributes. |
742 | | // Attributes which use FORMs relative to these base attrs |
743 | | // will not have valid values during this first pass! |
744 | 7.36k | forEachAttribute(cu, die, [&](const Attribute& attr) { |
745 | 7.36k | switch (attr.spec.name) { |
746 | 656 | case DW_AT_addr_base: |
747 | 656 | case DW_AT_GNU_addr_base: |
748 | 656 | cu.addr_base = std::get<uint64_t>(attr.attr_value); |
749 | 656 | break; |
750 | 634 | case DW_AT_loclists_base: |
751 | 634 | cu.loclists_base = std::get<uint64_t>(attr.attr_value); |
752 | 634 | break; |
753 | 656 | case DW_AT_rnglists_base: |
754 | 656 | case DW_AT_GNU_ranges_base: |
755 | 656 | cu.rnglists_base = std::get<uint64_t>(attr.attr_value); |
756 | 656 | break; |
757 | 656 | case DW_AT_str_offsets_base: |
758 | 656 | cu.str_offsets_base = std::get<uint64_t>(attr.attr_value); |
759 | 656 | break; |
760 | 7.36k | } |
761 | 7.36k | return true; // continue forEachAttribute |
762 | 7.36k | }); |
763 | 681 | return cu; |
764 | 681 | } |
765 | | |
766 | | // Finds the Compilation Unit starting at offset. |
767 | 0 | Dwarf::CompilationUnit Dwarf::findCompilationUnit(uint64_t targetOffset) const { |
768 | | // SAFE_CHECK(targetOffset < info_.size(), "unexpected target address"); |
769 | 0 | uint64_t offset = 0; |
770 | 0 | while (offset < info_.size()) { |
771 | 0 | std::string_view chunk(info_); |
772 | 0 | chunk.remove_prefix(offset); |
773 | |
|
774 | 0 | auto initial_length = read<uint32_t>(chunk); |
775 | 0 | auto is64_bit = (initial_length == static_cast<uint32_t>(-1)); |
776 | 0 | auto size = is64_bit ? read<uint64_t>(chunk) : initial_length; |
777 | 0 | SAFE_CHECK(size <= chunk.size(), "invalid chunk size"); |
778 | 0 | size += is64_bit ? 12 : 4; |
779 | |
|
780 | 0 | if (offset + size > targetOffset) { |
781 | 0 | break; |
782 | 0 | } |
783 | 0 | offset += size; |
784 | 0 | } |
785 | 0 | return getCompilationUnit(offset); |
786 | 0 | } |
787 | | |
788 | 1.36k | Dwarf::DIEAbbreviation Dwarf::getAbbreviation(uint64_t code, uint64_t offset) const { |
789 | | // Linear search in the .debug_abbrev section, starting at offset |
790 | 1.36k | std::string_view section = abbrev_; |
791 | 1.36k | section.remove_prefix(offset); |
792 | | |
793 | 1.36k | Dwarf::DIEAbbreviation abbr; |
794 | 6.50k | while (readAbbreviation(section, abbr)) { |
795 | 6.50k | if (abbr.code == code) { |
796 | 1.36k | return abbr; |
797 | 1.36k | } |
798 | 6.50k | } |
799 | | |
800 | 0 | SAFE_CHECK(false, "could not find abbreviation code"); |
801 | 0 | } |
802 | | |
803 | | Dwarf::AttributeValue Dwarf::readAttributeValue(std::string_view& sp, uint64_t form, |
804 | 0 | bool is64_bit) const { |
805 | 0 | switch (form) { |
806 | 0 | case DW_FORM_addr: |
807 | 0 | return uint64_t(read<uintptr_t>(sp)); |
808 | 0 | case DW_FORM_block1: |
809 | 0 | return readBytes(sp, read<uint8_t>(sp)); |
810 | 0 | case DW_FORM_block2: |
811 | 0 | return readBytes(sp, read<uint16_t>(sp)); |
812 | 0 | case DW_FORM_block4: |
813 | 0 | return readBytes(sp, read<uint32_t>(sp)); |
814 | 0 | case DW_FORM_block: |
815 | 0 | [[fallthrough]]; |
816 | 0 | case DW_FORM_exprloc: |
817 | 0 | return readBytes(sp, readULEB(sp)); |
818 | 0 | case DW_FORM_data1: |
819 | 0 | [[fallthrough]]; |
820 | 0 | case DW_FORM_ref1: |
821 | 0 | return uint64_t(read<uint8_t>(sp)); |
822 | 0 | case DW_FORM_data2: |
823 | 0 | [[fallthrough]]; |
824 | 0 | case DW_FORM_ref2: |
825 | 0 | return uint64_t(read<uint16_t>(sp)); |
826 | 0 | case DW_FORM_data4: |
827 | 0 | [[fallthrough]]; |
828 | 0 | case DW_FORM_ref4: |
829 | 0 | return uint64_t(read<uint32_t>(sp)); |
830 | 0 | case DW_FORM_data8: |
831 | 0 | [[fallthrough]]; |
832 | 0 | case DW_FORM_ref8: |
833 | 0 | return read<uint64_t>(sp); |
834 | 0 | case DW_FORM_sdata: |
835 | 0 | return uint64_t(readSLEB(sp)); |
836 | 0 | case DW_FORM_udata: |
837 | 0 | [[fallthrough]]; |
838 | 0 | case DW_FORM_ref_udata: |
839 | 0 | return readULEB(sp); |
840 | 0 | case DW_FORM_flag: |
841 | 0 | return uint64_t(read<uint8_t>(sp)); |
842 | 0 | case DW_FORM_flag_present: |
843 | 0 | return uint64_t(1); |
844 | 0 | case DW_FORM_sec_offset: |
845 | 0 | [[fallthrough]]; |
846 | 0 | case DW_FORM_ref_addr: |
847 | 0 | return readOffset(sp, is64_bit); |
848 | 0 | case DW_FORM_string: |
849 | 0 | return readNullTerminated(sp); |
850 | 0 | case DW_FORM_strp: |
851 | 0 | return getStringFromStringSection(str_, readOffset(sp, is64_bit)); |
852 | 0 | case DW_FORM_indirect: // form is explicitly specified |
853 | 0 | return readAttributeValue(sp, readULEB(sp), is64_bit); |
854 | 0 | default: |
855 | 0 | SAFE_CHECK(false, "invalid attribute form"); |
856 | 0 | return uint64_t(1); |
857 | 0 | ; |
858 | 0 | } |
859 | 0 | } |
860 | | |
861 | | /** |
862 | | * Find @address in .debug_aranges and return the offset in |
863 | | * .debug_info for compilation unit to which this address belongs. |
864 | | */ |
865 | 968 | bool Dwarf::findDebugInfoOffset(uintptr_t address, std::string_view aranges, uint64_t& offset) { |
866 | 968 | Section aranges_section(aranges); |
867 | 968 | std::string_view chunk; |
868 | 1.02M | while (aranges_section.next(chunk)) { |
869 | 1.02M | auto version = read<uint16_t>(chunk); |
870 | 1.02M | SAFE_CHECK(version == 2, "invalid aranges version"); |
871 | | |
872 | 1.02M | offset = readOffset(chunk, aranges_section.is64Bit()); |
873 | 1.02M | auto address_size = read<uint8_t>(chunk); |
874 | 1.02M | SAFE_CHECK(address_size == sizeof(uintptr_t), "invalid address size"); |
875 | 1.02M | auto segment_size = read<uint8_t>(chunk); |
876 | 1.02M | SAFE_CHECK(segment_size == 0, "segmented architecture not supported"); |
877 | | |
878 | | // Padded to a multiple of 2 addresses. |
879 | | // Strangely enough, this is the only place in the DWARF spec that requires |
880 | | // padding. |
881 | 1.02M | skipPadding(chunk, aranges.data(), 2 * sizeof(uintptr_t)); |
882 | 1.22G | for (;;) { |
883 | 1.22G | auto start = read<uintptr_t>(chunk); |
884 | 1.22G | auto length = read<uintptr_t>(chunk); |
885 | | |
886 | 1.22G | if (start == 0 && length == 0) { |
887 | 1.02M | break; |
888 | 1.02M | } |
889 | | |
890 | | // Is our address in this range? |
891 | 1.21G | if (address >= start && address < start + length) { |
892 | 681 | return true; |
893 | 681 | } |
894 | 1.21G | } |
895 | 1.02M | } |
896 | 287 | return false; |
897 | 968 | } |
898 | | |
899 | 1.36k | Dwarf::Die Dwarf::getDieAtOffset(const CompilationUnit& cu, uint64_t offset) const { |
900 | 1.36k | SAFE_CHECK(offset < info_.size(), "unexpected offset {}, info size {}", offset, info_.size()); |
901 | 1.36k | Die die; |
902 | 1.36k | std::string_view sp {info_.data() + offset, cu.offset + cu.size - offset}; |
903 | 1.36k | die.offset = offset; |
904 | 1.36k | die.is64Bit = cu.is64Bit; |
905 | 1.36k | auto code = readULEB(sp); |
906 | 1.36k | die.code = code; |
907 | 1.36k | if (code == 0) { |
908 | 0 | return die; |
909 | 0 | } |
910 | 1.36k | die.attr_offset = sp.data() - info_.data() - offset; |
911 | 1.36k | die.abbr = !cu.abbr_cache.empty() && die.code < kMaxAbbreviationEntries |
912 | 1.36k | ? cu.abbr_cache[die.code - 1] |
913 | 1.36k | : getAbbreviation(die.code, cu.abbrev_offset); |
914 | | |
915 | 1.36k | return die; |
916 | 1.36k | } |
917 | | |
918 | | /** |
919 | | * Find the @locationInfo for @address in the compilation unit represented |
920 | | * by the @sp .debug_info entry. |
921 | | * Returns whether the address was found. |
922 | | * Advances @sp to the next entry in .debug_info. |
923 | | */ |
924 | | bool Dwarf::findLocation(uintptr_t address, const LocationInfoMode mode, CompilationUnit& cu, |
925 | 681 | LocationInfo& info, std::vector<SymbolizedFrame>& inline_frames) const { |
926 | 681 | Die die = getDieAtOffset(cu, cu.first_die); |
927 | | // Partial compilation unit (DW_TAG_partial_unit) is not supported. |
928 | 681 | SAFE_CHECK(die.abbr.tag == DW_TAG_compile_unit, "expecting compile unit entry"); |
929 | | |
930 | | // Offset in .debug_line for the line number VM program for this CU |
931 | 681 | std::optional<uint64_t> line_offset = 0; |
932 | 681 | std::string_view compilation_directory; |
933 | 681 | std::optional<std::string_view> main_file_name; |
934 | 681 | std::optional<uint64_t> base_addr_cu; |
935 | | |
936 | 7.36k | forEachAttribute(cu, die, [&](const Attribute& attr) { |
937 | 7.36k | switch (attr.spec.name) { |
938 | 681 | case DW_AT_stmt_list: |
939 | | // Offset in .debug_line for the line number VM program for this |
940 | | // compilation unit |
941 | 681 | line_offset = std::get<uint64_t>(attr.attr_value); |
942 | 681 | break; |
943 | 681 | case DW_AT_comp_dir: |
944 | | // Compilation directory |
945 | 681 | compilation_directory = std::get<std::string_view>(attr.attr_value); |
946 | 681 | break; |
947 | 681 | case DW_AT_name: |
948 | | // File name of main file being compiled |
949 | 681 | main_file_name = std::get<std::string_view>(attr.attr_value); |
950 | 681 | break; |
951 | 681 | case DW_AT_low_pc: |
952 | 681 | case DW_AT_entry_pc: |
953 | | // 2.17.1: historically DW_AT_low_pc was used. DW_AT_entry_pc was |
954 | | // introduced in DWARF3. Support either to determine the base address of |
955 | | // the CU. |
956 | 681 | base_addr_cu = std::get<uint64_t>(attr.attr_value); |
957 | 681 | break; |
958 | 7.36k | } |
959 | | // Iterate through all attributes until find all above. |
960 | 7.36k | return true; |
961 | 7.36k | }); |
962 | | |
963 | 681 | if (main_file_name) { |
964 | 681 | info.has_main_file = true; |
965 | 681 | info.main_file = Path(compilation_directory, "", *main_file_name); |
966 | 681 | } |
967 | | |
968 | 681 | if (!line_offset) { |
969 | 0 | return false; |
970 | 0 | } |
971 | | |
972 | 681 | std::string_view line_section(line_); |
973 | 681 | line_section.remove_prefix(*line_offset); |
974 | 681 | LineNumberVM line_vm(line_section, compilation_directory, str_, line_str_); |
975 | | |
976 | | // Execute line number VM program to find file and line |
977 | 681 | info.has_file_and_line = line_vm.findAddress(address, info.file, info.line); |
978 | | |
979 | 681 | bool check_inline = (mode == LocationInfoMode::FULL_WITH_INLINE); |
980 | | |
981 | 681 | if (info.has_file_and_line && check_inline) { |
982 | | // Re-get the compilation unit with abbreviation cached. |
983 | 0 | cu.abbr_cache.clear(); |
984 | 0 | cu.abbr_cache.resize(kMaxAbbreviationEntries); |
985 | 0 | readCompilationUnitAbbrs(abbrev_, cu); |
986 | | |
987 | | // Find the subprogram that matches the given address. |
988 | 0 | Die subprogram; |
989 | 0 | findSubProgramDieForAddress(cu, die, address, base_addr_cu, subprogram); |
990 | | |
991 | | // Subprogram is the DIE of caller function. |
992 | 0 | if (/*check_inline &&*/ subprogram.abbr.has_children) { |
993 | | // Use an extra location and get its call file and call line, so that |
994 | | // they can be used for the second last location when we don't have |
995 | | // enough inline frames for all inline functions call stack. |
996 | 0 | const size_t max_size = Dwarf::kMaxInlineLocationInfoPerFrame + 1; |
997 | 0 | std::vector<CallLocation> call_locations; |
998 | 0 | call_locations.reserve(Dwarf::kMaxInlineLocationInfoPerFrame + 1); |
999 | |
|
1000 | 0 | findInlinedSubroutineDieForAddress(cu, subprogram, line_vm, address, base_addr_cu, |
1001 | 0 | call_locations, max_size); |
1002 | 0 | size_t num_found = call_locations.size(); |
1003 | |
|
1004 | 0 | if (num_found > 0) { |
1005 | 0 | const auto inner_most_file = info.file; |
1006 | 0 | const auto inner_most_line = info.line; |
1007 | | |
1008 | | // Earlier we filled in locationInfo: |
1009 | | // - mainFile: the path to the CU -- the file where the non-inlined |
1010 | | // call is made from. |
1011 | | // - file + line: the location of the inner-most inlined call. |
1012 | | // Here we already find inlined info so mainFile would be redundant. |
1013 | 0 | info.has_main_file = false; |
1014 | 0 | info.main_file = Path {}; |
1015 | | // @findInlinedSubroutineDieForAddress fills inlineLocations[0] with the |
1016 | | // file+line of the non-inlined outer function making the call. |
1017 | | // locationInfo.name is already set by the caller by looking up the |
1018 | | // non-inlined function @address belongs to. |
1019 | 0 | info.has_file_and_line = true; |
1020 | 0 | info.file = call_locations[0].file; |
1021 | 0 | info.line = call_locations[0].line; |
1022 | | |
1023 | | // The next inlined subroutine's call file and call line is the current |
1024 | | // caller's location. |
1025 | 0 | for (size_t i = 0; i < num_found - 1; ++i) { |
1026 | 0 | call_locations[i].file = call_locations[i + 1].file; |
1027 | 0 | call_locations[i].line = call_locations[i + 1].line; |
1028 | 0 | } |
1029 | | // CallLocation for the inner-most inlined function: |
1030 | | // - will be computed if enough space was available in the passed |
1031 | | // buffer. |
1032 | | // - will have a .name, but no !.file && !.line |
1033 | | // - its corresponding file+line is the one returned by LineVM based |
1034 | | // on @address. |
1035 | | // Use the inner-most inlined file+line info we got from the LineVM. |
1036 | 0 | call_locations[num_found - 1].file = inner_most_file; |
1037 | 0 | call_locations[num_found - 1].line = inner_most_line; |
1038 | | |
1039 | | // Fill in inline frames in reverse order (as expected by the caller). |
1040 | 0 | std::reverse(call_locations.begin(), call_locations.end()); |
1041 | 0 | for (const auto& call_location : call_locations) { |
1042 | 0 | SymbolizedFrame inline_frame; |
1043 | 0 | inline_frame.found = true; |
1044 | 0 | inline_frame.addr = address; |
1045 | 0 | if (!call_location.name.empty()) { |
1046 | 0 | inline_frame.name = call_location.name.data(); |
1047 | 0 | } else { |
1048 | 0 | inline_frame.name = nullptr; |
1049 | 0 | } |
1050 | 0 | inline_frame.location.has_file_and_line = true; |
1051 | 0 | inline_frame.location.file = call_location.file; |
1052 | 0 | inline_frame.location.line = call_location.line; |
1053 | 0 | inline_frames.push_back(inline_frame); |
1054 | 0 | } |
1055 | 0 | } |
1056 | 0 | } |
1057 | 0 | } |
1058 | | |
1059 | 681 | return info.has_file_and_line; |
1060 | 681 | } |
1061 | | |
1062 | | void Dwarf::findSubProgramDieForAddress(const CompilationUnit& cu, const Die& die, uint64_t address, |
1063 | | std::optional<uint64_t> base_addr_cu, |
1064 | 0 | Die& subprogram) const { |
1065 | 0 | forEachChild(cu, die, [&](const Die& child_die) { |
1066 | 0 | if (child_die.abbr.tag == DW_TAG_subprogram) { |
1067 | 0 | std::optional<uint64_t> low_pc; |
1068 | 0 | std::optional<uint64_t> high_pc; |
1069 | 0 | std::optional<bool> is_high_pc_addr; |
1070 | 0 | std::optional<uint64_t> range_offset; |
1071 | 0 | forEachAttribute(cu, child_die, [&](const Attribute& attr) { |
1072 | 0 | switch (attr.spec.name) { |
1073 | 0 | case DW_AT_ranges: |
1074 | 0 | range_offset = std::get<uint64_t>(attr.attr_value); |
1075 | 0 | break; |
1076 | 0 | case DW_AT_low_pc: |
1077 | 0 | low_pc = std::get<uint64_t>(attr.attr_value); |
1078 | 0 | break; |
1079 | 0 | case DW_AT_high_pc: |
1080 | | // The value of the DW_AT_high_pc attribute can be |
1081 | | // an address (DW_FORM_addr*) or an offset (DW_FORM_data*). |
1082 | 0 | is_high_pc_addr = attr.spec.form == DW_FORM_addr || // |
1083 | 0 | attr.spec.form == DW_FORM_addrx || // |
1084 | 0 | attr.spec.form == DW_FORM_addrx1 || // |
1085 | 0 | attr.spec.form == DW_FORM_addrx2 || // |
1086 | 0 | attr.spec.form == DW_FORM_addrx3 || // |
1087 | 0 | attr.spec.form == DW_FORM_addrx4; |
1088 | 0 | high_pc = std::get<uint64_t>(attr.attr_value); |
1089 | 0 | break; |
1090 | 0 | } |
1091 | | // Iterate through all attributes until find all above. |
1092 | 0 | return true; |
1093 | 0 | }); |
1094 | 0 | bool pc_match = low_pc && high_pc && is_high_pc_addr && address >= *low_pc && |
1095 | 0 | (address < (*is_high_pc_addr ? *high_pc : *low_pc + *high_pc)); |
1096 | 0 | bool range_match = |
1097 | 0 | range_offset && isAddrInRangeList(cu, address, base_addr_cu, |
1098 | 0 | range_offset.value(), cu.addr_size); |
1099 | 0 | if (pc_match || range_match) { |
1100 | 0 | subprogram = child_die; |
1101 | 0 | return false; |
1102 | 0 | } |
1103 | 0 | } |
1104 | | |
1105 | 0 | findSubProgramDieForAddress(cu, child_die, address, base_addr_cu, subprogram); |
1106 | | |
1107 | | // Iterates through children until find the inline subprogram. |
1108 | 0 | return true; |
1109 | 0 | }); |
1110 | 0 | } |
1111 | | |
/**
 * Find DW_TAG_inlined_subroutine child DIEs that contain @address and
 * then extract:
 * - Where was it called from (DW_AT_call_file & DW_AT_call_line):
 *   the statement or expression that caused the inline expansion.
 * - The inlined function's name. As a function may be inlined multiple
 *   times, common attributes like DW_AT_linkage_name or DW_AT_name
 *   are only stored in its "concrete out-of-line instance" (a
 *   DW_TAG_subprogram) which we find using DW_AT_abstract_origin.
 *
 * Each match is appended to @locations and the search then continues inside
 * the matched DIE, so outer inlined calls precede inner ones. Nothing is done
 * once @locations already holds @max_size entries on entry.
 *
 * @param cu           Compilation unit that owns @die.
 * @param die          DIE whose children are examined.
 * @param line_vm      Used to turn DW_AT_call_file indexes into file names.
 * @param address      Address being symbolized.
 * @param base_addr_cu Base address of the CU, forwarded to range list lookups.
 * @param locations    Output: call locations found so far.
 * @param max_size     Upper bound checked against @locations.size() on entry.
 */
void Dwarf::findInlinedSubroutineDieForAddress(const CompilationUnit& cu, const Die& die,
                                               const LineNumberVM& line_vm, uint64_t address,
                                               std::optional<uint64_t> base_addr_cu,
                                               std::vector<CallLocation>& locations,
                                               const size_t max_size) const {
    if (locations.size() >= max_size) {
        return;
    }

    forEachChild(cu, die, [&](const Die& child_die) {
        // Between a DW_TAG_subprogram and a DW_TAG_inlined_subroutine we might
        // have arbitrary intermediary "nodes", including DW_TAG_common_block,
        // DW_TAG_lexical_block, DW_TAG_try_block, DW_TAG_catch_block and
        // DW_TAG_with_stmt, etc.
        // We can't filter with location here since its range may be not specified.
        // See section 2.6.2: A location list containing only an end of list entry
        // describes an object that exists in the source code but not in the
        // executable program.
        if (child_die.abbr.tag == DW_TAG_try_block || child_die.abbr.tag == DW_TAG_catch_block ||
            child_die.abbr.tag == DW_TAG_entry_point || child_die.abbr.tag == DW_TAG_common_block ||
            child_die.abbr.tag == DW_TAG_lexical_block) {
            // Look through the intermediary node, then move on to its siblings.
            findInlinedSubroutineDieForAddress(cu, child_die, line_vm, address, base_addr_cu,
                                               locations, max_size);
            return true;
        }

        std::optional<uint64_t> low_pc;
        std::optional<uint64_t> high_pc;
        std::optional<bool> is_high_pc_addr;
        std::optional<uint64_t> abstract_origin;
        std::optional<uint64_t> abstract_origin_ref_type;
        std::optional<uint64_t> call_file;
        std::optional<uint64_t> call_line;
        std::optional<uint64_t> range_offset;
        forEachAttribute(cu, child_die, [&](const Attribute& attr) {
            switch (attr.spec.name) {
            case DW_AT_ranges:
                range_offset = std::get<uint64_t>(attr.attr_value);
                break;
            case DW_AT_low_pc:
                low_pc = std::get<uint64_t>(attr.attr_value);
                break;
            case DW_AT_high_pc:
                // The value of the DW_AT_high_pc attribute can be
                // an address (DW_FORM_addr*) or an offset (DW_FORM_data*).
                is_high_pc_addr = attr.spec.form == DW_FORM_addr ||   //
                                  attr.spec.form == DW_FORM_addrx ||  //
                                  attr.spec.form == DW_FORM_addrx1 || //
                                  attr.spec.form == DW_FORM_addrx2 || //
                                  attr.spec.form == DW_FORM_addrx3 || //
                                  attr.spec.form == DW_FORM_addrx4;
                high_pc = std::get<uint64_t>(attr.attr_value);
                break;
            case DW_AT_abstract_origin:
                // Remember the form too: it decides below whether the reference
                // is relative to this unit or to the whole .debug_info section.
                abstract_origin_ref_type = attr.spec.form;
                abstract_origin = std::get<uint64_t>(attr.attr_value);
                break;
            case DW_AT_call_line:
                call_line = std::get<uint64_t>(attr.attr_value);
                break;
            case DW_AT_call_file:
                call_file = std::get<uint64_t>(attr.attr_value);
                break;
            }
            // Iterate through all until find all above attributes.
            return true;
        });

        // 2.17 Code Addresses and Ranges
        // Any debugging information entry describing an entity that has a
        // machine code address or range of machine code addresses,
        // which includes compilation units, module initialization, subroutines,
        // ordinary blocks, try/catch blocks, labels and the like, may have
        //  - A DW_AT_low_pc attribute for a single address,
        //  - A DW_AT_low_pc and DW_AT_high_pc pair of attributes for a
        //    single contiguous range of addresses, or
        //  - A DW_AT_ranges attribute for a non-contiguous range of addresses.
        // TODO: Support DW_TAG_entry_point and DW_TAG_common_block that don't
        // have DW_AT_low_pc/DW_AT_high_pc pairs and DW_AT_ranges.
        // TODO: Support relocated address which requires lookup in relocation map.
        bool pc_match = low_pc && high_pc && is_high_pc_addr && address >= *low_pc &&
                        (address < (*is_high_pc_addr ? *high_pc : *low_pc + *high_pc));
        bool range_match = range_offset && isAddrInRangeList(cu, address, base_addr_cu,
                                                             range_offset.value(), cu.addr_size);
        if (!pc_match && !range_match) {
            // Address doesn't match. Keep searching other children.
            return true;
        }

        if (!abstract_origin || !abstract_origin_ref_type || !call_line || !call_file) {
            // We expect a single sibling DIE to match on addr, but it's missing
            // required fields. Stop searching for other DIEs.
            return false;
        }

        CallLocation location;
        location.file = line_vm.getFullFileName(*call_file);
        location.line = *call_line;

        /// Something went wrong while reading the debug info of the inlined function.
        /// If set to true we stop parsing DWARF.
        bool die_for_inline_broken = false;

        // Resolves the name of the function described by the DIE at absolute
        // offset `die_offset` within unit `srcu`. Returns an empty view when no
        // name attribute is present or when the DIE turned out to be unusable
        // (in which case die_for_inline_broken is set).
        auto get_function_name = [&](const CompilationUnit& srcu, uint64_t die_offset) {
            Die decl_die = getDieAtOffset(srcu, die_offset);
            // This is a reference to decl_die: the assignment further down
            // copies def_die into decl_die rather than rebinding the reference.
            auto& die_to_look_for_name = decl_die;

            Die def_die;
            // Jump to the actual function definition instead of declaration for name
            // and line info.
            // DW_AT_specification: Incomplete, non-defining, or separate declaration
            // corresponding to a declaration
            auto offset = getAttribute<uint64_t>(srcu, decl_die, DW_AT_specification);
            if (offset) {
                /// FIXME: actually it's a bug in our DWARF parser.
                ///
                /// Most of the time the compilation unit offset (srcu.offset) is some big number inside .debug_info (like 434782255).
                /// The offset of the DIE definition is some small number relative to srcu.offset (like 3518).
                /// However, in some unknown cases the offset looks like a global, non-relative number (like 434672579), and in this
                /// case we are obviously doing something wrong when parsing DWARF.
                ///
                /// What is important -- this bug(?) reproduces only with -flto=thin in release mode.
                /// Also llvm-dwarfdump --verify ./clickhouse says that our DWARF is ok, so it's further proof
                /// that we are just doing something wrong.
                ///
                /// FIXME: Currently we just give up parsing DWARF for inlines when we get into this situation.
                if (srcu.offset + offset.value() >= info_.size()) {
                    die_for_inline_broken = true;
                } else {
                    def_die = getDieAtOffset(srcu, srcu.offset + offset.value());
                    die_to_look_for_name = def_die;
                }
            }

            std::string_view name;

            if (die_for_inline_broken) {
                return name;
            }

            // The file and line will be set in the next inline subroutine based on
            // its DW_AT_call_file and DW_AT_call_line.
            forEachAttribute(srcu, die_to_look_for_name, [&](const Attribute& attr) {
                switch (attr.spec.name) {
                case DW_AT_linkage_name:
                    name = std::get<std::string_view>(attr.attr_value);
                    break;
                case DW_AT_name:
                    // NOTE: when DW_AT_linkage_name and DW_AT_name match, dwarf
                    // emitters omit DW_AT_linkage_name (to save space). If present
                    // DW_AT_linkage_name should always be preferred (mangled C++ name
                    // vs just the function name).
                    if (name.empty()) {
                        name = std::get<std::string_view>(attr.attr_value);
                    }
                    break;
                }
                return true;
            });
            return name;
        };

        // DW_AT_abstract_origin is a reference. There are 3 types of references:
        // - the reference can identify any debugging information entry within the
        //   compilation unit (DW_FORM_ref1, DW_FORM_ref2, DW_FORM_ref4,
        //   DW_FORM_ref8, DW_FORM_ref_udata). This type of reference is an offset
        //   from the first byte of the compilation header for the compilation unit
        //   containing the reference.
        // - the reference can identify any debugging information entry within a
        //   .debug_info section; in particular, it may refer to an entry in a
        //   different compilation unit (DW_FORM_ref_addr)
        // - the reference can identify any debugging information type entry that
        //   has been placed in its own type unit.
        //   Not applicable for DW_AT_abstract_origin.
        location.name = (*abstract_origin_ref_type != DW_FORM_ref_addr)
                                ? get_function_name(cu, cu.offset + *abstract_origin)
                                : get_function_name(findCompilationUnit(*abstract_origin),
                                                    *abstract_origin);

        /// FIXME: see comment above
        if (die_for_inline_broken) {
            return false;
        }

        locations.push_back(location);

        // Look for calls that were inlined into this inlined subroutine.
        findInlinedSubroutineDieForAddress(cu, child_die, line_vm, address, base_addr_cu, locations,
                                           max_size);

        // A matching DIE was handled; do not visit the remaining siblings.
        return false;
    });
}
1314 | | |
1315 | | bool Dwarf::findAddress(uintptr_t address, LocationInfo& locationInfo, LocationInfoMode mode, |
1316 | 2.05M | std::vector<SymbolizedFrame>& inline_frames) const { |
1317 | 2.05M | locationInfo = LocationInfo(); |
1318 | | |
1319 | 2.05M | if (mode == LocationInfoMode::DISABLED) { |
1320 | 2.05M | return false; |
1321 | 2.05M | } |
1322 | | |
1323 | 987 | if (!elf_) { // No file. |
1324 | 19 | return false; |
1325 | 19 | } |
1326 | | |
1327 | 968 | if (!aranges_.empty()) { |
1328 | | // Fast path: find the right .debug_info entry by looking up the |
1329 | | // address in .debug_aranges. |
1330 | 968 | uint64_t offset = 0; |
1331 | 968 | if (findDebugInfoOffset(address, aranges_, offset)) { |
1332 | | // Read compilation unit header from .debug_info |
1333 | 681 | auto unit = getCompilationUnit(offset); |
1334 | 681 | if (unit.unit_type != DW_UT_compile && unit.unit_type != DW_UT_skeleton) { |
1335 | 0 | return false; |
1336 | 0 | } |
1337 | 681 | findLocation(address, mode, unit, locationInfo, inline_frames); |
1338 | 681 | return locationInfo.has_file_and_line; |
1339 | 681 | } else if (mode == LocationInfoMode::FAST) { |
1340 | | // NOTE: Clang (when using -gdwarf-aranges) doesn't generate entries |
1341 | | // in .debug_aranges for some functions, but always generates |
1342 | | // .debug_info entries. Scanning .debug_info is slow, so fall back to |
1343 | | // it only if such behavior is requested via LocationInfoMode. |
1344 | 287 | return false; |
1345 | 287 | } else { |
1346 | 0 | SAFE_CHECK(mode == LocationInfoMode::FULL || mode == LocationInfoMode::FULL_WITH_INLINE, |
1347 | 0 | "unexpected mode"); |
1348 | | // Fall back to the linear scan. |
1349 | 0 | } |
1350 | 968 | } |
1351 | | |
1352 | | // Slow path (linear scan): Iterate over all .debug_info entries |
1353 | | // and look for the address in each compilation unit. |
1354 | 0 | uint64_t offset = 0; |
1355 | 0 | while (offset < info_.size() && !locationInfo.has_file_and_line) { |
1356 | 0 | auto unit = getCompilationUnit(offset); |
1357 | 0 | offset += unit.size; |
1358 | 0 | if (unit.unit_type != DW_UT_compile && unit.unit_type != DW_UT_skeleton) { |
1359 | 0 | continue; |
1360 | 0 | } |
1361 | 0 | findLocation(address, mode, unit, locationInfo, inline_frames); |
1362 | 0 | } |
1363 | |
|
1364 | 0 | return locationInfo.has_file_and_line; |
1365 | 968 | } |
1366 | | |
1367 | | bool Dwarf::isAddrInRangeList(const CompilationUnit& cu, uint64_t address, |
1368 | | std::optional<uint64_t> base_addr, size_t offset, |
1369 | 0 | uint8_t addr_size) const { |
1370 | 0 | SAFE_CHECK(addr_size == 4 || addr_size == 8, "wrong address size"); |
1371 | 0 | if (cu.version <= 4 && !ranges_.empty()) { |
1372 | 0 | const bool is64_bit_addr = addr_size == 8; |
1373 | 0 | std::string_view sp = ranges_; |
1374 | 0 | sp.remove_prefix(offset); |
1375 | 0 | const uint64_t max_addr = is64_bit_addr ? std::numeric_limits<uint64_t>::max() |
1376 | 0 | : std::numeric_limits<uint32_t>::max(); |
1377 | 0 | while (!sp.empty()) { |
1378 | 0 | uint64_t begin = readOffset(sp, is64_bit_addr); |
1379 | 0 | uint64_t end = readOffset(sp, is64_bit_addr); |
1380 | | // The range list entry is a base address selection entry. |
1381 | 0 | if (begin == max_addr) { |
1382 | 0 | base_addr = end; |
1383 | 0 | continue; |
1384 | 0 | } |
1385 | | // The range list entry is an end of list entry. |
1386 | 0 | if (begin == 0 && end == 0) { |
1387 | 0 | break; |
1388 | 0 | } |
1389 | | |
1390 | | // Check if the given address falls in the range list entry. |
1391 | | // 2.17.3 Non-Contiguous Address Ranges |
1392 | | // The applicable base address of a range list entry is determined by the |
1393 | | // closest preceding base address selection entry (see below) in the same |
1394 | | // range list. If there is no such selection entry, then the applicable |
1395 | | // base address defaults to the base address of the compilation unit. |
1396 | 0 | if (base_addr && address >= begin + *base_addr && address < end + *base_addr) { |
1397 | 0 | return true; |
1398 | 0 | } |
1399 | 0 | } |
1400 | 0 | } |
1401 | | |
1402 | 0 | if (cu.version == 5 && !rnglists_.empty() && cu.addr_base.has_value()) { |
1403 | 0 | auto rnglists = rnglists_; |
1404 | 0 | rnglists.remove_prefix(offset); |
1405 | |
|
1406 | 0 | while (!rnglists.empty()) { |
1407 | 0 | auto kind = read<uint8_t>(rnglists); |
1408 | 0 | switch (kind) { |
1409 | 0 | case DW_RLE_end_of_list: |
1410 | 0 | return false; |
1411 | 0 | case DW_RLE_base_addressx: { |
1412 | 0 | auto index = readULEB(rnglists); |
1413 | 0 | auto sp = addr_.substr(*cu.addr_base + index * sizeof(uint64_t)); |
1414 | 0 | base_addr = read<uint64_t>(sp); |
1415 | 0 | } break; |
1416 | | |
1417 | 0 | case DW_RLE_startx_endx: { |
1418 | 0 | auto index_start = readULEB(rnglists); |
1419 | 0 | auto index_end = readULEB(rnglists); |
1420 | 0 | auto sp_start = addr_.substr(*cu.addr_base + index_start * sizeof(uint64_t)); |
1421 | 0 | auto start = read<uint64_t>(sp_start); |
1422 | |
|
1423 | 0 | auto sp_end = addr_.substr(*cu.addr_base + index_end * sizeof(uint64_t)); |
1424 | 0 | auto end = read<uint64_t>(sp_end); |
1425 | 0 | if (address >= start && address < end) { |
1426 | 0 | return true; |
1427 | 0 | } |
1428 | 0 | } break; |
1429 | | |
1430 | 0 | case DW_RLE_startx_length: { |
1431 | 0 | auto index_start = readULEB(rnglists); |
1432 | 0 | auto length = readULEB(rnglists); |
1433 | 0 | auto sp_start = addr_.substr(*cu.addr_base + index_start * sizeof(uint64_t)); |
1434 | 0 | auto start = read<uint64_t>(sp_start); |
1435 | |
|
1436 | 0 | auto sp_end = addr_.substr(*cu.addr_base + index_start * sizeof(uint64_t) + length); |
1437 | 0 | auto end = read<uint64_t>(sp_end); |
1438 | 0 | if (start != end && address >= start && address < end) { |
1439 | 0 | return true; |
1440 | 0 | } |
1441 | 0 | } break; |
1442 | | |
1443 | 0 | case DW_RLE_offset_pair: { |
1444 | 0 | auto offset_start = readULEB(rnglists); |
1445 | 0 | auto offset_end = readULEB(rnglists); |
1446 | 0 | if (base_addr && address >= (*base_addr + offset_start) && |
1447 | 0 | address < (*base_addr + offset_end)) { |
1448 | 0 | return true; |
1449 | 0 | } |
1450 | 0 | } break; |
1451 | | |
1452 | 0 | case DW_RLE_base_address: |
1453 | 0 | base_addr = read<uint64_t>(rnglists); |
1454 | 0 | break; |
1455 | | |
1456 | 0 | case DW_RLE_start_end: { |
1457 | 0 | uint64_t start = read<uint64_t>(rnglists); |
1458 | 0 | uint64_t end = read<uint64_t>(rnglists); |
1459 | 0 | if (address >= start && address < end) { |
1460 | 0 | return true; |
1461 | 0 | } |
1462 | 0 | } break; |
1463 | | |
1464 | 0 | case DW_RLE_start_length: { |
1465 | 0 | uint64_t start = read<uint64_t>(rnglists); |
1466 | 0 | uint64_t end = start + readULEB(rnglists); |
1467 | 0 | if (address >= start && address < end) { |
1468 | 0 | return true; |
1469 | 0 | } |
1470 | 0 | } break; |
1471 | | |
1472 | 0 | default: |
1473 | 0 | SAFE_CHECK(false, "Unexpected debug_rnglists entry kind"); |
1474 | 0 | } |
1475 | 0 | } |
1476 | 0 | } |
1477 | 0 | return false; |
1478 | 0 | } |
1479 | | |
1480 | | Dwarf::LineNumberVM::LineNumberVM(std::string_view data, std::string_view compilationDirectory, |
1481 | | std::string_view debugStr, std::string_view debugLineStr) |
1482 | 681 | : compilationDirectory_(compilationDirectory), |
1483 | 681 | debugStr_(debugStr), |
1484 | 681 | debugLineStr_(debugLineStr) { |
1485 | 681 | Section section(data); |
1486 | 681 | SAFE_CHECK(section.next(data_), "invalid line number VM"); |
1487 | 681 | is64Bit_ = section.is64Bit(); |
1488 | 681 | init(); |
1489 | 681 | reset(); |
1490 | 681 | } |
1491 | | |
1492 | 267k | void Dwarf::LineNumberVM::reset() { |
1493 | 267k | address_ = 0; |
1494 | 267k | file_ = 1; |
1495 | 267k | line_ = 1; |
1496 | 267k | column_ = 0; |
1497 | 267k | isStmt_ = defaultIsStmt_; |
1498 | 267k | basicBlock_ = false; |
1499 | 267k | endSequence_ = false; |
1500 | 267k | prologueEnd_ = false; |
1501 | 267k | epilogueBegin_ = false; |
1502 | 267k | isa_ = 0; |
1503 | 267k | discriminator_ = 0; |
1504 | 267k | } |
1505 | | |
// One decoded attribute of a DWARF 5 line-table directory or file-name entry,
// as produced by readLineNumberAttribute().
struct LineNumberAttribute {
    // Content type code (DW_LNCT_*) read from the entry format description.
    uint64_t content_type_code;
    // Form code (DW_FORM_*) that described how the value was encoded.
    uint64_t form_code;
    // Decoded value: an integer for numeric forms, a view of the bytes for
    // string and block forms.
    std::variant<uint64_t, std::string_view> attr_value;
};
1511 | | |
1512 | | LineNumberAttribute readLineNumberAttribute(bool is64_bit, std::string_view& format, |
1513 | | std::string_view& entries, std::string_view debugStr, |
1514 | 825k | std::string_view debugLineStr) { |
1515 | 825k | uint64_t content_type_code = readULEB(format); |
1516 | 825k | uint64_t form_code = readULEB(format); |
1517 | 825k | std::variant<uint64_t, std::string_view> attr_value; |
1518 | | |
1519 | 825k | switch (content_type_code) { |
1520 | 432k | case DW_LNCT_path: { |
1521 | 432k | switch (form_code) { |
1522 | 0 | case DW_FORM_string: |
1523 | 0 | attr_value = readNullTerminated(entries); |
1524 | 0 | break; |
1525 | 432k | case DW_FORM_line_strp: { |
1526 | 432k | auto off = readOffset(entries, is64_bit); |
1527 | 432k | attr_value = getStringFromStringSection(debugLineStr, off); |
1528 | 432k | } break; |
1529 | 0 | case DW_FORM_strp: |
1530 | 0 | attr_value = getStringFromStringSection(debugStr, readOffset(entries, is64_bit)); |
1531 | 0 | break; |
1532 | 0 | case DW_FORM_strp_sup: |
1533 | 0 | SAFE_CHECK(false, "Unexpected DW_FORM_strp_sup"); |
1534 | 0 | break; |
1535 | 0 | default: |
1536 | 0 | SAFE_CHECK(false, "Unexpected form for DW_LNCT_path"); |
1537 | 0 | break; |
1538 | 432k | } |
1539 | 432k | } break; |
1540 | | |
1541 | 432k | case DW_LNCT_directory_index: { |
1542 | 393k | switch (form_code) { |
1543 | 0 | case DW_FORM_data1: |
1544 | 0 | attr_value = read<uint8_t>(entries); |
1545 | 0 | break; |
1546 | 0 | case DW_FORM_data2: |
1547 | 0 | attr_value = read<uint16_t>(entries); |
1548 | 0 | break; |
1549 | 393k | case DW_FORM_udata: |
1550 | 393k | attr_value = readULEB(entries); |
1551 | 393k | break; |
1552 | 0 | default: |
1553 | 0 | SAFE_CHECK(false, "Unexpected form for DW_LNCT_directory_index"); |
1554 | 0 | break; |
1555 | 393k | } |
1556 | 393k | } break; |
1557 | | |
1558 | 393k | case DW_LNCT_timestamp: { |
1559 | 0 | switch (form_code) { |
1560 | 0 | case DW_FORM_udata: |
1561 | 0 | attr_value = readULEB(entries); |
1562 | 0 | break; |
1563 | 0 | case DW_FORM_data4: |
1564 | 0 | attr_value = read<uint32_t>(entries); |
1565 | 0 | break; |
1566 | 0 | case DW_FORM_data8: |
1567 | 0 | attr_value = read<uint64_t>(entries); |
1568 | 0 | break; |
1569 | 0 | case DW_FORM_block: |
1570 | 0 | attr_value = readBytes(entries, readULEB(entries)); |
1571 | 0 | break; |
1572 | 0 | default: |
1573 | 0 | SAFE_CHECK(false, "Unexpected form for DW_LNCT_timestamp"); |
1574 | 0 | } |
1575 | 0 | } break; |
1576 | | |
1577 | 0 | case DW_LNCT_size: { |
1578 | 0 | switch (form_code) { |
1579 | 0 | case DW_FORM_udata: |
1580 | 0 | attr_value = readULEB(entries); |
1581 | 0 | break; |
1582 | 0 | case DW_FORM_data1: |
1583 | 0 | attr_value = read<uint8_t>(entries); |
1584 | 0 | break; |
1585 | 0 | case DW_FORM_data2: |
1586 | 0 | attr_value = read<uint16_t>(entries); |
1587 | 0 | break; |
1588 | 0 | case DW_FORM_data4: |
1589 | 0 | attr_value = read<uint32_t>(entries); |
1590 | 0 | break; |
1591 | 0 | case DW_FORM_data8: |
1592 | 0 | attr_value = read<uint64_t>(entries); |
1593 | 0 | break; |
1594 | 0 | default: |
1595 | 0 | SAFE_CHECK(false, "Unexpected form for DW_LNCT_size"); |
1596 | 0 | break; |
1597 | 0 | } |
1598 | 0 | } break; |
1599 | | |
1600 | 0 | case DW_LNCT_MD5: { |
1601 | 0 | switch (form_code) { |
1602 | 0 | case DW_FORM_data16: |
1603 | 0 | attr_value = readBytes(entries, 16); |
1604 | 0 | break; |
1605 | 0 | default: |
1606 | 0 | SAFE_CHECK(false, "Unexpected form for DW_LNCT_MD5"); |
1607 | 0 | break; |
1608 | 0 | } |
1609 | 0 | } break; |
1610 | | |
1611 | 0 | default: |
1612 | | // TODO: skip over vendor data as specified by the form instead. |
1613 | 0 | SAFE_CHECK(false, "Unexpected vendor content type code"); |
1614 | 0 | break; |
1615 | 825k | } |
1616 | 825k | return { |
1617 | 825k | .content_type_code = content_type_code, |
1618 | 825k | .form_code = form_code, |
1619 | 825k | .attr_value = attr_value, |
1620 | 825k | }; |
1621 | 825k | } |
1622 | | |
1623 | 681 | void Dwarf::LineNumberVM::init() { |
1624 | 681 | version_ = read<uint16_t>(data_); |
1625 | 681 | SAFE_CHECK(version_ >= 2 && version_ <= 5, "invalid version in line number VM: {}", version_); |
1626 | 681 | if (version_ == 5) { |
1627 | 681 | auto address_size = read<uint8_t>(data_); |
1628 | 681 | SAFE_CHECK(address_size == sizeof(uintptr_t), "Unexpected Line Number Table address_size"); |
1629 | 681 | auto segment_selector_size = read<uint8_t>(data_); |
1630 | 681 | SAFE_CHECK(segment_selector_size == 0, "Segments not supported"); |
1631 | 681 | } |
1632 | 681 | uint64_t header_length = readOffset(data_, is64Bit_); |
1633 | 681 | SAFE_CHECK(header_length <= data_.size(), "invalid line number VM header length"); |
1634 | 681 | std::string_view header(data_.data(), header_length); |
1635 | 681 | data_ = std::string_view(header.end(), data_.end() - header.end()); |
1636 | | |
1637 | 681 | minLength_ = read<uint8_t>(header); |
1638 | 681 | if (version_ >= 4) { // Version 2 and 3 records don't have this |
1639 | 681 | uint8_t max_ops_per_instruction = read<uint8_t>(header); |
1640 | 681 | SAFE_CHECK(max_ops_per_instruction == 1, "VLIW not supported"); |
1641 | 681 | } |
1642 | 681 | defaultIsStmt_ = read<uint8_t>(header); |
1643 | 681 | lineBase_ = read<int8_t>(header); // yes, signed |
1644 | 681 | lineRange_ = read<uint8_t>(header); |
1645 | 681 | opcodeBase_ = read<uint8_t>(header); |
1646 | 681 | SAFE_CHECK(opcodeBase_ != 0, "invalid opcode base"); |
1647 | 681 | standardOpcodeLengths_ = reinterpret_cast<const uint8_t*>(header.data()); |
1648 | 681 | header.remove_prefix(opcodeBase_ - 1); |
1649 | | |
1650 | 681 | if (version_ <= 4) { |
1651 | | // We don't want to use heap, so we don't keep an unbounded amount of state. |
1652 | | // We'll just skip over include directories and file names here, and |
1653 | | // we'll loop again when we actually need to retrieve one. |
1654 | 0 | std::string_view sp; |
1655 | 0 | const char* tmp = header.data(); |
1656 | 0 | v4_.includeDirectoryCount = 0; |
1657 | 0 | while (!(sp = readNullTerminated(header)).empty()) { |
1658 | 0 | ++v4_.includeDirectoryCount; |
1659 | 0 | } |
1660 | 0 | v4_.includeDirectories = {tmp, header.data()}; |
1661 | |
|
1662 | 0 | tmp = header.data(); |
1663 | 0 | FileName fn; |
1664 | 0 | v4_.fileNameCount = 0; |
1665 | 0 | while (readFileName(header, fn)) { |
1666 | 0 | ++v4_.fileNameCount; |
1667 | 0 | } |
1668 | 0 | v4_.fileNames = {tmp, header.data()}; |
1669 | 681 | } else if (version_ == 5) { |
1670 | 681 | v5_.directoryEntryFormatCount = read<uint8_t>(header); |
1671 | 681 | const char* tmp = header.data(); |
1672 | 1.36k | for (uint8_t i = 0; i < v5_.directoryEntryFormatCount; i++) { |
1673 | | // A sequence of directory entry format descriptions. Each description |
1674 | | // consists of a pair of ULEB128 values: |
1675 | 681 | readULEB(header); // A content type code |
1676 | 681 | readULEB(header); // A form code using the attribute form codes |
1677 | 681 | } |
1678 | 681 | v5_.directoryEntryFormat = {tmp, header.data()}; |
1679 | 681 | v5_.directoriesCount = readULEB(header); |
1680 | 681 | tmp = header.data(); |
1681 | 38.0k | for (uint64_t i = 0; i < v5_.directoriesCount; i++) { |
1682 | 37.3k | std::string_view format = v5_.directoryEntryFormat; |
1683 | 74.7k | for (uint8_t f = 0; f < v5_.directoryEntryFormatCount; f++) { |
1684 | 37.3k | readLineNumberAttribute(is64Bit_, format, header, debugStr_, debugLineStr_); |
1685 | 37.3k | } |
1686 | 37.3k | } |
1687 | 681 | v5_.directories = {tmp, header.data()}; |
1688 | | |
1689 | 681 | v5_.fileNameEntryFormatCount = read<uint8_t>(header); |
1690 | 681 | tmp = header.data(); |
1691 | 2.04k | for (uint8_t i = 0; i < v5_.fileNameEntryFormatCount; i++) { |
1692 | | // A sequence of file entry format descriptions. Each description |
1693 | | // consists of a pair of ULEB128 values: |
1694 | 1.36k | readULEB(header); // A content type code |
1695 | 1.36k | readULEB(header); // A form code using the attribute form codes |
1696 | 1.36k | } |
1697 | 681 | v5_.fileNameEntryFormat = {tmp, header.data()}; |
1698 | 681 | v5_.fileNamesCount = readULEB(header); |
1699 | 681 | tmp = header.data(); |
1700 | 197k | for (uint64_t i = 0; i < v5_.fileNamesCount; i++) { |
1701 | 196k | std::string_view format = v5_.fileNameEntryFormat; |
1702 | 589k | for (uint8_t f = 0; f < v5_.fileNameEntryFormatCount; f++) { |
1703 | 393k | readLineNumberAttribute(is64Bit_, format, header, debugStr_, debugLineStr_); |
1704 | 393k | } |
1705 | 196k | } |
1706 | 681 | v5_.fileNames = {tmp, header.data()}; |
1707 | 681 | } |
1708 | 681 | } |
1709 | | |
1710 | 14.8M | bool Dwarf::LineNumberVM::next(std::string_view& program) { |
1711 | 14.8M | Dwarf::LineNumberVM::StepResult ret; |
1712 | 50.0M | do { |
1713 | 50.0M | ret = step(program); |
1714 | 50.0M | } while (ret == CONTINUE); |
1715 | | |
1716 | 14.8M | return (ret == COMMIT); |
1717 | 14.8M | } |
1718 | | |
1719 | 681 | Dwarf::LineNumberVM::FileName Dwarf::LineNumberVM::getFileName(uint64_t index) const { |
1720 | 681 | if (version_ <= 4) { |
1721 | 0 | SAFE_CHECK(index != 0, "invalid file index 0"); |
1722 | 0 | FileName fn; |
1723 | 0 | if (index <= v4_.fileNameCount) { |
1724 | 0 | std::string_view file_names = v4_.fileNames; |
1725 | 0 | for (; index; --index) { |
1726 | 0 | if (!readFileName(file_names, fn)) { |
1727 | 0 | abort(); |
1728 | 0 | } |
1729 | 0 | } |
1730 | 0 | return fn; |
1731 | 0 | } |
1732 | | |
1733 | 0 | index -= v4_.fileNameCount; |
1734 | |
|
1735 | 0 | std::string_view program = data_; |
1736 | 0 | for (; index; --index) { |
1737 | 0 | SAFE_CHECK(nextDefineFile(program, fn), "invalid file index"); |
1738 | 0 | } |
1739 | |
|
1740 | 0 | return fn; |
1741 | 681 | } else { |
1742 | 681 | FileName fn; |
1743 | 681 | SAFE_CHECK(index < v5_.fileNamesCount, "invalid file index"); |
1744 | 681 | std::string_view file_names = v5_.fileNames; |
1745 | 197k | for (uint64_t i = 0; i < v5_.fileNamesCount; i++) { |
1746 | 196k | std::string_view format = v5_.fileNameEntryFormat; |
1747 | 589k | for (uint8_t f = 0; f < v5_.fileNameEntryFormatCount; f++) { |
1748 | 393k | auto attr = readLineNumberAttribute(is64Bit_, format, file_names, debugStr_, |
1749 | 393k | debugLineStr_); |
1750 | 393k | if (i == index) { |
1751 | 1.36k | switch (attr.content_type_code) { |
1752 | 681 | case DW_LNCT_path: |
1753 | 681 | fn.relativeName = std::get<std::string_view>(attr.attr_value); |
1754 | 681 | break; |
1755 | 681 | case DW_LNCT_directory_index: |
1756 | 681 | fn.directoryIndex = std::get<uint64_t>(attr.attr_value); |
1757 | 681 | break; |
1758 | 1.36k | } |
1759 | 1.36k | } |
1760 | 393k | } |
1761 | 196k | } |
1762 | 681 | return fn; |
1763 | 681 | } |
1764 | 681 | } |
1765 | | |
1766 | 681 | std::string_view Dwarf::LineNumberVM::getIncludeDirectory(uint64_t index) const { |
1767 | 681 | if (version_ <= 4) { |
1768 | 0 | if (index == 0) { |
1769 | | // In DWARF <= 4 the current directory is not represented in the |
1770 | | // directories field and a directory index of 0 implicitly referred to |
1771 | | // that directory as found in the DW_AT_comp_dir attribute of the |
1772 | | // compilation unit debugging information entry. |
1773 | 0 | return {}; |
1774 | 0 | } |
1775 | | |
1776 | 0 | SAFE_CHECK(index <= v4_.includeDirectoryCount, "invalid include directory"); |
1777 | |
|
1778 | 0 | std::string_view include_directories = v4_.includeDirectories; |
1779 | 0 | std::string_view dir; |
1780 | 0 | for (; index; --index) { |
1781 | 0 | dir = readNullTerminated(include_directories); |
1782 | 0 | if (dir.empty()) { |
1783 | 0 | abort(); // BUG |
1784 | 0 | } |
1785 | 0 | } |
1786 | | |
1787 | 0 | return dir; |
1788 | 681 | } else { |
1789 | 681 | SAFE_CHECK(index < v5_.directoriesCount, "invalid file index"); |
1790 | 681 | std::string_view directories = v5_.directories; |
1791 | 2.33k | for (uint64_t i = 0; i < v5_.directoriesCount; i++) { |
1792 | 2.33k | std::string_view format = v5_.directoryEntryFormat; |
1793 | 3.98k | for (uint8_t f = 0; f < v5_.directoryEntryFormatCount; f++) { |
1794 | 2.33k | auto attr = readLineNumberAttribute(is64Bit_, format, directories, debugStr_, |
1795 | 2.33k | debugLineStr_); |
1796 | 2.33k | if (i == index && attr.content_type_code == DW_LNCT_path) { |
1797 | 681 | return std::get<std::string_view>(attr.attr_value); |
1798 | 681 | } |
1799 | 2.33k | } |
1800 | 2.33k | } |
1801 | | // This could only happen if DWARF5's directory_entry_format doesn't contain |
1802 | | // a DW_LNCT_path. Highly unlikely, but we shouldn't crash. |
1803 | 0 | return std::string_view("<directory not found>"); |
1804 | 681 | } |
1805 | 681 | } |
1806 | | |
1807 | 0 | bool Dwarf::LineNumberVM::readFileName(std::string_view& program, FileName& fn) { |
1808 | 0 | fn.relativeName = readNullTerminated(program); |
1809 | 0 | if (fn.relativeName.empty()) { |
1810 | 0 | return false; |
1811 | 0 | } |
1812 | 0 | fn.directoryIndex = readULEB(program); |
1813 | | // Skip over file size and last modified time |
1814 | 0 | readULEB(program); |
1815 | 0 | readULEB(program); |
1816 | 0 | return true; |
1817 | 0 | } |
1818 | | |
1819 | 0 | bool Dwarf::LineNumberVM::nextDefineFile(std::string_view& program, FileName& fn) const { |
1820 | 0 | while (!program.empty()) { |
1821 | 0 | auto opcode = read<uint8_t>(program); |
1822 | |
|
1823 | 0 | if (opcode >= opcodeBase_) { // special opcode |
1824 | 0 | continue; |
1825 | 0 | } |
1826 | | |
1827 | 0 | if (opcode != 0) { // standard opcode |
1828 | | // Skip, slurp the appropriate number of LEB arguments |
1829 | 0 | uint8_t arg_count = standardOpcodeLengths_[opcode - 1]; |
1830 | 0 | while (arg_count--) { |
1831 | 0 | readULEB(program); |
1832 | 0 | } |
1833 | 0 | continue; |
1834 | 0 | } |
1835 | | |
1836 | | // Extended opcode |
1837 | 0 | auto length = readULEB(program); |
1838 | | // the opcode itself should be included in the length, so length >= 1 |
1839 | 0 | SAFE_CHECK(length != 0, "invalid extended opcode length"); |
1840 | 0 | read<uint8_t>(program); // extended opcode |
1841 | 0 | --length; |
1842 | |
|
1843 | 0 | if (opcode == DW_LNE_define_file) { |
1844 | 0 | SAFE_CHECK(version_ < 5, "DW_LNE_define_file deprecated in DWARF5"); |
1845 | 0 | SAFE_CHECK(readFileName(program, fn), "invalid empty file in DW_LNE_define_file"); |
1846 | 0 | return true; |
1847 | 0 | } |
1848 | | |
1849 | 0 | program.remove_prefix(length); |
1850 | 0 | } |
1851 | | |
1852 | 0 | return false; |
1853 | 0 | } |
1854 | | |
1855 | 50.0M | Dwarf::LineNumberVM::StepResult Dwarf::LineNumberVM::step(std::string_view& program) { |
1856 | 50.0M | auto opcode = read<uint8_t>(program); |
1857 | | |
1858 | 50.0M | if (opcode >= opcodeBase_) { // special opcode |
1859 | 13.2M | uint8_t adjusted_opcode = opcode - opcodeBase_; |
1860 | 13.2M | uint8_t op_advance = adjusted_opcode / lineRange_; |
1861 | | |
1862 | 13.2M | address_ += minLength_ * op_advance; |
1863 | 13.2M | line_ += lineBase_ + adjusted_opcode % lineRange_; |
1864 | | |
1865 | 13.2M | basicBlock_ = false; |
1866 | 13.2M | prologueEnd_ = false; |
1867 | 13.2M | epilogueBegin_ = false; |
1868 | 13.2M | discriminator_ = 0; |
1869 | 13.2M | return COMMIT; |
1870 | 13.2M | } |
1871 | | |
1872 | 36.8M | if (opcode != 0) { // standard opcode |
1873 | | // Only interpret opcodes that are recognized by the version we're parsing; |
1874 | | // the others are vendor extensions and we should ignore them. |
1875 | 36.2M | switch (opcode) { |
1876 | 1.35M | case DW_LNS_copy: |
1877 | 1.35M | basicBlock_ = false; |
1878 | 1.35M | prologueEnd_ = false; |
1879 | 1.35M | epilogueBegin_ = false; |
1880 | 1.35M | discriminator_ = 0; |
1881 | 1.35M | return COMMIT; |
1882 | 1.67M | case DW_LNS_advance_pc: |
1883 | 1.67M | address_ += minLength_ * readULEB(program); |
1884 | 1.67M | return CONTINUE; |
1885 | 11.5M | case DW_LNS_advance_line: |
1886 | 11.5M | line_ += readSLEB(program); |
1887 | 11.5M | return CONTINUE; |
1888 | 2.79M | case DW_LNS_set_file: |
1889 | 2.79M | file_ = readULEB(program); |
1890 | 2.79M | return CONTINUE; |
1891 | 8.07M | case DW_LNS_set_column: |
1892 | 8.07M | column_ = readULEB(program); |
1893 | 8.07M | return CONTINUE; |
1894 | 7.22M | case DW_LNS_negate_stmt: |
1895 | 7.22M | isStmt_ = !isStmt_; |
1896 | 7.22M | return CONTINUE; |
1897 | 0 | case DW_LNS_set_basic_block: |
1898 | 0 | basicBlock_ = true; |
1899 | 0 | return CONTINUE; |
1900 | 2.99M | case DW_LNS_const_add_pc: |
1901 | 2.99M | address_ += minLength_ * ((255 - opcodeBase_) / lineRange_); |
1902 | 2.99M | return CONTINUE; |
1903 | 0 | case DW_LNS_fixed_advance_pc: |
1904 | 0 | address_ += read<uint16_t>(program); |
1905 | 0 | return CONTINUE; |
1906 | 272k | case DW_LNS_set_prologue_end: |
1907 | 272k | if (version_ == 2) { |
1908 | 0 | break; // not supported in version 2 |
1909 | 0 | } |
1910 | 272k | prologueEnd_ = true; |
1911 | 272k | return CONTINUE; |
1912 | 287k | case DW_LNS_set_epilogue_begin: |
1913 | 287k | if (version_ == 2) { |
1914 | 0 | break; // not supported in version 2 |
1915 | 0 | } |
1916 | 287k | epilogueBegin_ = true; |
1917 | 287k | return CONTINUE; |
1918 | 0 | case DW_LNS_set_isa: |
1919 | 0 | if (version_ == 2) { |
1920 | 0 | break; // not supported in version 2 |
1921 | 0 | } |
1922 | 0 | isa_ = readULEB(program); |
1923 | 0 | return CONTINUE; |
1924 | 36.2M | } |
1925 | | |
1926 | | // Unrecognized standard opcode, slurp the appropriate number of LEB |
1927 | | // arguments. |
1928 | 0 | uint8_t arg_count = standardOpcodeLengths_[opcode - 1]; |
1929 | 0 | while (arg_count--) { |
1930 | 0 | readULEB(program); |
1931 | 0 | } |
1932 | 0 | return CONTINUE; |
1933 | 36.2M | } |
1934 | | |
1935 | | // Extended opcode |
1936 | 547k | auto length = readULEB(program); |
1937 | | // the opcode itself should be included in the length, so length >= 1 |
1938 | 547k | SAFE_CHECK(length != 0, "invalid extended opcode length"); |
1939 | 547k | auto extended_opcode = read<uint8_t>(program); |
1940 | 547k | --length; |
1941 | | |
1942 | 547k | switch (extended_opcode) { |
1943 | 266k | case DW_LNE_end_sequence: |
1944 | 266k | return END; |
1945 | 266k | case DW_LNE_set_address: |
1946 | 266k | address_ = read<uintptr_t>(program); |
1947 | 266k | return CONTINUE; |
1948 | 0 | case DW_LNE_define_file: |
1949 | 0 | SAFE_CHECK(version_ < 5, "DW_LNE_define_file deprecated in DWARF5"); |
1950 | | // We can't process DW_LNE_define_file here, as it would require us to |
1951 | | // use unbounded amounts of state (ie. use the heap). We'll do a second |
1952 | | // pass (using nextDefineFile()) if necessary. |
1953 | 0 | break; |
1954 | 14.5k | case DW_LNE_set_discriminator: |
1955 | 14.5k | discriminator_ = readULEB(program); |
1956 | 14.5k | return CONTINUE; |
1957 | 547k | } |
1958 | | |
1959 | | // Unrecognized extended opcode |
1960 | 0 | program.remove_prefix(length); |
1961 | 0 | return CONTINUE; |
1962 | 547k | } |
1963 | | |
1964 | 681 | Dwarf::Path Dwarf::LineNumberVM::getFullFileName(uint64_t index) const { |
1965 | 681 | auto fn = getFileName(index); |
1966 | | // DWARF <= 4: the current dir is not represented in the CU's Line Number |
1967 | | // Program Header and relies on the CU's DW_AT_comp_dir. |
1968 | | // DWARF 5: the current directory is explicitly present. |
1969 | 681 | const std::string_view base_dir = version_ == 5 ? "" : compilationDirectory_; |
1970 | 681 | return Path(base_dir, getIncludeDirectory(fn.directoryIndex), fn.relativeName); |
1971 | 681 | } |
1972 | | |
1973 | 681 | bool Dwarf::LineNumberVM::findAddress(uintptr_t target, Path& file, uint64_t& line) { |
1974 | 681 | std::string_view program = data_; |
1975 | | |
1976 | | // Within each sequence of instructions, the address may only increase. |
1977 | | // Unfortunately, within the same compilation unit, sequences may appear |
1978 | | // in any order. So any sequence is a candidate if it starts at an address |
1979 | | // <= the target address, and we know we've found the target address if |
1980 | | // a candidate crosses the target address. |
1981 | 681 | enum State { |
1982 | 681 | START, |
1983 | 681 | LOW_SEQ, // candidate |
1984 | 681 | HIGH_SEQ |
1985 | 681 | }; |
1986 | 681 | State state = START; |
1987 | 681 | reset(); |
1988 | | |
1989 | 681 | uint64_t prev_file = 0; |
1990 | 681 | uint64_t prev_line = 0; |
1991 | 14.8M | while (!program.empty()) { |
1992 | 14.8M | bool seq_end = !next(program); |
1993 | | |
1994 | 14.8M | if (state == START) { |
1995 | 266k | if (!seq_end) { |
1996 | 266k | state = address_ <= target ? LOW_SEQ : HIGH_SEQ; |
1997 | 266k | } |
1998 | 266k | } |
1999 | | |
2000 | 14.8M | if (state == LOW_SEQ) { |
2001 | 14.7M | if (address_ > target) { |
2002 | | // Found it! Note that ">" is indeed correct (not ">="), as each |
2003 | | // sequence is guaranteed to have one entry past-the-end (emitted by |
2004 | | // DW_LNE_end_sequence) |
2005 | | // |
2006 | | // NOTE: In DWARF <= 4 the file register is non-zero. |
2007 | | // See DWARF 4: 6.2.4 The Line Number Program Header |
2008 | | // "The line number program assigns numbers to each of the file |
2009 | | // entries in order, beginning with 1, and uses those numbers instead |
2010 | | // of file names in the file register." |
2011 | | // DWARF 5 has a different include directory/file header and 0 is valid. |
2012 | 681 | if (version_ <= 4 && prev_file == 0) { |
2013 | 0 | return false; |
2014 | 0 | } |
2015 | 681 | file = getFullFileName(prev_file); |
2016 | 681 | line = prev_line; |
2017 | 681 | return true; |
2018 | 681 | } |
2019 | 14.7M | prev_file = file_; |
2020 | 14.7M | prev_line = line_; |
2021 | 14.7M | } |
2022 | | |
2023 | 14.8M | if (seq_end) { |
2024 | 266k | state = START; |
2025 | 266k | reset(); |
2026 | 266k | } |
2027 | 14.8M | } |
2028 | | |
2029 | 0 | return false; |
2030 | 681 | } |
2031 | | |
2032 | | #include "common/compile_check_avoid_end.h" |
2033 | | } // namespace doris |
2034 | | |
2035 | | #endif |