Coverage Report

Created: 2024-11-18 12:21

/root/doris/be/src/common/stack_trace.cpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/StackTrace.cpp
19
// and modified by Doris
20
21
#include "common/stack_trace.h"
22
23
#include <common/dwarf.h>
24
#include <common/elf.h>
25
#include <common/memory_sanitizer.h>
26
#include <common/symbol_index.h>
27
#include <fmt/format.h>
28
29
#include <atomic>
30
#include <filesystem>
31
#include <map>
32
#include <mutex>
33
#include <sstream>
34
#include <unordered_map>
35
36
#include "config.h"
37
#include "util/string_util.h"
38
#include "vec/common/demangle.h"
39
#include "vec/common/hex.h"
40
41
#if USE_UNWIND && defined(__x86_64__)
42
#include <libunwind.h>
43
#else
44
#include <execinfo.h>
45
#endif
46
47
namespace {
/// Process-wide switch deciding whether raw addresses may appear in diagnostics.
/// It is an atomic so that it can be flipped at runtime without a data race.
std::atomic<bool> show_addresses {true};

// #if defined(__ELF__) && !defined(__FreeBSD__)
// void writePointerHex(const void* ptr, std::stringstream& buf) {
//     buf.write("0x", 2);
//     char hex_str[2 * sizeof(ptr)];
//     doris::vectorized::write_hex_uint_lowercase(reinterpret_cast<uintptr_t>(ptr), hex_str);
//     buf.write(hex_str, 2 * sizeof(ptr));
// }
// #endif

/// Returns true when `addr` may be printed.
///
/// Addresses below 4096 are always allowed: they most likely stem from a nullptr
/// dereference with an offset, and revealing that offset is harmless. 4096 is the
/// x86 page size; other systems may differ, but that does not matter for this check.
/// Every other address is governed by `show_addresses`.
bool shouldShowAddress(const void* addr) {
    constexpr uintptr_t always_visible_limit = 4096;
    const bool near_null = reinterpret_cast<uintptr_t>(addr) < always_visible_limit;
    return near_null || show_addresses.load(std::memory_order_relaxed);
}
} // namespace
73
74
0
void StackTrace::setShowAddresses(bool show) {
75
0
    show_addresses.store(show, std::memory_order_relaxed);
76
0
}
77
78
0
std::string SigsegvErrorString(const siginfo_t& info, [[maybe_unused]] const ucontext_t& context) {
79
0
    using namespace std::string_literals;
80
0
    std::string address =
81
0
            info.si_addr == nullptr
82
0
                    ? "NULL pointer"s
83
0
                    : (shouldShowAddress(info.si_addr) ? fmt::format("{}", info.si_addr) : ""s);
84
85
0
    const std::string_view access =
86
0
#if defined(__x86_64__) && !defined(__FreeBSD__) && !defined(__APPLE__) && !defined(__arm__) && \
87
0
        !defined(__powerpc__)
88
0
            (context.uc_mcontext.gregs[REG_ERR] & 0x02) ? "write" : "read";
89
#else
90
            "";
91
#endif
92
93
0
    std::string_view message;
94
95
0
    switch (info.si_code) {
96
0
    case SEGV_ACCERR:
97
0
        message = "Attempted access has violated the permissions assigned to the memory area";
98
0
        break;
99
0
    case SEGV_MAPERR:
100
0
        message = "Address not mapped to object";
101
0
        break;
102
0
    default:
103
0
        message = "Unknown si_code";
104
0
        break;
105
0
    }
106
107
0
    return fmt::format("Address: {}. Access: {}. {}.", std::move(address), access, message);
108
0
}
109
110
0
/// Maps the `si_code` delivered with a SIGBUS to a human-readable description.
/// Codes that are not listed yield "Unknown si_code.".
constexpr std::string_view SigbusErrorString(int si_code) {
    struct Description {
        int code;
        std::string_view text;
    };
    constexpr Description known_codes[] = {
            {BUS_ADRALN, "Invalid address alignment."},
            {BUS_ADRERR, "Non-existent physical address."},
            {BUS_OBJERR, "Object specific hardware error."},
    // Linux specific
#if defined(BUS_MCEERR_AR)
            {BUS_MCEERR_AR, "Hardware memory error: action required."},
#endif
#if defined(BUS_MCEERR_AO)
            {BUS_MCEERR_AO, "Hardware memory error: action optional."},
#endif
    };

    for (const Description& entry : known_codes) {
        if (entry.code == si_code) {
            return entry.text;
        }
    }
    return "Unknown si_code.";
}
132
133
0
/// Maps the `si_code` delivered with a SIGFPE to a human-readable description.
/// Codes that are not listed yield "Unknown si_code.".
constexpr std::string_view SigfpeErrorString(int si_code) {
    struct Description {
        int code;
        std::string_view text;
    };
    constexpr Description known_codes[] = {
            {FPE_INTDIV, "Integer divide by zero."},
            {FPE_INTOVF, "Integer overflow."},
            {FPE_FLTDIV, "Floating point divide by zero."},
            {FPE_FLTOVF, "Floating point overflow."},
            {FPE_FLTUND, "Floating point underflow."},
            {FPE_FLTRES, "Floating point inexact result."},
            {FPE_FLTINV, "Floating point invalid operation."},
            {FPE_FLTSUB, "Subscript out of range."},
    };

    for (const Description& entry : known_codes) {
        if (entry.code == si_code) {
            return entry.text;
        }
    }
    return "Unknown si_code.";
}
155
156
0
/// Maps the `si_code` delivered with a SIGILL to a human-readable description.
/// Codes that are not listed yield "Unknown si_code.".
constexpr std::string_view SigillErrorString(int si_code) {
    struct Description {
        int code;
        std::string_view text;
    };
    constexpr Description known_codes[] = {
            {ILL_ILLOPC, "Illegal opcode."},
            {ILL_ILLOPN, "Illegal operand."},
            {ILL_ILLADR, "Illegal addressing mode."},
            {ILL_ILLTRP, "Illegal trap."},
            {ILL_PRVOPC, "Privileged opcode."},
            {ILL_PRVREG, "Privileged register."},
            {ILL_COPROC, "Coprocessor error."},
            {ILL_BADSTK, "Internal stack error."},
    };

    for (const Description& entry : known_codes) {
        if (entry.code == si_code) {
            return entry.text;
        }
    }
    return "Unknown si_code.";
}
178
179
std::string signalToErrorMessage(int sig, const siginfo_t& info,
180
0
                                 [[maybe_unused]] const ucontext_t& context) {
181
0
    switch (sig) {
182
0
    case SIGSEGV:
183
0
        return SigsegvErrorString(info, context);
184
0
    case SIGBUS:
185
0
        return std::string {SigbusErrorString(info.si_code)};
186
0
    case SIGILL:
187
0
        return std::string {SigillErrorString(info.si_code)};
188
0
    case SIGFPE:
189
0
        return std::string {SigfpeErrorString(info.si_code)};
190
0
    case SIGTSTP:
191
0
        return "This is a signal used for debugging purposes by the user.";
192
0
    default:
193
0
        return "";
194
0
    }
195
0
}
196
197
0
/// Extracts the program counter stored in a signal context, i.e. the address that was
/// executing when the signal was raised. Returns nullptr on platforms that have no
/// branch below.
static void* getCallerAddress(const ucontext_t& context) {
    // Unused on platforms that fall through to the final #else.
    [[maybe_unused]] const auto& machine = context.uc_mcontext;
#if defined(__x86_64__)
    /// On x86-64 the address lives in RIP.
#if defined(__FreeBSD__)
    return reinterpret_cast<void*>(machine.mc_rip);
#elif defined(__APPLE__)
    return reinterpret_cast<void*>(machine->__ss.__rip);
#else
    return reinterpret_cast<void*>(machine.gregs[REG_RIP]);
#endif
#elif defined(__APPLE__) && defined(__aarch64__)
    return reinterpret_cast<void*>(machine->__ss.__pc);
#elif defined(__FreeBSD__) && defined(__aarch64__)
    return reinterpret_cast<void*>(machine.mc_gpregs.gp_elr);
#elif defined(__aarch64__)
    return reinterpret_cast<void*>(machine.pc);
#elif defined(__powerpc64__) && defined(__linux__)
    return reinterpret_cast<void*>(machine.gp_regs[PT_NIP]);
#elif defined(__powerpc64__) && defined(__FreeBSD__)
    return reinterpret_cast<void*>(machine.mc_srr0);
#elif defined(__riscv)
    return reinterpret_cast<void*>(machine.__gregs[REG_PC]);
#elif defined(__s390x__)
    return reinterpret_cast<void*>(machine.psw.addr);
#else
    return nullptr;
#endif
}
225
226
// FIXME: looks like this is used only for Sentry but duplicates the whole algo, maybe replace?
227
/// Fills `frames[0 .. size)` from `frame_pointers`.
///
/// On ELF platforms (except FreeBSD) frames before `offset` only receive their virtual
/// address, while frames in `[offset, size)` additionally get the physical address, the
/// containing object, the source file/line (when DWARF lookup succeeds) and the
/// demangled symbol; unknown objects and symbols are recorded as "?".
/// On other platforms only the virtual address of every frame is stored.
void StackTrace::symbolize(const StackTrace::FramePointers& frame_pointers,
                           [[maybe_unused]] size_t offset, size_t size,
                           StackTrace::Frames& frames) {
#if defined(__ELF__) && !defined(__FreeBSD__)
    auto index_holder = doris::SymbolIndex::instance();
    const doris::SymbolIndex& index = *index_holder;
    // One Dwarf reader per object file, created lazily and reused across frames.
    std::unordered_map<std::string, doris::Dwarf> dwarf_by_object;

    for (size_t idx = 0; idx < offset; ++idx) {
        frames[idx].virtual_addr = frame_pointers[idx];
    }

    for (size_t idx = offset; idx < size; ++idx) {
        StackTrace::Frame& frame = frames[idx];
        frame.virtual_addr = frame_pointers[idx];

        const auto* object = index.findObject(frame.virtual_addr);
        // Without a known object the address is kept unchanged.
        const uintptr_t load_base = object != nullptr ? uintptr_t(object->address_begin) : 0;
        frame.physical_addr = reinterpret_cast<void*>(uintptr_t(frame.virtual_addr) - load_base);

        if (object == nullptr) {
            frame.object = "?";
        } else {
            frame.object = object->name;

            std::error_code ec;
            if (std::filesystem::exists(frame.object.value(), ec) && !ec) {
                auto dwarf_entry = dwarf_by_object.try_emplace(object->name, object->elf).first;

                doris::Dwarf::LocationInfo location;
                std::vector<doris::Dwarf::SymbolizedFrame> inline_frames;
                const bool located = dwarf_entry->second.findAddress(
                        uintptr_t(frame.physical_addr), location,
                        doris::Dwarf::LocationInfoMode::FAST, inline_frames);
                if (located) {
                    frame.file = location.file.toString();
                    frame.line = location.line;
                }
            }
        }

        const auto* symbol = index.findSymbol(frame.virtual_addr);
        if (symbol != nullptr) {
            frame.symbol = demangle(symbol->name);
        } else {
            frame.symbol = "?";
        }
    }
#else
    for (size_t idx = 0; idx < size; ++idx) {
        frames[idx].virtual_addr = frame_pointers[idx];
    }
#endif
}
276
277
0
/// Captures the current stack from inside a signal handler.
///
/// If nothing could be captured but the signal context yields a caller address, the
/// trace consists of that single address. Otherwise `offset` is moved to the first
/// captured frame equal to the caller address, which hides the frames created while
/// collecting the trace; when no frame matches, `offset` is left untouched.
StackTrace::StackTrace(const ucontext_t& signal_context) {
    tryCapture();

    /// This variable from signal handler is not instrumented by Memory Sanitizer.
    __msan_unpoison(&signal_context, sizeof(signal_context));

    void* const caller_address = getCallerAddress(signal_context);

    if (size == 0 && caller_address != nullptr) {
        frame_pointers[0] = caller_address;
        size = 1;
        return;
    }

    size_t idx = 0;
    while (idx < size && frame_pointers[idx] != caller_address) {
        ++idx;
    }
    if (idx < size) {
        offset = idx;
    }
}
298
299
0
void StackTrace::tryCapture() {
300
    // When unw_backtrace is not available, fall back on the standard
301
    // `backtrace` function from execinfo.h.
302
0
#if USE_UNWIND && defined(__x86_64__) // TODO
303
0
    size = unw_backtrace(frame_pointers.data(), capacity);
304
#else
305
    size = backtrace(frame_pointers.data(), capacity);
306
#endif
307
0
    __msan_unpoison(frame_pointers.data(), size * sizeof(frame_pointers[0]));
308
0
}
309
310
/// ClickHouse uses bundled libc++ so type names will be the same on every system thus it's safe to hardcode them
311
/// Pairs of (text to search for, text to put in its place), applied in this order.
constexpr std::pair<std::string_view, std::string_view> replacements[] = {
        {"::__1", ""},
        {"std::basic_string<char, std::char_traits<char>, std::allocator<char>>", "std::string"}};

/// Shortens a demangled name by applying every entry of `replacements` to `haystack`.
/// Each entry is applied left to right over the whole string; scanning resumes right
/// after the inserted text, so a replacement is never re-examined by the same entry.
std::string collapseNames(std::string&& haystack) {
    for (const auto& [pattern, substitute] : replacements) {
        for (size_t at = haystack.find(pattern); at != std::string::npos;
             at = haystack.find(pattern, at + substitute.length())) {
            haystack.replace(at, pattern.length(), substitute);
        }
    }

    return haystack;
}
327
328
struct StackTraceRefTriple {
329
    const StackTrace::FramePointers& pointers;
330
    size_t offset;
331
    size_t size;
332
};
333
334
struct StackTraceTriple {
335
    StackTrace::FramePointers pointers;
336
    size_t offset;
337
    size_t size;
338
};
339
340
template <class T>
341
concept MaybeRef = std::is_same_v<T, StackTraceTriple> || std::is_same_v<T, StackTraceRefTriple>;
342
343
0
/// Strict weak ordering over trace descriptions, usable between any mix of
/// StackTraceTriple and StackTraceRefTriple.
///
/// Compares lexicographically by (pointers, size, offset). std::tie builds tuples of
/// references, so neither frame array is copied for the comparison.
constexpr bool operator<(const MaybeRef auto& left, const MaybeRef auto& right) {
    return std::tie(left.pointers, left.size, left.offset) <
           std::tie(right.pointers, right.size, right.offset);
}
Unexecuted instantiation: _ZltI16StackTraceTriple19StackTraceRefTripleEbRKT_RKT0_
Unexecuted instantiation: _ZltI19StackTraceRefTriple16StackTraceTripleEbRKT_RKT0_
Unexecuted instantiation: _ZltI16StackTraceTripleS0_EbRKT_RKT0_
347
348
static void toStringEveryLineImpl([[maybe_unused]] const std::string dwarf_location_info_mode,
349
                                  const StackTraceRefTriple& stack_trace,
350
0
                                  std::function<void(std::string_view)> callback) {
351
0
    if (stack_trace.size == 0) {
352
0
        return callback("<Empty trace>");
353
0
    }
354
0
#if defined(__ELF__) && !defined(__FreeBSD__)
355
356
0
    using enum doris::Dwarf::LocationInfoMode;
357
0
    doris::Dwarf::LocationInfoMode mode;
358
0
    auto dwarf_location_info_mode_lower = doris::to_lower(dwarf_location_info_mode);
359
0
    if (dwarf_location_info_mode_lower == "disabled") {
360
0
        mode = DISABLED;
361
0
    } else if (dwarf_location_info_mode_lower == "fast") {
362
0
        mode = FAST;
363
0
    } else if (dwarf_location_info_mode_lower == "full") {
364
0
        mode = FULL;
365
0
    } else if (dwarf_location_info_mode_lower == "full_with_inline") {
366
0
        mode = FULL_WITH_INLINE;
367
0
    } else {
368
0
        LOG(INFO) << "invalid LocationInfoMode: " << dwarf_location_info_mode;
369
0
        mode = DISABLED;
370
0
    }
371
0
    auto symbol_index_ptr = doris::SymbolIndex::instance();
372
0
    const doris::SymbolIndex& symbol_index = *symbol_index_ptr;
373
0
    std::unordered_map<std::string, doris::Dwarf> dwarfs;
374
0
    for (size_t i = stack_trace.offset; i < stack_trace.size; ++i) {
375
0
        std::vector<doris::Dwarf::SymbolizedFrame> inline_frames;
376
0
        const void* virtual_addr = stack_trace.pointers[i];
377
0
        const auto* object = symbol_index.findObject(virtual_addr);
378
0
        uintptr_t virtual_offset = object ? uintptr_t(object->address_begin) : 0;
379
0
        const void* physical_addr =
380
0
                reinterpret_cast<const void*>(uintptr_t(virtual_addr) - virtual_offset);
381
382
0
        std::stringstream out;
383
0
        out << "\t" << i << "# ";
384
0
        if (i < 10) { // for alignment
385
0
            out << " ";
386
0
        }
387
388
0
        if (const auto* const symbol = symbol_index.findSymbol(virtual_addr)) {
389
0
            out << collapseNames(demangle(symbol->name));
390
0
        } else {
391
0
            out << "?";
392
0
        }
393
394
0
        if (std::error_code ec; object && std::filesystem::exists(object->name, ec) && !ec) {
395
0
            auto dwarf_it = dwarfs.try_emplace(object->name, object->elf).first;
396
397
0
            doris::Dwarf::LocationInfo location;
398
399
0
            if (dwarf_it->second.findAddress(uintptr_t(physical_addr), location, mode,
400
0
                                             inline_frames)) {
401
0
                out << " at " << location.file.toString() << ":" << location.line;
402
0
            }
403
0
        }
404
405
        // Do not display the stack address and file name, it is not important.
406
        // if (shouldShowAddress(physical_addr)) {
407
        //     out << " @ ";
408
        //     writePointerHex(physical_addr, out);
409
        // }
410
411
        // out << "  in " << (object ? object->name : "?");
412
413
0
        callback(out.str());
414
415
0
        for (size_t j = 0; j < inline_frames.size(); ++j) {
416
0
            const auto& frame = inline_frames[j];
417
0
            callback(fmt::format("\t{}.{}. inlined from {}: {}:{}", i, j + 1,
418
0
                                 collapseNames(demangle(frame.name)),
419
0
                                 frame.location.file.toString(), frame.location.line));
420
0
        }
421
0
    }
422
#else
423
    for (size_t i = stack_trace.offset; i < stack_trace.size; ++i)
424
        if (const void* const addr = stack_trace.pointers[i]; shouldShowAddress(addr))
425
            callback(fmt::format("{}. {}", i, addr));
426
#endif
427
0
}
428
429
0
void StackTrace::toStringEveryLine(std::function<void(std::string_view)> callback) const {
430
0
    toStringEveryLineImpl("FULL_WITH_INLINE", {frame_pointers, offset, size}, std::move(callback));
431
0
}
432
433
using StackTraceCache = std::map<StackTraceTriple, std::string, std::less<>>;
434
435
0
static StackTraceCache& cacheInstance() {
436
0
    static StackTraceCache cache;
437
0
    return cache;
438
0
}
439
440
static std::mutex stacktrace_cache_mutex;
441
442
/// Returns the textual form of a trace, rendering it only the first time it is seen.
///
/// Calculation of stack trace text is extremely slow, so results are kept in a simple
/// cache; otherwise the server could be overloaded by trash queries. The cache can grow
/// unconditionally, but practically it should be small. The whole function runs under
/// `stacktrace_cache_mutex`.
///
/// NOTE(review): the cache key is (pointers, offset, size) only. It does not include
/// `dwarf_location_info_mode`, so a trace first rendered under one mode is returned
/// unchanged to later callers asking for a different mode — confirm this is intended.
std::string toStringCached(const StackTrace::FramePointers& pointers, size_t offset, size_t size,
                           const std::string& dwarf_location_info_mode) {
    std::lock_guard lock {stacktrace_cache_mutex};

    StackTraceCache& cache = cacheInstance();
    const StackTraceRefTriple lookup_key {pointers, offset, size};

    const auto cached = cache.find(lookup_key);
    if (cached != cache.end()) {
        return cached->second;
    }

    std::stringstream rendered;
    toStringEveryLineImpl(dwarf_location_info_mode, lookup_key,
                          [&rendered](std::string_view line) { rendered << line << '\n'; });

    const auto inserted = cache.emplace(StackTraceTriple {pointers, offset, size}, rendered.str());
    return inserted.first->second;
}
462
463
std::string StackTrace::toString(int start_pointers_index,
464
0
                                 const std::string& dwarf_location_info_mode) const {
465
    // Default delete the first three frame pointers, which are inside the stack_trace.cpp.
466
0
    start_pointers_index += 3;
467
0
    StackTrace::FramePointers frame_pointers_raw {};
468
0
    std::copy(frame_pointers.begin() + start_pointers_index, frame_pointers.end(),
469
0
              frame_pointers_raw.begin());
470
0
    return toStringCached(frame_pointers_raw, offset, size - start_pointers_index,
471
0
                          dwarf_location_info_mode);
472
0
}
473
474
std::string StackTrace::toString(void** frame_pointers_raw, size_t offset, size_t size,
475
0
                                 const std::string& dwarf_location_info_mode) {
476
0
    __msan_unpoison(frame_pointers_raw, size * sizeof(*frame_pointers_raw));
477
478
0
    StackTrace::FramePointers frame_pointers {};
479
0
    std::copy_n(frame_pointers_raw, size, frame_pointers.begin());
480
481
0
    return toStringCached(frame_pointers, offset, size, dwarf_location_info_mode);
482
0
}
483
484
0
void StackTrace::createCache() {
485
0
    std::lock_guard lock {stacktrace_cache_mutex};
486
0
    cacheInstance();
487
0
}
488
489
0
void StackTrace::dropCache() {
490
0
    std::lock_guard lock {stacktrace_cache_mutex};
491
0
    cacheInstance().clear();
492
0
}