Coverage Report

Created: 2025-05-19 20:49

/root/doris/be/src/util/perf_counters.cpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/apache/impala/blob/branch-2.9.0/be/src/util/perf-counters.cpp
19
// and modified by Doris
20
21
#include "util/perf_counters.h"
22
23
#include <linux/perf_event.h>
24
#include <stdlib.h>
25
#include <string.h>
26
#include <sys/syscall.h>
27
#include <unistd.h>
28
29
#include <boost/algorithm/string/trim.hpp>
30
#include <fstream> // IWYU pragma: keep
31
#include <iomanip>
32
#include <iostream>
33
#include <unordered_map>
34
#include <utility>
35
36
#include "absl/strings/substitute.h"
37
#include "util/pretty_printer.h"
38
#include "util/string_parser.hpp"
39
#include "util/string_util.h"
40
41
namespace doris {
42
43
0
// Number of bytes requested from each perf event fd by get_sys_counters().
#define COUNTER_SIZE (sizeof(void*))
44
0
// Column width passed to std::setw for every counter column in pretty_print().
#define PRETTY_PRINT_WIDTH 13
45
46
// Key/value view of /proc/self/status, filled by refresh_proc_status().
// Keys have the form "status/<field name>"; values are the trimmed field text.
static std::unordered_map<std::string, std::string> _process_state;
47
48
// Values cached by refresh_proc_status() from the VmRSS, VmHWM, VmSize and
// VmPeak entries; each holds the result of parse_bytes(), which is -1 when the
// entry is missing or cannot be parsed.
int64_t PerfCounters::_vm_rss = 0;
49
// Pretty-printed form of _vm_rss (prefixed with "[ASAN]" in ASAN builds).
std::string PerfCounters::_vm_rss_str = "";
50
int64_t PerfCounters::_vm_hwm = 0;
51
int64_t PerfCounters::_vm_size = 0;
52
int64_t PerfCounters::_vm_peak = 0;
53
54
// This is the order of the counters in /proc/self/io
55
// Zero-based line index of each entry in /proc/self/io. The values are used by
// get_proc_self_io_counters() to index the array of parsed numbers, so the
// order must match the order of lines in that file.
enum PERF_IO_IDX {
56
    PROC_IO_READ = 0, // rchar
57
    PROC_IO_WRITE, // wchar
58
    PROC_IO_SYS_RREAD, // syscr
59
    PROC_IO_SYS_WRITE, // syscw
60
    PROC_IO_DISK_READ, // read_bytes
61
    PROC_IO_DISK_WRITE, // write_bytes
62
    PROC_IO_CANCELLED_WRITE, // cancelled_write_bytes
63
    PROC_IO_LAST_COUNTER, // number of lines parsed; not a real entry
64
};
65
66
// Wrapper around sys call.  This syscall is hard to use and this is how it is recommended
67
// to be used.
68
// Thin wrapper that issues the perf_event_open system call directly through
// syscall(). The `size` field of `attr` is stamped with the size of the
// structure before the call; all other arguments are forwarded unchanged.
// Returns whatever the system call returns, narrowed to int.
static inline int sys_perf_event_open(struct perf_event_attr* attr, pid_t pid, int cpu,
                                      int group_fd, unsigned long flags) {
    attr->size = sizeof(struct perf_event_attr);
    const long rc = syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
    return static_cast<int>(rc);
}
73
74
// Remap PerfCounters::Counter to Linux kernel enums
75
0
static bool init_event_attr(perf_event_attr* attr, PerfCounters::Counter counter) {
76
0
    memset(attr, 0, sizeof(perf_event_attr));
77
78
0
    switch (counter) {
79
0
    case PerfCounters::PERF_COUNTER_SW_CPU_CLOCK:
80
0
        attr->type = PERF_TYPE_SOFTWARE;
81
0
        attr->config = PERF_COUNT_SW_CPU_CLOCK;
82
0
        break;
83
84
0
    case PerfCounters::PERF_COUNTER_SW_PAGE_FAULTS:
85
0
        attr->type = PERF_TYPE_SOFTWARE;
86
0
        attr->config = PERF_COUNT_SW_PAGE_FAULTS;
87
0
        break;
88
89
0
    case PerfCounters::PERF_COUNTER_SW_CONTEXT_SWITCHES:
90
0
        attr->type = PERF_TYPE_SOFTWARE;
91
0
        attr->config = PERF_COUNT_SW_PAGE_FAULTS;
92
0
        break;
93
94
0
    case PerfCounters::PERF_COUNTER_SW_CPU_MIGRATIONS:
95
0
        attr->type = PERF_TYPE_SOFTWARE;
96
0
        attr->config = PERF_COUNT_SW_CPU_MIGRATIONS;
97
0
        break;
98
99
0
    case PerfCounters::PERF_COUNTER_HW_CPU_CYCLES:
100
0
        attr->type = PERF_TYPE_HARDWARE;
101
0
        attr->config = PERF_COUNT_HW_CPU_CYCLES;
102
0
        break;
103
104
0
    case PerfCounters::PERF_COUNTER_HW_INSTRUCTIONS:
105
0
        attr->type = PERF_TYPE_HARDWARE;
106
0
        attr->config = PERF_COUNT_HW_INSTRUCTIONS;
107
0
        break;
108
109
0
    case PerfCounters::PERF_COUNTER_HW_CACHE_HIT:
110
0
        attr->type = PERF_TYPE_HARDWARE;
111
0
        attr->config = PERF_COUNT_HW_CACHE_REFERENCES;
112
0
        break;
113
114
0
    case PerfCounters::PERF_COUNTER_HW_CACHE_MISSES:
115
0
        attr->type = PERF_TYPE_HARDWARE;
116
0
        attr->config = PERF_COUNT_HW_CACHE_MISSES;
117
0
        break;
118
119
0
    case PerfCounters::PERF_COUNTER_HW_BRANCHES:
120
0
        attr->type = PERF_TYPE_HARDWARE;
121
0
        attr->config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
122
0
        break;
123
124
0
    case PerfCounters::PERF_COUNTER_HW_BRANCH_MISSES:
125
0
        attr->type = PERF_TYPE_HARDWARE;
126
0
        attr->config = PERF_COUNT_HW_BRANCH_MISSES;
127
0
        break;
128
129
0
    case PerfCounters::PERF_COUNTER_HW_BUS_CYCLES:
130
0
        attr->type = PERF_TYPE_HARDWARE;
131
0
        attr->config = PERF_COUNT_HW_BUS_CYCLES;
132
0
        break;
133
134
0
    default:
135
0
        return false;
136
0
    }
137
138
0
    return true;
139
0
}
140
141
0
// Returns the column label used for `counter` (e.g. in pretty_print output).
// Counters without a label yield an empty string.
static std::string get_counter_name(PerfCounters::Counter counter) {
    const char* label = "";

    switch (counter) {
    case PerfCounters::PERF_COUNTER_SW_CPU_CLOCK:
        label = "CPUTime";
        break;
    case PerfCounters::PERF_COUNTER_SW_PAGE_FAULTS:
        label = "PageFaults";
        break;
    case PerfCounters::PERF_COUNTER_SW_CONTEXT_SWITCHES:
        label = "ContextSwitches";
        break;
    case PerfCounters::PERF_COUNTER_SW_CPU_MIGRATIONS:
        label = "CPUMigrations";
        break;
    case PerfCounters::PERF_COUNTER_HW_CPU_CYCLES:
        label = "HWCycles";
        break;
    case PerfCounters::PERF_COUNTER_HW_INSTRUCTIONS:
        label = "Instructions";
        break;
    case PerfCounters::PERF_COUNTER_HW_CACHE_HIT:
        label = "CacheHit";
        break;
    case PerfCounters::PERF_COUNTER_HW_CACHE_MISSES:
        label = "CacheMiss";
        break;
    case PerfCounters::PERF_COUNTER_HW_BRANCHES:
        label = "Branches";
        break;
    case PerfCounters::PERF_COUNTER_HW_BRANCH_MISSES:
        label = "BranchMiss";
        break;
    case PerfCounters::PERF_COUNTER_HW_BUS_CYCLES:
        label = "BusCycles";
        break;
    case PerfCounters::PERF_COUNTER_VM_USAGE:
        label = "VmUsage";
        break;
    case PerfCounters::PERF_COUNTER_VM_PEAK_USAGE:
        label = "PeakVmUsage";
        break;
    case PerfCounters::PERF_COUNTER_RESIDENT_SET_SIZE:
        label = "WorkingSet";
        break;
    case PerfCounters::PERF_COUNTER_BYTES_READ:
        label = "BytesRead";
        break;
    case PerfCounters::PERF_COUNTER_BYTES_WRITE:
        label = "BytesWritten";
        break;
    case PerfCounters::PERF_COUNTER_DISK_READ:
        label = "DiskRead";
        break;
    case PerfCounters::PERF_COUNTER_DISK_WRITE:
        label = "DiskWrite";
        break;
    default:
        break;
    }

    return label;
}
201
202
0
// Opens a perf event for `counter` on the current process and registers it in
// _counters. The first successfully opened fd becomes _group_fd and is passed
// as the group fd for every later event. Returns false when the counter has no
// perf_event mapping or the event cannot be opened; nothing is registered then.
bool PerfCounters::init_sys_counter(Counter counter) {
    perf_event_attr event_attr;
    if (!init_event_attr(&event_attr, counter)) {
        return false;
    }

    const int event_fd = sys_perf_event_open(&event_attr, getpid(), -1, _group_fd, 0);
    if (event_fd < 0) {
        return false;
    }

    // -1 means no event has been opened yet, so this one leads the group.
    if (_group_fd == -1) {
        _group_fd = event_fd;
    }

    CounterData entry;
    entry.counter = counter;
    entry.source = PerfCounters::SYS_PERF_COUNTER;
    entry.fd = event_fd;
    // Only the CPU clock is reported as a time; everything else is a plain count.
    entry.type = (counter == PERF_COUNTER_SW_CPU_CLOCK) ? TUnit::TIME_NS : TUnit::UNIT;

    _counters.push_back(entry);
    return true;
}
234
235
0
// Registers a counter whose value is read from /proc/self/io. The entry records
// which line of that file holds the value. Returns false, registering nothing,
// for counters that are not backed by /proc/self/io.
bool PerfCounters::init_proc_self_io_counter(Counter counter) {
    PERF_IO_IDX line_index;

    switch (counter) {
    case PerfCounters::PERF_COUNTER_BYTES_READ:
        line_index = PROC_IO_READ;
        break;
    case PerfCounters::PERF_COUNTER_BYTES_WRITE:
        line_index = PROC_IO_WRITE;
        break;
    case PerfCounters::PERF_COUNTER_DISK_READ:
        line_index = PROC_IO_DISK_READ;
        break;
    case PerfCounters::PERF_COUNTER_DISK_WRITE:
        line_index = PROC_IO_DISK_WRITE;
        break;
    default:
        return false;
    }

    CounterData entry;
    entry.counter = counter;
    entry.source = PerfCounters::PROC_SELF_IO;
    entry.type = TUnit::BYTES;
    entry.proc_io_line_number = line_index;

    _counters.push_back(entry);
    return true;
}
265
266
0
// Registers a counter whose value is read from /proc/self/status. The entry
// stores the name of the status field to look for; the value is later read by
// get_proc_self_status_counters(). Returns false, registering nothing, for
// counters that are not backed by /proc/self/status.
bool PerfCounters::init_proc_self_status_counter(Counter counter) {
    CounterData data {};
    data.counter = counter;
    data.source = PerfCounters::PROC_SELF_STATUS;
    data.type = TUnit::BYTES;

    switch (counter) {
    case PerfCounters::PERF_COUNTER_VM_USAGE:
        data.proc_status_field = "VmSize";
        break;

    case PerfCounters::PERF_COUNTER_VM_PEAK_USAGE:
        data.proc_status_field = "VmPeak";
        break;

    case PerfCounters::PERF_COUNTER_RESIDENT_SET_SIZE:
        // Full field name, matching the "status/VmRSS" key used by
        // refresh_proc_status(); a truncated name only works by substring luck.
        data.proc_status_field = "VmRSS";
        break;

    default:
        return false;
    }

    _counters.push_back(data);
    return true;
}
292
293
0
bool PerfCounters::get_sys_counters(std::vector<int64_t>& buffer) {
294
0
    for (int i = 0; i < _counters.size(); i++) {
295
0
        if (_counters[i].source == SYS_PERF_COUNTER) {
296
0
            int num_bytes = read(_counters[i].fd, &buffer[i], COUNTER_SIZE);
297
298
0
            if (num_bytes != COUNTER_SIZE) {
299
0
                return false;
300
0
            }
301
302
0
            if (_counters[i].type == TUnit::TIME_NS) {
303
0
                buffer[i] /= 1000000;
304
0
            }
305
0
        }
306
0
    }
307
308
0
    return true;
309
0
}
310
311
// Parse out IO counters from /proc/self/io.  The file contains a list of
312
// (name,byte) pairs.
313
// For example:
314
//    rchar: 210212
315
//    wchar: 94
316
//    syscr: 118
317
//    syscw: 3
318
//    read_bytes: 0
319
//    write_bytes: 0
320
//    cancelled_write_bytes: 0
321
0
bool PerfCounters::get_proc_self_io_counters(std::vector<int64_t>& buffer) {
322
0
    std::ifstream file("/proc/self/io", std::ios::in);
323
0
    std::string buf;
324
0
    int64_t values[PROC_IO_LAST_COUNTER];
325
0
    int ret = 0;
326
327
0
    for (int i = 0; i < PROC_IO_LAST_COUNTER; ++i) {
328
0
        if (!file) {
329
0
            ret = -1;
330
0
            break;
331
0
        }
332
333
0
        getline(file, buf);
334
0
        size_t colon = buf.find(':');
335
336
0
        if (colon == std::string::npos) {
337
0
            ret = -1;
338
0
            break;
339
0
        }
340
341
0
        buf = buf.substr(colon + 1);
342
0
        std::istringstream stream(buf);
343
0
        stream >> values[i];
344
0
    }
345
346
0
    if (ret == 0) {
347
0
        for (int i = 0; i < _counters.size(); ++i) {
348
0
            if (_counters[i].source == PROC_SELF_IO) {
349
0
                buffer[i] = values[_counters[i].proc_io_line_number];
350
0
            }
351
0
        }
352
0
    }
353
354
0
    if (file.is_open()) {
355
0
        file.close();
356
0
    }
357
358
0
    return true;
359
0
}
360
361
0
bool PerfCounters::get_proc_self_status_counters(std::vector<int64_t>& buffer) {
362
0
    std::ifstream file("/proc/self/status", std::ios::in);
363
0
    std::string buf;
364
365
0
    while (file) {
366
0
        getline(file, buf);
367
368
0
        for (int i = 0; i < _counters.size(); ++i) {
369
0
            if (_counters[i].source == PROC_SELF_STATUS) {
370
0
                size_t field = buf.find(_counters[i].proc_status_field);
371
372
0
                if (field == std::string::npos) {
373
0
                    continue;
374
0
                }
375
376
0
                size_t colon = field + _counters[i].proc_status_field.size() + 1;
377
0
                buf = buf.substr(colon + 1);
378
0
                std::istringstream stream(buf);
379
0
                int64_t value;
380
0
                stream >> value;
381
0
                buffer[i] = value * 1024; // values in file are in kb
382
0
            }
383
0
        }
384
0
    }
385
386
0
    if (file.is_open()) {
387
0
        file.close();
388
0
    }
389
390
0
    return true;
391
0
}
392
393
0
// Starts with no perf event group: _group_fd == -1 is what init_sys_counter()
// checks to decide that the next opened event fd becomes the group fd.
PerfCounters::PerfCounters() : _group_fd(-1) {}
394
395
// Close all fds for the counters
396
0
// Releases the file descriptor of every perf-event-backed counter. Counters
// read from /proc hold no descriptor and are skipped.
PerfCounters::~PerfCounters() {
    for (const auto& entry : _counters) {
        if (entry.source == SYS_PERF_COUNTER) {
            close(entry.fd);
        }
    }
}
403
404
// Add here the default ones that are most useful
405
0
bool PerfCounters::add_default_counters() {
406
0
    bool result = true;
407
0
    result &= add_counter(PERF_COUNTER_SW_CPU_CLOCK);
408
    // These hardware ones don't work on a vm, just ignore if they fail
409
    // TODO: these don't work reliably and aren't that useful.  Turn them off.
410
    //add_counter(PERF_COUNTER_HW_INSTRUCTIONS);
411
    //add_counter(PERF_COUNTER_HW_CPU_CYCLES);
412
    //add_counter(PERF_COUNTER_HW_BRANCHES);
413
    //add_counter(PERF_COUNTER_HW_BRANCH_MISSES);
414
    //add_counter(PERF_COUNTER_HW_CACHE_MISSES);
415
0
    add_counter(PERF_COUNTER_VM_USAGE);
416
0
    add_counter(PERF_COUNTER_VM_PEAK_USAGE);
417
0
    add_counter(PERF_COUNTER_RESIDENT_SET_SIZE);
418
0
    result &= add_counter(PERF_COUNTER_DISK_READ);
419
0
    return result;
420
0
}
421
422
// Add a specific counter
423
0
// Registers `counter` with the source that backs it (perf events,
// /proc/self/io or /proc/self/status) and records its display name.
// Returns true if the counter is registered after the call, including the case
// where it already was. Returns false for unknown counters or when the
// source-specific initialisation fails.
bool PerfCounters::add_counter(Counter counter) {
    // Nothing to do when the counter is already present.
    for (const auto& existing : _counters) {
        if (existing.counter == counter) {
            return true;
        }
    }

    bool added = false;

    switch (counter) {
    // Backed by perf_event_open.
    case PerfCounters::PERF_COUNTER_SW_CPU_CLOCK:
    case PerfCounters::PERF_COUNTER_SW_PAGE_FAULTS:
    case PerfCounters::PERF_COUNTER_SW_CONTEXT_SWITCHES:
    case PerfCounters::PERF_COUNTER_SW_CPU_MIGRATIONS:
    case PerfCounters::PERF_COUNTER_HW_CPU_CYCLES:
    case PerfCounters::PERF_COUNTER_HW_INSTRUCTIONS:
    case PerfCounters::PERF_COUNTER_HW_CACHE_HIT:
    case PerfCounters::PERF_COUNTER_HW_CACHE_MISSES:
    case PerfCounters::PERF_COUNTER_HW_BRANCHES:
    case PerfCounters::PERF_COUNTER_HW_BRANCH_MISSES:
    case PerfCounters::PERF_COUNTER_HW_BUS_CYCLES:
        added = init_sys_counter(counter);
        break;

    // Backed by /proc/self/io.
    case PerfCounters::PERF_COUNTER_BYTES_READ:
    case PerfCounters::PERF_COUNTER_BYTES_WRITE:
    case PerfCounters::PERF_COUNTER_DISK_READ:
    case PerfCounters::PERF_COUNTER_DISK_WRITE:
        added = init_proc_self_io_counter(counter);
        break;

    // Backed by /proc/self/status.
    case PerfCounters::PERF_COUNTER_VM_USAGE:
    case PerfCounters::PERF_COUNTER_VM_PEAK_USAGE:
    case PerfCounters::PERF_COUNTER_RESIDENT_SET_SIZE:
        added = init_proc_self_status_counter(counter);
        break;

    default:
        return false;
    }

    if (!added) {
        return false;
    }

    _counter_names.push_back(get_counter_name(counter));
    return true;
}
471
472
// Query all the counters right now and store the values in results
473
0
void PerfCounters::snapshot(const std::string& name) {
474
0
    if (_counters.size() == 0) {
475
0
        return;
476
0
    }
477
478
0
    std::string fixed_name = name;
479
480
0
    if (fixed_name.size() == 0) {
481
0
        std::stringstream ss;
482
0
        ss << _snapshots.size() + 1;
483
0
        fixed_name = ss.str();
484
0
    }
485
486
0
    std::vector<int64_t> buffer(_counters.size());
487
488
0
    get_sys_counters(buffer);
489
0
    get_proc_self_io_counters(buffer);
490
0
    get_proc_self_status_counters(buffer);
491
492
0
    _snapshots.push_back(buffer);
493
0
    _snapshot_names.push_back(fixed_name);
494
0
}
495
496
0
const std::vector<int64_t>* PerfCounters::counters(int snapshot) const {
497
0
    if (snapshot < 0 || snapshot >= _snapshots.size()) {
498
0
        return nullptr;
499
0
    }
500
501
0
    return &_snapshots[snapshot];
502
0
}
503
504
0
void PerfCounters::pretty_print(std::ostream* s) const {
505
0
    std::ostream& stream = *s;
506
0
    stream << std::setw(8) << "snapshot";
507
508
0
    for (int i = 0; i < _counter_names.size(); ++i) {
509
0
        stream << std::setw(PRETTY_PRINT_WIDTH) << _counter_names[i];
510
0
    }
511
512
0
    stream << std::endl;
513
514
0
    for (int s = 0; s < _snapshots.size(); s++) {
515
0
        stream << std::setw(8) << _snapshot_names[s];
516
0
        const std::vector<int64_t>& snapshot = _snapshots[s];
517
518
0
        for (int i = 0; i < snapshot.size(); ++i) {
519
0
            stream << std::setw(PRETTY_PRINT_WIDTH)
520
0
                   << PrettyPrinter::print(snapshot[i], _counters[i].type);
521
0
        }
522
523
0
        stream << std::endl;
524
0
    }
525
526
0
    stream << std::endl;
527
0
}
528
529
// Refactor below
530
531
0
int PerfCounters::parse_int(const std::string& state_key) {
532
0
    auto it = _process_state.find(state_key);
533
0
    if (it != _process_state.end()) return atoi(it->second.c_str());
534
0
    return -1;
535
0
}
536
537
0
int64_t PerfCounters::parse_int64(const std::string& state_key) {
538
0
    auto it = _process_state.find(state_key);
539
0
    if (it != _process_state.end()) {
540
0
        StringParser::ParseResult result;
541
0
        int64_t state_value =
542
0
                StringParser::string_to_int<int64_t>(it->second.data(), it->second.size(), &result);
543
0
        if (result == StringParser::PARSE_SUCCESS) return state_value;
544
0
    }
545
0
    return -1;
546
0
}
547
548
0
std::string PerfCounters::parse_string(const std::string& state_key) {
549
0
    auto it = _process_state.find(state_key);
550
0
    if (it != _process_state.end()) return it->second;
551
0
    return "";
552
0
}
553
554
0
int64_t PerfCounters::parse_bytes(const std::string& state_key) {
555
0
    auto it = _process_state.find(state_key);
556
0
    if (it != _process_state.end()) {
557
0
        std::vector<std::string> fields = split(it->second, " ");
558
        // We expect state_value such as, e.g., '16129508', '16129508 kB', '16129508 mB'
559
0
        StringParser::ParseResult result;
560
0
        int64_t state_value =
561
0
                StringParser::string_to_int<int64_t>(fields[0].data(), fields[0].size(), &result);
562
0
        if (result == StringParser::PARSE_SUCCESS) {
563
0
            if (fields.size() < 2) return state_value;
564
0
            if (fields[1].compare("kB") == 0) return state_value * 1024L;
565
0
        }
566
0
    }
567
0
    return -1;
568
0
}
569
570
0
void PerfCounters::refresh_proc_status() {
571
0
    std::ifstream statusinfo("/proc/self/status", std::ios::in);
572
0
    std::string line;
573
0
    while (statusinfo.good() && !statusinfo.eof()) {
574
0
        getline(statusinfo, line);
575
0
        std::vector<std::string> fields = split(line, "\t");
576
0
        if (fields.size() < 2) continue;
577
0
        boost::algorithm::trim(fields[1]);
578
0
        std::string key = fields[0].substr(0, fields[0].size() - 1);
579
0
        _process_state[absl::Substitute("status/$0", key)] = fields[1];
580
0
    }
581
582
0
    if (statusinfo.is_open()) statusinfo.close();
583
584
0
    _vm_size = parse_bytes("status/VmSize");
585
0
    _vm_peak = parse_bytes("status/VmPeak");
586
0
    _vm_rss = parse_bytes("status/VmRSS");
587
0
#ifdef ADDRESS_SANITIZER
588
0
    _vm_rss_str = "[ASAN]" + PrettyPrinter::print(_vm_rss, TUnit::BYTES);
589
#else
590
    _vm_rss_str = PrettyPrinter::print(_vm_rss, TUnit::BYTES);
591
#endif
592
0
    _vm_hwm = parse_bytes("status/VmHWM");
593
0
}
594
595
0
// Fills `*out` with the VmSize, VmPeak, VmRSS and VmHWM figures from the
// cached process state. The cache is not refreshed here; each field is -1 if
// the matching entry is missing or cannot be parsed.
void PerfCounters::get_proc_status(ProcStatus* out) {
    ProcStatus& status = *out;
    status.vm_size = parse_bytes("status/VmSize");
    status.vm_peak = parse_bytes("status/VmPeak");
    status.vm_rss = parse_bytes("status/VmRSS");
    status.vm_hwm = parse_bytes("status/VmHWM");
}
601
602
} // namespace doris