Coverage Report

Created: 2024-11-22 21:49

/root/doris/be/src/util/perf_counters.cpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/apache/impala/blob/branch-2.9.0/be/src/util/perf-counters.cpp
19
// and modified by Doris
20
21
#include "util/perf_counters.h"
22
23
#include <linux/perf_event.h>
24
#include <stdlib.h>
25
#include <string.h>
26
#include <sys/syscall.h>
27
#include <unistd.h>
28
29
#include <boost/algorithm/string/trim.hpp>
30
#include <fstream> // IWYU pragma: keep
31
#include <iomanip>
32
#include <iostream>
33
#include <unordered_map>
34
#include <utility>
35
36
#include "gutil/stringprintf.h"
37
#include "gutil/strings/substitute.h"
38
#include "util/pretty_printer.h"
39
#include "util/string_parser.hpp"
40
#include "util/string_util.h"
41
42
namespace doris {
43
44
0
// Number of bytes requested from a perf-event fd for a single counter value.
static constexpr size_t COUNTER_SIZE = sizeof(void*);
// Column width used for every counter column when pretty-printing snapshots.
static constexpr int PRETTY_PRINT_WIDTH = 13;

// Key/value pairs parsed from /proc/self/status, stored under "status/<Field>" keys.
static std::unordered_map<std::string, std::string> _process_state;
48
49
// Definitions of PerfCounters' static memory statistics. They start out as
// zero / empty; refresh_proc_status() is what assigns them.
int64_t PerfCounters::_vm_size = 0;
int64_t PerfCounters::_vm_peak = 0;
int64_t PerfCounters::_vm_rss = 0;
int64_t PerfCounters::_vm_hwm = 0;
std::string PerfCounters::_vm_rss_str;
54
55
// Zero-based line index of each counter in /proc/self/io, in file order.
enum PERF_IO_IDX {
    PROC_IO_READ = 0,            // rchar
    PROC_IO_WRITE = 1,           // wchar
    PROC_IO_SYS_RREAD = 2,       // syscr
    PROC_IO_SYS_WRITE = 3,       // syscw
    PROC_IO_DISK_READ = 4,       // read_bytes
    PROC_IO_DISK_WRITE = 5,      // write_bytes
    PROC_IO_CANCELLED_WRITE = 6, // cancelled_write_bytes
    PROC_IO_LAST_COUNTER = 7,    // number of lines to parse; not a counter itself
};
66
67
// Thin wrapper over the raw perf_event_open system call. The call is awkward to
// use directly; going through syscall() like this is the recommended way.
//
// attr     - event description; its `size` field is overwritten here.
// pid/cpu  - forwarded unchanged to the kernel.
// group_fd - forwarded unchanged to the kernel.
// flags    - forwarded unchanged to the kernel.
// Returns whatever syscall() returns, converted to int.
static inline int sys_perf_event_open(struct perf_event_attr* attr, pid_t pid, int cpu,
                                      int group_fd, unsigned long flags) {
    // Stamp the struct with its own size before handing it to the kernel.
    attr->size = sizeof(perf_event_attr);
    const auto rc = syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
    return rc;
}
74
75
// Remap PerfCounters::Counter to Linux kernel enums
76
0
static bool init_event_attr(perf_event_attr* attr, PerfCounters::Counter counter) {
77
0
    memset(attr, 0, sizeof(perf_event_attr));
78
79
0
    switch (counter) {
80
0
    case PerfCounters::PERF_COUNTER_SW_CPU_CLOCK:
81
0
        attr->type = PERF_TYPE_SOFTWARE;
82
0
        attr->config = PERF_COUNT_SW_CPU_CLOCK;
83
0
        break;
84
85
0
    case PerfCounters::PERF_COUNTER_SW_PAGE_FAULTS:
86
0
        attr->type = PERF_TYPE_SOFTWARE;
87
0
        attr->config = PERF_COUNT_SW_PAGE_FAULTS;
88
0
        break;
89
90
0
    case PerfCounters::PERF_COUNTER_SW_CONTEXT_SWITCHES:
91
0
        attr->type = PERF_TYPE_SOFTWARE;
92
0
        attr->config = PERF_COUNT_SW_PAGE_FAULTS;
93
0
        break;
94
95
0
    case PerfCounters::PERF_COUNTER_SW_CPU_MIGRATIONS:
96
0
        attr->type = PERF_TYPE_SOFTWARE;
97
0
        attr->config = PERF_COUNT_SW_CPU_MIGRATIONS;
98
0
        break;
99
100
0
    case PerfCounters::PERF_COUNTER_HW_CPU_CYCLES:
101
0
        attr->type = PERF_TYPE_HARDWARE;
102
0
        attr->config = PERF_COUNT_HW_CPU_CYCLES;
103
0
        break;
104
105
0
    case PerfCounters::PERF_COUNTER_HW_INSTRUCTIONS:
106
0
        attr->type = PERF_TYPE_HARDWARE;
107
0
        attr->config = PERF_COUNT_HW_INSTRUCTIONS;
108
0
        break;
109
110
0
    case PerfCounters::PERF_COUNTER_HW_CACHE_HIT:
111
0
        attr->type = PERF_TYPE_HARDWARE;
112
0
        attr->config = PERF_COUNT_HW_CACHE_REFERENCES;
113
0
        break;
114
115
0
    case PerfCounters::PERF_COUNTER_HW_CACHE_MISSES:
116
0
        attr->type = PERF_TYPE_HARDWARE;
117
0
        attr->config = PERF_COUNT_HW_CACHE_MISSES;
118
0
        break;
119
120
0
    case PerfCounters::PERF_COUNTER_HW_BRANCHES:
121
0
        attr->type = PERF_TYPE_HARDWARE;
122
0
        attr->config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
123
0
        break;
124
125
0
    case PerfCounters::PERF_COUNTER_HW_BRANCH_MISSES:
126
0
        attr->type = PERF_TYPE_HARDWARE;
127
0
        attr->config = PERF_COUNT_HW_BRANCH_MISSES;
128
0
        break;
129
130
0
    case PerfCounters::PERF_COUNTER_HW_BUS_CYCLES:
131
0
        attr->type = PERF_TYPE_HARDWARE;
132
0
        attr->config = PERF_COUNT_HW_BUS_CYCLES;
133
0
        break;
134
135
0
    default:
136
0
        return false;
137
0
    }
138
139
0
    return true;
140
0
}
141
142
0
// Returns the display name used as the column header for `counter`, or an
// empty string when the counter has no name assigned here.
static std::string get_counter_name(PerfCounters::Counter counter) {
    const char* label = "";

    switch (counter) {
    // Software perf events
    case PerfCounters::PERF_COUNTER_SW_CPU_CLOCK:
        label = "CPUTime";
        break;
    case PerfCounters::PERF_COUNTER_SW_PAGE_FAULTS:
        label = "PageFaults";
        break;
    case PerfCounters::PERF_COUNTER_SW_CONTEXT_SWITCHES:
        label = "ContextSwitches";
        break;
    case PerfCounters::PERF_COUNTER_SW_CPU_MIGRATIONS:
        label = "CPUMigrations";
        break;

    // Hardware perf events
    case PerfCounters::PERF_COUNTER_HW_CPU_CYCLES:
        label = "HWCycles";
        break;
    case PerfCounters::PERF_COUNTER_HW_INSTRUCTIONS:
        label = "Instructions";
        break;
    case PerfCounters::PERF_COUNTER_HW_CACHE_HIT:
        label = "CacheHit";
        break;
    case PerfCounters::PERF_COUNTER_HW_CACHE_MISSES:
        label = "CacheMiss";
        break;
    case PerfCounters::PERF_COUNTER_HW_BRANCHES:
        label = "Branches";
        break;
    case PerfCounters::PERF_COUNTER_HW_BRANCH_MISSES:
        label = "BranchMiss";
        break;
    case PerfCounters::PERF_COUNTER_HW_BUS_CYCLES:
        label = "BusCycles";
        break;

    // Memory counters
    case PerfCounters::PERF_COUNTER_VM_USAGE:
        label = "VmUsage";
        break;
    case PerfCounters::PERF_COUNTER_VM_PEAK_USAGE:
        label = "PeakVmUsage";
        break;
    case PerfCounters::PERF_COUNTER_RESIDENT_SET_SIZE:
        label = "WorkingSet";
        break;

    // I/O counters
    case PerfCounters::PERF_COUNTER_BYTES_READ:
        label = "BytesRead";
        break;
    case PerfCounters::PERF_COUNTER_BYTES_WRITE:
        label = "BytesWritten";
        break;
    case PerfCounters::PERF_COUNTER_DISK_READ:
        label = "DiskRead";
        break;
    case PerfCounters::PERF_COUNTER_DISK_WRITE:
        label = "DiskWrite";
        break;

    default:
        break;
    }

    return label;
}
202
203
0
// Opens a perf event for `counter` on the current process and registers it in
// _counters.
//
// Returns false, registering nothing, when `counter` has no perf-event mapping
// or when opening the event fails.
bool PerfCounters::init_sys_counter(Counter counter) {
    perf_event_attr attr;
    if (!init_event_attr(&attr, counter)) {
        return false;
    }

    const int fd = sys_perf_event_open(&attr, getpid(), -1, _group_fd, 0);
    if (fd < 0) {
        return false;
    }

    // The first successfully opened event's fd is remembered and passed as
    // group_fd to every later open.
    if (_group_fd == -1) {
        _group_fd = fd;
    }

    CounterData data;
    data.counter = counter;
    data.source = PerfCounters::SYS_PERF_COUNTER;
    data.fd = fd;
    // Only the CPU clock is a time value; every other event is a plain count.
    data.type = (counter == PERF_COUNTER_SW_CPU_CLOCK) ? TUnit::TIME_NS : TUnit::UNIT;

    _counters.push_back(data);
    return true;
}
235
236
0
// Registers a byte counter that is read from /proc/self/io.
//
// Returns false, registering nothing, when `counter` is not one of the four
// I/O counters handled here.
bool PerfCounters::init_proc_self_io_counter(Counter counter) {
    // Resolve which line of /proc/self/io carries this counter.
    PERF_IO_IDX line_index;
    switch (counter) {
    case PerfCounters::PERF_COUNTER_BYTES_READ:
        line_index = PROC_IO_READ;
        break;
    case PerfCounters::PERF_COUNTER_BYTES_WRITE:
        line_index = PROC_IO_WRITE;
        break;
    case PerfCounters::PERF_COUNTER_DISK_READ:
        line_index = PROC_IO_DISK_READ;
        break;
    case PerfCounters::PERF_COUNTER_DISK_WRITE:
        line_index = PROC_IO_DISK_WRITE;
        break;
    default:
        return false;
    }

    CounterData data;
    data.counter = counter;
    data.source = PerfCounters::PROC_SELF_IO;
    data.type = TUnit::BYTES;
    data.proc_io_line_number = line_index;

    _counters.push_back(data);
    return true;
}
266
267
0
// Registers a byte counter that is read from /proc/self/status.
//
// Returns false, registering nothing, when `counter` is not one of the three
// memory counters handled here.
bool PerfCounters::init_proc_self_status_counter(Counter counter) {
    // Text looked for (as a substring) in each line of /proc/self/status.
    const char* field_name = nullptr;
    switch (counter) {
    case PerfCounters::PERF_COUNTER_VM_USAGE:
        field_name = "VmSize";
        break;
    case PerfCounters::PERF_COUNTER_VM_PEAK_USAGE:
        field_name = "VmPeak";
        break;
    case PerfCounters::PERF_COUNTER_RESIDENT_SET_SIZE:
        // NOTE(review): "VmRS" rather than "VmRSS"; the reader does a substring
        // search, so this presumably still hits the VmRSS line -- confirm it is
        // intentional.
        field_name = "VmRS";
        break;
    default:
        return false;
    }

    CounterData data {};
    data.counter = counter;
    data.source = PerfCounters::PROC_SELF_STATUS;
    data.type = TUnit::BYTES;
    data.proc_status_field = field_name;

    _counters.push_back(data);
    return true;
}
293
294
0
bool PerfCounters::get_sys_counters(std::vector<int64_t>& buffer) {
295
0
    for (int i = 0; i < _counters.size(); i++) {
296
0
        if (_counters[i].source == SYS_PERF_COUNTER) {
297
0
            int num_bytes = read(_counters[i].fd, &buffer[i], COUNTER_SIZE);
298
299
0
            if (num_bytes != COUNTER_SIZE) {
300
0
                return false;
301
0
            }
302
303
0
            if (_counters[i].type == TUnit::TIME_NS) {
304
0
                buffer[i] /= 1000000;
305
0
            }
306
0
        }
307
0
    }
308
309
0
    return true;
310
0
}
311
312
// Parse out IO counters from /proc/self/io.  The file contains a list of
313
// (name,byte) pairs.
314
// For example:
315
//    rchar: 210212
316
//    wchar: 94
317
//    syscr: 118
318
//    syscw: 3
319
//    read_bytes: 0
320
//    write_bytes: 0
321
//    cancelled_write_bytes: 0
322
0
bool PerfCounters::get_proc_self_io_counters(std::vector<int64_t>& buffer) {
323
0
    std::ifstream file("/proc/self/io", std::ios::in);
324
0
    std::string buf;
325
0
    int64_t values[PROC_IO_LAST_COUNTER];
326
0
    int ret = 0;
327
328
0
    for (int i = 0; i < PROC_IO_LAST_COUNTER; ++i) {
329
0
        if (!file) {
330
0
            ret = -1;
331
0
            break;
332
0
        }
333
334
0
        getline(file, buf);
335
0
        size_t colon = buf.find(':');
336
337
0
        if (colon == std::string::npos) {
338
0
            ret = -1;
339
0
            break;
340
0
        }
341
342
0
        buf = buf.substr(colon + 1);
343
0
        std::istringstream stream(buf);
344
0
        stream >> values[i];
345
0
    }
346
347
0
    if (ret == 0) {
348
0
        for (int i = 0; i < _counters.size(); ++i) {
349
0
            if (_counters[i].source == PROC_SELF_IO) {
350
0
                buffer[i] = values[_counters[i].proc_io_line_number];
351
0
            }
352
0
        }
353
0
    }
354
355
0
    if (file.is_open()) {
356
0
        file.close();
357
0
    }
358
359
0
    return true;
360
0
}
361
362
0
bool PerfCounters::get_proc_self_status_counters(std::vector<int64_t>& buffer) {
363
0
    std::ifstream file("/proc/self/status", std::ios::in);
364
0
    std::string buf;
365
366
0
    while (file) {
367
0
        getline(file, buf);
368
369
0
        for (int i = 0; i < _counters.size(); ++i) {
370
0
            if (_counters[i].source == PROC_SELF_STATUS) {
371
0
                size_t field = buf.find(_counters[i].proc_status_field);
372
373
0
                if (field == std::string::npos) {
374
0
                    continue;
375
0
                }
376
377
0
                size_t colon = field + _counters[i].proc_status_field.size() + 1;
378
0
                buf = buf.substr(colon + 1);
379
0
                std::istringstream stream(buf);
380
0
                int64_t value;
381
0
                stream >> value;
382
0
                buffer[i] = value * 1024; // values in file are in kb
383
0
            }
384
0
        }
385
0
    }
386
387
0
    if (file.is_open()) {
388
0
        file.close();
389
0
    }
390
391
0
    return true;
392
0
}
393
394
0
// Creates an empty counter set. _group_fd starts at -1, the value
// init_sys_counter() tests for before recording the first opened perf-event fd.
PerfCounters::PerfCounters() : _group_fd(-1) {}
395
396
// Close all fds for the counters
397
0
PerfCounters::~PerfCounters() {
398
0
    for (int i = 0; i < _counters.size(); ++i) {
399
0
        if (_counters[i].source == SYS_PERF_COUNTER) {
400
0
            close(_counters[i].fd);
401
0
        }
402
0
    }
403
0
}
404
405
// Add here the default ones that are most useful
406
0
bool PerfCounters::add_default_counters() {
407
0
    bool result = true;
408
0
    result &= add_counter(PERF_COUNTER_SW_CPU_CLOCK);
409
    // These hardware ones don't work on a vm, just ignore if they fail
410
    // TODO: these don't work reliably and aren't that useful.  Turn them off.
411
    //add_counter(PERF_COUNTER_HW_INSTRUCTIONS);
412
    //add_counter(PERF_COUNTER_HW_CPU_CYCLES);
413
    //add_counter(PERF_COUNTER_HW_BRANCHES);
414
    //add_counter(PERF_COUNTER_HW_BRANCH_MISSES);
415
    //add_counter(PERF_COUNTER_HW_CACHE_MISSES);
416
0
    add_counter(PERF_COUNTER_VM_USAGE);
417
0
    add_counter(PERF_COUNTER_VM_PEAK_USAGE);
418
0
    add_counter(PERF_COUNTER_RESIDENT_SET_SIZE);
419
0
    result &= add_counter(PERF_COUNTER_DISK_READ);
420
0
    return result;
421
0
}
422
423
// Add a specific counter
424
0
bool PerfCounters::add_counter(Counter counter) {
425
    // Ignore if it's already added.
426
0
    for (int i = 0; i < _counters.size(); ++i) {
427
0
        if (_counters[i].counter == counter) {
428
0
            return true;
429
0
        }
430
0
    }
431
432
0
    bool result = false;
433
434
0
    switch (counter) {
435
0
    case PerfCounters::PERF_COUNTER_SW_CPU_CLOCK:
436
0
    case PerfCounters::PERF_COUNTER_SW_PAGE_FAULTS:
437
0
    case PerfCounters::PERF_COUNTER_SW_CONTEXT_SWITCHES:
438
0
    case PerfCounters::PERF_COUNTER_SW_CPU_MIGRATIONS:
439
0
    case PerfCounters::PERF_COUNTER_HW_CPU_CYCLES:
440
0
    case PerfCounters::PERF_COUNTER_HW_INSTRUCTIONS:
441
0
    case PerfCounters::PERF_COUNTER_HW_CACHE_HIT:
442
0
    case PerfCounters::PERF_COUNTER_HW_CACHE_MISSES:
443
0
    case PerfCounters::PERF_COUNTER_HW_BRANCHES:
444
0
    case PerfCounters::PERF_COUNTER_HW_BRANCH_MISSES:
445
0
    case PerfCounters::PERF_COUNTER_HW_BUS_CYCLES:
446
0
        result = init_sys_counter(counter);
447
0
        break;
448
449
0
    case PerfCounters::PERF_COUNTER_BYTES_READ:
450
0
    case PerfCounters::PERF_COUNTER_BYTES_WRITE:
451
0
    case PerfCounters::PERF_COUNTER_DISK_READ:
452
0
    case PerfCounters::PERF_COUNTER_DISK_WRITE:
453
0
        result = init_proc_self_io_counter(counter);
454
0
        break;
455
456
0
    case PerfCounters::PERF_COUNTER_VM_USAGE:
457
0
    case PerfCounters::PERF_COUNTER_VM_PEAK_USAGE:
458
0
    case PerfCounters::PERF_COUNTER_RESIDENT_SET_SIZE:
459
0
        result = init_proc_self_status_counter(counter);
460
0
        break;
461
462
0
    default:
463
0
        return false;
464
0
    }
465
466
0
    if (result) {
467
0
        _counter_names.push_back(get_counter_name(counter));
468
0
    }
469
470
0
    return result;
471
0
}
472
473
// Query all the counters right now and store the values in results
474
0
void PerfCounters::snapshot(const std::string& name) {
475
0
    if (_counters.size() == 0) {
476
0
        return;
477
0
    }
478
479
0
    std::string fixed_name = name;
480
481
0
    if (fixed_name.size() == 0) {
482
0
        std::stringstream ss;
483
0
        ss << _snapshots.size() + 1;
484
0
        fixed_name = ss.str();
485
0
    }
486
487
0
    std::vector<int64_t> buffer(_counters.size());
488
489
0
    get_sys_counters(buffer);
490
0
    get_proc_self_io_counters(buffer);
491
0
    get_proc_self_status_counters(buffer);
492
493
0
    _snapshots.push_back(buffer);
494
0
    _snapshot_names.push_back(fixed_name);
495
0
}
496
497
0
const std::vector<int64_t>* PerfCounters::counters(int snapshot) const {
498
0
    if (snapshot < 0 || snapshot >= _snapshots.size()) {
499
0
        return nullptr;
500
0
    }
501
502
0
    return &_snapshots[snapshot];
503
0
}
504
505
0
void PerfCounters::pretty_print(std::ostream* s) const {
506
0
    std::ostream& stream = *s;
507
0
    stream << std::setw(8) << "snapshot";
508
509
0
    for (int i = 0; i < _counter_names.size(); ++i) {
510
0
        stream << std::setw(PRETTY_PRINT_WIDTH) << _counter_names[i];
511
0
    }
512
513
0
    stream << std::endl;
514
515
0
    for (int s = 0; s < _snapshots.size(); s++) {
516
0
        stream << std::setw(8) << _snapshot_names[s];
517
0
        const std::vector<int64_t>& snapshot = _snapshots[s];
518
519
0
        for (int i = 0; i < snapshot.size(); ++i) {
520
0
            stream << std::setw(PRETTY_PRINT_WIDTH)
521
0
                   << PrettyPrinter::print(snapshot[i], _counters[i].type);
522
0
        }
523
524
0
        stream << std::endl;
525
0
    }
526
527
0
    stream << std::endl;
528
0
}
529
530
// Refactor below
531
532
0
int PerfCounters::parse_int(const string& state_key) {
533
0
    auto it = _process_state.find(state_key);
534
0
    if (it != _process_state.end()) return atoi(it->second.c_str());
535
0
    return -1;
536
0
}
537
538
0
int64_t PerfCounters::parse_int64(const string& state_key) {
539
0
    auto it = _process_state.find(state_key);
540
0
    if (it != _process_state.end()) {
541
0
        StringParser::ParseResult result;
542
0
        int64_t state_value =
543
0
                StringParser::string_to_int<int64_t>(it->second.data(), it->second.size(), &result);
544
0
        if (result == StringParser::PARSE_SUCCESS) return state_value;
545
0
    }
546
0
    return -1;
547
0
}
548
549
0
// Returns the cached process-state text stored under `state_key`, or an empty
// string when the key is not present.
string PerfCounters::parse_string(const string& state_key) {
    const auto entry = _process_state.find(state_key);
    if (entry == _process_state.end()) {
        return string();
    }
    return entry->second;
}
554
555
0
int64_t PerfCounters::parse_bytes(const string& state_key) {
556
0
    auto it = _process_state.find(state_key);
557
0
    if (it != _process_state.end()) {
558
0
        vector<string> fields = split(it->second, " ");
559
        // We expect state_value such as, e.g., '16129508', '16129508 kB', '16129508 mB'
560
0
        StringParser::ParseResult result;
561
0
        int64_t state_value =
562
0
                StringParser::string_to_int<int64_t>(fields[0].data(), fields[0].size(), &result);
563
0
        if (result == StringParser::PARSE_SUCCESS) {
564
0
            if (fields.size() < 2) return state_value;
565
0
            if (fields[1].compare("kB") == 0) return state_value * 1024L;
566
0
        }
567
0
    }
568
0
    return -1;
569
0
}
570
571
0
void PerfCounters::refresh_proc_status() {
572
0
    std::ifstream statusinfo("/proc/self/status", std::ios::in);
573
0
    std::string line;
574
0
    while (statusinfo.good() && !statusinfo.eof()) {
575
0
        getline(statusinfo, line);
576
0
        std::vector<std::string> fields = split(line, "\t");
577
0
        if (fields.size() < 2) continue;
578
0
        boost::algorithm::trim(fields[1]);
579
0
        std::string key = fields[0].substr(0, fields[0].size() - 1);
580
0
        _process_state[strings::Substitute("status/$0", key)] = fields[1];
581
0
    }
582
583
0
    if (statusinfo.is_open()) statusinfo.close();
584
585
0
    _vm_size = parse_bytes("status/VmSize");
586
0
    _vm_peak = parse_bytes("status/VmPeak");
587
0
    _vm_rss = parse_bytes("status/VmRSS");
588
0
#ifdef ADDRESS_SANITIZER
589
0
    _vm_rss_str = "[ASAN]" + PrettyPrinter::print(_vm_rss, TUnit::BYTES);
590
#else
591
    _vm_rss_str = PrettyPrinter::print(_vm_rss, TUnit::BYTES);
592
#endif
593
0
    _vm_hwm = parse_bytes("status/VmHWM");
594
0
}
595
596
0
// Fills *out from the cached process state. The cache is not refreshed here;
// each field is -1 when its entry is missing or cannot be parsed by
// parse_bytes().
void PerfCounters::get_proc_status(ProcStatus* out) {
    ProcStatus& status = *out;
    status.vm_size = parse_bytes("status/VmSize");
    status.vm_peak = parse_bytes("status/VmPeak");
    status.vm_rss = parse_bytes("status/VmRSS");
    status.vm_hwm = parse_bytes("status/VmHWM");
}
602
603
} // namespace doris