Coverage Report

Created: 2025-03-13 11:28

/root/doris/be/src/util/system_metrics.cpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "util/system_metrics.h"
19
20
#include <ctype.h>
21
// IWYU pragma: no_include <bthread/errno.h>
22
#include <errno.h> // IWYU pragma: keep
23
#include <glog/logging.h>
24
#include <inttypes.h>
25
#include <stdio.h>
26
#include <stdlib.h>
27
#include <string.h>
28
29
#include <functional>
30
#include <ostream>
31
#include <unordered_map>
32
#include <utility>
33
34
#include "gutil/strings/split.h" // for string split
35
#include "gutil/strtoint.h"      //  for atoi64
36
#include "runtime/workload_group/workload_group_metrics.h"
37
#include "util/cgroup_util.h"
38
#include "util/mem_info.h"
39
#include "util/perf_counters.h"
40
41
namespace doris {
42
43
DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(avail_cpu_num, MetricUnit::NOUNIT);
44
45
DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(host_cpu_num, MetricUnit::NOUNIT);
46
struct CpuNumberMetrics {
47
2
    CpuNumberMetrics(MetricEntity* ent) : entity(ent) {
48
2
        INT_COUNTER_METRIC_REGISTER(entity, host_cpu_num);
49
2
        INT_COUNTER_METRIC_REGISTER(entity, avail_cpu_num);
50
2
    }
51
52
    IntCounter* host_cpu_num {nullptr};
53
    IntCounter* avail_cpu_num {nullptr};
54
    MetricEntity* entity = nullptr;
55
};
56
57
#define DEFINE_CPU_COUNTER_METRIC(metric)                                            \
58
    DEFINE_COUNTER_METRIC_PROTOTYPE_5ARG(cpu_##metric, MetricUnit::PERCENT, "", cpu, \
59
                                         Labels({{"mode", #metric}}));
60
DEFINE_CPU_COUNTER_METRIC(user);
61
DEFINE_CPU_COUNTER_METRIC(nice);
62
DEFINE_CPU_COUNTER_METRIC(system);
63
DEFINE_CPU_COUNTER_METRIC(idle);
64
DEFINE_CPU_COUNTER_METRIC(iowait);
65
DEFINE_CPU_COUNTER_METRIC(irq);
66
DEFINE_CPU_COUNTER_METRIC(soft_irq);
67
DEFINE_CPU_COUNTER_METRIC(steal);
68
DEFINE_CPU_COUNTER_METRIC(guest);
69
DEFINE_CPU_COUNTER_METRIC(guest_nice);
70
71
// /proc/stat: http://www.linuxhowtos.org/System/procstat.htm
72
struct CpuMetrics {
73
33
    CpuMetrics(MetricEntity* ent) : entity(ent) {
74
33
        INT_COUNTER_METRIC_REGISTER(entity, cpu_user);
75
33
        INT_COUNTER_METRIC_REGISTER(entity, cpu_nice);
76
33
        INT_COUNTER_METRIC_REGISTER(entity, cpu_system);
77
33
        INT_COUNTER_METRIC_REGISTER(entity, cpu_idle);
78
33
        INT_COUNTER_METRIC_REGISTER(entity, cpu_iowait);
79
33
        INT_COUNTER_METRIC_REGISTER(entity, cpu_irq);
80
33
        INT_COUNTER_METRIC_REGISTER(entity, cpu_soft_irq);
81
33
        INT_COUNTER_METRIC_REGISTER(entity, cpu_steal);
82
33
        INT_COUNTER_METRIC_REGISTER(entity, cpu_guest);
83
33
        INT_COUNTER_METRIC_REGISTER(entity, cpu_guest_nice);
84
85
33
        metrics[0] = cpu_user;
86
33
        metrics[1] = cpu_nice;
87
33
        metrics[2] = cpu_system;
88
33
        metrics[3] = cpu_idle;
89
33
        metrics[4] = cpu_iowait;
90
33
        metrics[5] = cpu_irq;
91
33
        metrics[6] = cpu_soft_irq;
92
33
        metrics[7] = cpu_steal;
93
33
        metrics[8] = cpu_guest;
94
33
        metrics[9] = cpu_guest_nice;
95
33
    }
96
97
    static constexpr int cpu_num_metrics = 10;
98
99
    MetricEntity* entity = nullptr;
100
    IntCounter* cpu_user;
101
    IntCounter* cpu_nice;
102
    IntCounter* cpu_system;
103
    IntCounter* cpu_idle;
104
    IntCounter* cpu_iowait;
105
    IntCounter* cpu_irq;
106
    IntCounter* cpu_soft_irq;
107
    IntCounter* cpu_steal;
108
    IntCounter* cpu_guest;
109
    IntCounter* cpu_guest_nice;
110
111
    IntCounter* metrics[cpu_num_metrics];
112
};
113
114
#define DEFINE_MEMORY_GAUGE_METRIC(metric, unit) \
115
    DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(memory_##metric, unit);
116
DEFINE_MEMORY_GAUGE_METRIC(allocated_bytes, MetricUnit::BYTES);
117
DEFINE_MEMORY_GAUGE_METRIC(pgpgin, MetricUnit::NOUNIT);
118
DEFINE_MEMORY_GAUGE_METRIC(pgpgout, MetricUnit::NOUNIT);
119
DEFINE_MEMORY_GAUGE_METRIC(pswpin, MetricUnit::NOUNIT);
120
DEFINE_MEMORY_GAUGE_METRIC(pswpout, MetricUnit::NOUNIT);
121
#ifndef USE_JEMALLOC
122
DEFINE_MEMORY_GAUGE_METRIC(tcmalloc_allocated_bytes, MetricUnit::BYTES);
123
DEFINE_MEMORY_GAUGE_METRIC(tcmalloc_total_thread_cache_bytes, MetricUnit::BYTES);
124
DEFINE_MEMORY_GAUGE_METRIC(tcmalloc_central_cache_free_bytes, MetricUnit::BYTES);
125
DEFINE_MEMORY_GAUGE_METRIC(tcmalloc_transfer_cache_free_bytes, MetricUnit::BYTES);
126
DEFINE_MEMORY_GAUGE_METRIC(tcmalloc_thread_cache_free_bytes, MetricUnit::BYTES);
127
DEFINE_MEMORY_GAUGE_METRIC(tcmalloc_pageheap_free_bytes, MetricUnit::BYTES);
128
DEFINE_MEMORY_GAUGE_METRIC(tcmalloc_pageheap_unmapped_bytes, MetricUnit::BYTES);
129
#else
130
DEFINE_MEMORY_GAUGE_METRIC(jemalloc_allocated_bytes, MetricUnit::BYTES);
131
DEFINE_MEMORY_GAUGE_METRIC(jemalloc_active_bytes, MetricUnit::BYTES);
132
DEFINE_MEMORY_GAUGE_METRIC(jemalloc_metadata_bytes, MetricUnit::BYTES);
133
DEFINE_MEMORY_GAUGE_METRIC(jemalloc_resident_bytes, MetricUnit::BYTES);
134
DEFINE_MEMORY_GAUGE_METRIC(jemalloc_mapped_bytes, MetricUnit::BYTES);
135
DEFINE_MEMORY_GAUGE_METRIC(jemalloc_retained_bytes, MetricUnit::BYTES);
136
DEFINE_MEMORY_GAUGE_METRIC(jemalloc_tcache_bytes, MetricUnit::BYTES);
137
DEFINE_MEMORY_GAUGE_METRIC(jemalloc_pactive_num, MetricUnit::NOUNIT);
138
DEFINE_MEMORY_GAUGE_METRIC(jemalloc_pdirty_num, MetricUnit::NOUNIT);
139
DEFINE_MEMORY_GAUGE_METRIC(jemalloc_pmuzzy_num, MetricUnit::NOUNIT);
140
DEFINE_MEMORY_GAUGE_METRIC(jemalloc_dirty_purged_num, MetricUnit::NOUNIT);
141
DEFINE_MEMORY_GAUGE_METRIC(jemalloc_muzzy_purged_num, MetricUnit::NOUNIT);
142
#endif
143
144
struct MemoryMetrics {
145
2
    MemoryMetrics(MetricEntity* ent) : entity(ent) {
146
2
        INT_GAUGE_METRIC_REGISTER(entity, memory_allocated_bytes);
147
2
        INT_GAUGE_METRIC_REGISTER(entity, memory_pgpgin);
148
2
        INT_GAUGE_METRIC_REGISTER(entity, memory_pgpgout);
149
2
        INT_GAUGE_METRIC_REGISTER(entity, memory_pswpin);
150
2
        INT_GAUGE_METRIC_REGISTER(entity, memory_pswpout);
151
152
2
#ifndef USE_JEMALLOC
153
2
        INT_GAUGE_METRIC_REGISTER(entity, memory_tcmalloc_allocated_bytes);
154
2
        INT_GAUGE_METRIC_REGISTER(entity, memory_tcmalloc_total_thread_cache_bytes);
155
2
        INT_GAUGE_METRIC_REGISTER(entity, memory_tcmalloc_central_cache_free_bytes);
156
2
        INT_GAUGE_METRIC_REGISTER(entity, memory_tcmalloc_transfer_cache_free_bytes);
157
2
        INT_GAUGE_METRIC_REGISTER(entity, memory_tcmalloc_thread_cache_free_bytes);
158
2
        INT_GAUGE_METRIC_REGISTER(entity, memory_tcmalloc_pageheap_free_bytes);
159
2
        INT_GAUGE_METRIC_REGISTER(entity, memory_tcmalloc_pageheap_unmapped_bytes);
160
#else
161
        INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_allocated_bytes);
162
        INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_active_bytes);
163
        INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_metadata_bytes);
164
        INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_resident_bytes);
165
        INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_mapped_bytes);
166
        INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_retained_bytes);
167
        INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_tcache_bytes);
168
        INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_pactive_num);
169
        INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_pdirty_num);
170
        INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_pmuzzy_num);
171
        INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_dirty_purged_num);
172
        INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_muzzy_purged_num);
173
#endif
174
2
    }
175
176
    MetricEntity* entity = nullptr;
177
    IntGauge* memory_allocated_bytes;
178
    IntGauge* memory_pgpgin;
179
    IntGauge* memory_pgpgout;
180
    IntGauge* memory_pswpin;
181
    IntGauge* memory_pswpout;
182
183
#ifndef USE_JEMALLOC
184
    IntGauge* memory_tcmalloc_allocated_bytes;
185
    IntGauge* memory_tcmalloc_total_thread_cache_bytes;
186
    IntGauge* memory_tcmalloc_central_cache_free_bytes;
187
    IntGauge* memory_tcmalloc_transfer_cache_free_bytes;
188
    IntGauge* memory_tcmalloc_thread_cache_free_bytes;
189
    IntGauge* memory_tcmalloc_pageheap_free_bytes;
190
    IntGauge* memory_tcmalloc_pageheap_unmapped_bytes;
191
#else
192
    IntGauge* memory_jemalloc_allocated_bytes;
193
    IntGauge* memory_jemalloc_active_bytes;
194
    IntGauge* memory_jemalloc_metadata_bytes;
195
    IntGauge* memory_jemalloc_resident_bytes;
196
    IntGauge* memory_jemalloc_mapped_bytes;
197
    IntGauge* memory_jemalloc_retained_bytes;
198
    IntGauge* memory_jemalloc_tcache_bytes;
199
    IntGauge* memory_jemalloc_pactive_num;
200
    IntGauge* memory_jemalloc_pdirty_num;
201
    IntGauge* memory_jemalloc_pmuzzy_num;
202
    IntGauge* memory_jemalloc_dirty_purged_num;
203
    IntGauge* memory_jemalloc_muzzy_purged_num;
204
#endif
205
};
206
207
#define DEFINE_DISK_COUNTER_METRIC(metric, unit) \
208
    DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(disk_##metric, unit);
209
DEFINE_DISK_COUNTER_METRIC(reads_completed, MetricUnit::OPERATIONS);
210
DEFINE_DISK_COUNTER_METRIC(bytes_read, MetricUnit::BYTES);
211
DEFINE_DISK_COUNTER_METRIC(read_time_ms, MetricUnit::MILLISECONDS);
212
DEFINE_DISK_COUNTER_METRIC(writes_completed, MetricUnit::OPERATIONS);
213
DEFINE_DISK_COUNTER_METRIC(bytes_written, MetricUnit::BYTES);
214
DEFINE_DISK_COUNTER_METRIC(write_time_ms, MetricUnit::MILLISECONDS);
215
DEFINE_DISK_COUNTER_METRIC(io_time_ms, MetricUnit::MILLISECONDS);
216
DEFINE_DISK_COUNTER_METRIC(io_time_weigthed, MetricUnit::MILLISECONDS);
217
218
struct DiskMetrics {
219
2
    DiskMetrics(MetricEntity* ent) : entity(ent) {
220
2
        INT_COUNTER_METRIC_REGISTER(entity, disk_reads_completed);
221
2
        INT_COUNTER_METRIC_REGISTER(entity, disk_bytes_read);
222
2
        INT_COUNTER_METRIC_REGISTER(entity, disk_read_time_ms);
223
2
        INT_COUNTER_METRIC_REGISTER(entity, disk_writes_completed);
224
2
        INT_COUNTER_METRIC_REGISTER(entity, disk_bytes_written);
225
2
        INT_COUNTER_METRIC_REGISTER(entity, disk_write_time_ms);
226
2
        INT_COUNTER_METRIC_REGISTER(entity, disk_io_time_ms);
227
2
        INT_COUNTER_METRIC_REGISTER(entity, disk_io_time_weigthed);
228
2
    }
229
230
    MetricEntity* entity = nullptr;
231
    IntCounter* disk_reads_completed;
232
    IntCounter* disk_bytes_read;
233
    IntCounter* disk_read_time_ms;
234
    IntCounter* disk_writes_completed;
235
    IntCounter* disk_bytes_written;
236
    IntCounter* disk_write_time_ms;
237
    IntCounter* disk_io_time_ms;
238
    IntCounter* disk_io_time_weigthed;
239
};
240
241
#define DEFINE_NETWORK_COUNTER_METRIC(metric, unit) \
242
    DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(network_##metric, unit);
243
DEFINE_NETWORK_COUNTER_METRIC(receive_bytes, MetricUnit::BYTES);
244
DEFINE_NETWORK_COUNTER_METRIC(receive_packets, MetricUnit::PACKETS);
245
DEFINE_NETWORK_COUNTER_METRIC(send_bytes, MetricUnit::BYTES);
246
DEFINE_NETWORK_COUNTER_METRIC(send_packets, MetricUnit::PACKETS);
247
248
struct NetworkMetrics {
249
2
    NetworkMetrics(MetricEntity* ent) : entity(ent) {
250
2
        INT_COUNTER_METRIC_REGISTER(entity, network_receive_bytes);
251
2
        INT_COUNTER_METRIC_REGISTER(entity, network_receive_packets);
252
2
        INT_COUNTER_METRIC_REGISTER(entity, network_send_bytes);
253
2
        INT_COUNTER_METRIC_REGISTER(entity, network_send_packets);
254
2
    }
255
256
    MetricEntity* entity = nullptr;
257
    IntCounter* network_receive_bytes;
258
    IntCounter* network_receive_packets;
259
    IntCounter* network_send_bytes;
260
    IntCounter* network_send_packets;
261
};
262
263
#define DEFINE_SNMP_COUNTER_METRIC(metric, unit, desc) \
264
    DEFINE_COUNTER_METRIC_PROTOTYPE_3ARG(snmp_##metric, unit, desc);
265
DEFINE_SNMP_COUNTER_METRIC(tcp_in_errs, MetricUnit::NOUNIT,
266
                           "The number of all problematic TCP packets received");
267
DEFINE_SNMP_COUNTER_METRIC(tcp_retrans_segs, MetricUnit::NOUNIT, "All TCP packets retransmitted");
268
DEFINE_SNMP_COUNTER_METRIC(tcp_in_segs, MetricUnit::NOUNIT, "All received TCP packets");
269
DEFINE_SNMP_COUNTER_METRIC(tcp_out_segs, MetricUnit::NOUNIT, "All send TCP packets with RST mark");
270
271
// metrics read from /proc/net/snmp
272
struct SnmpMetrics {
273
2
    SnmpMetrics(MetricEntity* ent) : entity(ent) {
274
2
        INT_COUNTER_METRIC_REGISTER(entity, snmp_tcp_in_errs);
275
2
        INT_COUNTER_METRIC_REGISTER(entity, snmp_tcp_retrans_segs);
276
2
        INT_COUNTER_METRIC_REGISTER(entity, snmp_tcp_in_segs);
277
2
        INT_COUNTER_METRIC_REGISTER(entity, snmp_tcp_out_segs);
278
2
    }
279
280
    MetricEntity* entity = nullptr;
281
    IntCounter* snmp_tcp_in_errs;
282
    IntCounter* snmp_tcp_retrans_segs;
283
    IntCounter* snmp_tcp_in_segs;
284
    IntCounter* snmp_tcp_out_segs;
285
};
286
287
#define DEFINE_FD_COUNTER_METRIC(metric, unit) \
288
    DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(fd_##metric, unit);
289
DEFINE_FD_COUNTER_METRIC(num_limit, MetricUnit::NOUNIT);
290
DEFINE_FD_COUNTER_METRIC(num_used, MetricUnit::NOUNIT);
291
292
struct FileDescriptorMetrics {
293
2
    FileDescriptorMetrics(MetricEntity* ent) : entity(ent) {
294
2
        INT_GAUGE_METRIC_REGISTER(entity, fd_num_limit);
295
2
        INT_GAUGE_METRIC_REGISTER(entity, fd_num_used);
296
2
    }
297
298
    MetricEntity* entity = nullptr;
299
    IntGauge* fd_num_limit;
300
    IntGauge* fd_num_used;
301
};
302
303
#define DEFINE_LOAD_AVERAGE_DOUBLE_METRIC(metric)                                     \
304
    DEFINE_GAUGE_METRIC_PROTOTYPE_5ARG(load_average_##metric, MetricUnit::NOUNIT, "", \
305
                                       load_average, Labels({{"mode", #metric}}));
306
DEFINE_LOAD_AVERAGE_DOUBLE_METRIC(1_minutes);
307
DEFINE_LOAD_AVERAGE_DOUBLE_METRIC(5_minutes);
308
DEFINE_LOAD_AVERAGE_DOUBLE_METRIC(15_minutes);
309
310
struct LoadAverageMetrics {
311
2
    LoadAverageMetrics(MetricEntity* ent) : entity(ent) {
312
2
        DOUBLE_GAUGE_METRIC_REGISTER(entity, load_average_1_minutes);
313
2
        DOUBLE_GAUGE_METRIC_REGISTER(entity, load_average_5_minutes);
314
2
        DOUBLE_GAUGE_METRIC_REGISTER(entity, load_average_15_minutes);
315
2
    }
316
317
    MetricEntity* entity = nullptr;
318
    DoubleGauge* load_average_1_minutes;
319
    DoubleGauge* load_average_5_minutes;
320
    DoubleGauge* load_average_15_minutes;
321
};
322
323
#define DEFINE_PROC_STAT_COUNTER_METRIC(metric)                                       \
324
    DEFINE_COUNTER_METRIC_PROTOTYPE_5ARG(proc_##metric, MetricUnit::NOUNIT, "", proc, \
325
                                         Labels({{"mode", #metric}}));
326
DEFINE_PROC_STAT_COUNTER_METRIC(interrupt);
327
DEFINE_PROC_STAT_COUNTER_METRIC(ctxt_switch);
328
DEFINE_PROC_STAT_COUNTER_METRIC(procs_running);
329
DEFINE_PROC_STAT_COUNTER_METRIC(procs_blocked);
330
331
struct ProcMetrics {
332
2
    ProcMetrics(MetricEntity* ent) : entity(ent) {
333
2
        INT_COUNTER_METRIC_REGISTER(entity, proc_interrupt);
334
2
        INT_COUNTER_METRIC_REGISTER(entity, proc_ctxt_switch);
335
2
        INT_COUNTER_METRIC_REGISTER(entity, proc_procs_running);
336
2
        INT_COUNTER_METRIC_REGISTER(entity, proc_procs_blocked);
337
2
    }
338
339
    MetricEntity* entity = nullptr;
340
341
    IntCounter* proc_interrupt;
342
    IntCounter* proc_ctxt_switch;
343
    IntCounter* proc_procs_running;
344
    IntCounter* proc_procs_blocked;
345
};
346
347
DEFINE_GAUGE_CORE_METRIC_PROTOTYPE_2ARG(max_disk_io_util_percent, MetricUnit::PERCENT);
348
DEFINE_GAUGE_CORE_METRIC_PROTOTYPE_2ARG(max_network_send_bytes_rate, MetricUnit::BYTES);
349
DEFINE_GAUGE_CORE_METRIC_PROTOTYPE_2ARG(max_network_receive_bytes_rate, MetricUnit::BYTES);
350
351
const char* SystemMetrics::_s_hook_name = "system_metrics";
352
353
SystemMetrics::SystemMetrics(MetricRegistry* registry, const std::set<std::string>& disk_devices,
354
2
                             const std::vector<std::string>& network_interfaces) {
355
2
    DCHECK(registry != nullptr);
356
2
    _registry = registry;
357
2
    _server_entity = _registry->register_entity("server");
358
2
    DCHECK(_server_entity != nullptr);
359
2
    _server_entity->register_hook(_s_hook_name, std::bind(&SystemMetrics::update, this));
360
2
    _install_cpu_metrics();
361
2
    _install_memory_metrics(_server_entity.get());
362
2
    _install_disk_metrics(disk_devices);
363
2
    _install_net_metrics(network_interfaces);
364
2
    _install_fd_metrics(_server_entity.get());
365
2
    _install_snmp_metrics(_server_entity.get());
366
2
    _install_load_avg_metrics(_server_entity.get());
367
2
    _install_proc_metrics(_server_entity.get());
368
369
2
    INT_GAUGE_METRIC_REGISTER(_server_entity.get(), max_disk_io_util_percent);
370
2
    INT_GAUGE_METRIC_REGISTER(_server_entity.get(), max_network_send_bytes_rate);
371
2
    INT_GAUGE_METRIC_REGISTER(_server_entity.get(), max_network_receive_bytes_rate);
372
2
}
373
374
2
// Removes the update hook, then releases everything this object allocated itself: the
// per-cpu / per-disk / per-interface metric structs and the getline() scratch buffer.
SystemMetrics::~SystemMetrics() {
    DCHECK(_server_entity != nullptr);
    _server_entity->deregister_hook(_s_hook_name);

    // The maps hold raw pointers created with `new` in the _install_*_metrics() methods.
    for (auto& entry : _cpu_metrics) {
        delete entry.second;
    }
    for (auto& entry : _disk_metrics) {
        delete entry.second;
    }
    for (auto& entry : _network_metrics) {
        delete entry.second;
    }

    // _line_ptr is the buffer handed to getline(); free() is a no-op for a null pointer,
    // so no explicit null check is required.
    free(_line_ptr);
}
391
392
1
void SystemMetrics::update() {
393
1
    _update_cpu_metrics();
394
1
    _update_memory_metrics();
395
1
    _update_disk_metrics();
396
1
    _update_net_metrics();
397
1
    _update_fd_metrics();
398
1
    _update_snmp_metrics();
399
1
    _update_load_avg_metrics();
400
1
    _update_proc_metrics();
401
1
}
402
403
2
void SystemMetrics::_install_cpu_metrics() {
404
2
    get_cpu_name();
405
406
2
    int cpu_num = 0;
407
33
    for (auto cpu_name : _cpu_names) {
408
        // NOTE: cpu_name comes from /proc/stat which named 'cpu' is not a real cpu name, it should be skipped.
409
33
        if (cpu_name != "cpu") {
410
32
            cpu_num++;
411
32
        }
412
33
        auto cpu_entity = _registry->register_entity(cpu_name, {{"device", cpu_name}});
413
33
        CpuMetrics* metrics = new CpuMetrics(cpu_entity.get());
414
33
        _cpu_metrics.emplace(cpu_name, metrics);
415
33
    }
416
417
2
    auto cpu_num_entity = _registry->register_entity("doris_be_host_cpu_num");
418
2
    _cpu_num_metrics = std::make_unique<CpuNumberMetrics>(cpu_num_entity.get());
419
420
2
    _cpu_num_metrics->host_cpu_num->set_value(cpu_num);
421
2
}
422
423
#ifdef BE_TEST
// Paths of the mock /proc files used by unit-test builds. They start out null (explicitly
// here; static storage would zero them anyway) and must be assigned before update() runs.
const char* k_ut_stat_path = nullptr;      // stands in for /proc/stat
const char* k_ut_diskstats_path = nullptr; // stands in for /proc/diskstats
const char* k_ut_net_dev_path = nullptr;   // stands in for /proc/net/dev
const char* k_ut_fd_path = nullptr;
const char* k_ut_net_snmp_path = nullptr; // stands in for /proc/net/snmp
const char* k_ut_load_avg_path = nullptr;
const char* k_ut_vmstat_path = nullptr;
#endif
432
433
1
void SystemMetrics::_update_cpu_metrics() {
434
1
#ifdef BE_TEST
435
1
    FILE* fp = fopen(k_ut_stat_path, "r");
436
#else
437
    FILE* fp = fopen("/proc/stat", "r");
438
#endif
439
1
    if (fp == nullptr) {
440
0
        char buf[64];
441
0
        LOG(WARNING) << "open /proc/stat failed, errno=" << errno
442
0
                     << ", message=" << strerror_r(errno, buf, 64);
443
0
        return;
444
0
    }
445
446
41
    while (getline(&_line_ptr, &_line_buf_size, fp) > 0) {
447
40
        char cpu[16];
448
40
        int64_t values[CpuMetrics::cpu_num_metrics];
449
40
        memset(values, 0, sizeof(values));
450
40
        int num = sscanf(_line_ptr,
451
40
                         "%15s"
452
40
                         " %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64
453
40
                         " %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64,
454
40
                         cpu, &values[0], &values[1], &values[2], &values[3], &values[4],
455
40
                         &values[5], &values[6], &values[7], &values[8], &values[9]);
456
40
        if (num < 4) {
457
5
            continue;
458
5
        }
459
460
35
        std::string cpu_name(cpu);
461
35
        auto it = _cpu_metrics.find(cpu_name);
462
35
        if (it == _cpu_metrics.end()) {
463
2
            continue;
464
2
        }
465
466
363
        for (int i = 0; i < CpuMetrics::cpu_num_metrics; ++i) {
467
330
            it->second->metrics[i]->set_value(values[i]);
468
330
        }
469
33
    }
470
471
1
    if (ferror(fp) != 0) {
472
0
        char buf[64];
473
0
        LOG(WARNING) << "getline failed, errno=" << errno
474
0
                     << ", message=" << strerror_r(errno, buf, 64);
475
0
    }
476
477
1
    fclose(fp);
478
1
}
479
480
2
// Creates the MemoryMetrics gauges on `entity` and stores them in _memory_metrics,
// replacing whatever was held before.
void SystemMetrics::_install_memory_metrics(MetricEntity* entity) {
    // make_unique instead of reset(new ...): no naked `new`, same resulting state.
    _memory_metrics = std::make_unique<MemoryMetrics>(entity);
}
483
484
1
void SystemMetrics::_update_memory_metrics() {
485
1
    _memory_metrics->memory_allocated_bytes->set_value(PerfCounters::get_vm_rss());
486
1
    get_metrics_from_proc_vmstat();
487
1
}
488
489
0
void SystemMetrics::update_allocator_metrics() {
490
0
#if defined(ADDRESS_SANITIZER) || defined(LEAK_SANITIZER) || defined(THREAD_SANITIZER)
491
0
    LOG(INFO) << "Memory tracking is not available with address sanitizer builds.";
492
#elif defined(USE_JEMALLOC)
493
    _memory_metrics->memory_jemalloc_allocated_bytes->set_value(
494
            MemInfo::get_je_metrics("stats.allocated"));
495
    _memory_metrics->memory_jemalloc_active_bytes->set_value(
496
            MemInfo::get_je_metrics("stats.active"));
497
    _memory_metrics->memory_jemalloc_metadata_bytes->set_value(
498
            MemInfo::get_je_metrics("stats.metadata"));
499
    _memory_metrics->memory_jemalloc_resident_bytes->set_value(
500
            MemInfo::get_je_metrics("stats.resident"));
501
    _memory_metrics->memory_jemalloc_mapped_bytes->set_value(
502
            MemInfo::get_je_metrics("stats.mapped"));
503
    _memory_metrics->memory_jemalloc_retained_bytes->set_value(
504
            MemInfo::get_je_metrics("stats.retained"));
505
    _memory_metrics->memory_jemalloc_tcache_bytes->set_value(
506
            MemInfo::get_je_all_arena_metrics("tcache_bytes"));
507
    _memory_metrics->memory_jemalloc_pactive_num->set_value(
508
            MemInfo::get_je_all_arena_metrics("pactive"));
509
    _memory_metrics->memory_jemalloc_pdirty_num->set_value(
510
            MemInfo::get_je_all_arena_metrics("pdirty"));
511
    _memory_metrics->memory_jemalloc_pmuzzy_num->set_value(
512
            MemInfo::get_je_all_arena_metrics("pmuzzy"));
513
    _memory_metrics->memory_jemalloc_dirty_purged_num->set_value(
514
            MemInfo::get_je_all_arena_metrics("dirty_purged"));
515
    _memory_metrics->memory_jemalloc_muzzy_purged_num->set_value(
516
            MemInfo::get_je_all_arena_metrics("muzzy_purged"));
517
#else
518
    _memory_metrics->memory_tcmalloc_allocated_bytes->set_value(
519
            MemInfo::get_tc_metrics("generic.total_physical_bytes"));
520
    _memory_metrics->memory_tcmalloc_total_thread_cache_bytes->set_value(
521
            MemInfo::allocator_cache_mem());
522
    _memory_metrics->memory_tcmalloc_central_cache_free_bytes->set_value(
523
            MemInfo::get_tc_metrics("tcmalloc.central_cache_free_bytes"));
524
    _memory_metrics->memory_tcmalloc_transfer_cache_free_bytes->set_value(
525
            MemInfo::get_tc_metrics("tcmalloc.transfer_cache_free_bytes"));
526
    _memory_metrics->memory_tcmalloc_thread_cache_free_bytes->set_value(
527
            MemInfo::get_tc_metrics("tcmalloc.thread_cache_free_bytes"));
528
    _memory_metrics->memory_tcmalloc_pageheap_free_bytes->set_value(
529
            MemInfo::get_tc_metrics("tcmalloc.pageheap_free_bytes"));
530
    _memory_metrics->memory_tcmalloc_pageheap_unmapped_bytes->set_value(
531
            MemInfo::get_tc_metrics("tcmalloc.pageheap_unmapped_bytes"));
532
#endif
533
0
}
534
535
2
void SystemMetrics::_install_disk_metrics(const std::set<std::string>& disk_devices) {
536
2
    for (auto& disk_device : disk_devices) {
537
2
        auto disk_entity = _registry->register_entity(std::string("disk_metrics.") + disk_device,
538
2
                                                      {{"device", disk_device}});
539
2
        DiskMetrics* metrics = new DiskMetrics(disk_entity.get());
540
2
        _disk_metrics.emplace(disk_device, metrics);
541
2
    }
542
2
}
543
544
1
void SystemMetrics::_update_disk_metrics() {
545
1
#ifdef BE_TEST
546
1
    FILE* fp = fopen(k_ut_diskstats_path, "r");
547
#else
548
    FILE* fp = fopen("/proc/diskstats", "r");
549
#endif
550
1
    if (fp == nullptr) {
551
0
        char buf[64];
552
0
        LOG(WARNING) << "open /proc/diskstats failed, errno=" << errno
553
0
                     << ", message=" << strerror_r(errno, buf, 64);
554
0
        return;
555
0
    }
556
557
    // /proc/diskstats: https://www.kernel.org/doc/Documentation/ABI/testing/procfs-diskstats
558
    // 1 - major number
559
    // 2 - minor mumber
560
    // 3 - device name
561
    // 4 - reads completed successfully
562
    // 5 - reads merged
563
    // 6 - sectors read
564
    // 7 - time spent reading (ms)
565
    // 8 - writes completed
566
    // 9 - writes merged
567
    // 10 - sectors written
568
    // 11 - time spent writing (ms)
569
    // 12 - I/Os currently in progress
570
    // 13 - time spent doing I/Os (ms)
571
    // 14 - weighted time spent doing I/Os (ms)
572
    // I think 1024 is enough for device name
573
1
    int major = 0;
574
1
    int minor = 0;
575
1
    char device[1024];
576
1
    int64_t values[11];
577
35
    while (getline(&_line_ptr, &_line_buf_size, fp) > 0) {
578
34
        memset(values, 0, sizeof(values));
579
34
        int num = sscanf(_line_ptr,
580
34
                         "%d %d %1023s"
581
34
                         " %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64
582
34
                         " %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64,
583
34
                         &major, &minor, device, &values[0], &values[1], &values[2], &values[3],
584
34
                         &values[4], &values[5], &values[6], &values[7], &values[8], &values[9],
585
34
                         &values[10]);
586
34
        if (num < 4) {
587
0
            continue;
588
0
        }
589
34
        auto it = _disk_metrics.find(device);
590
34
        if (it == _disk_metrics.end()) {
591
33
            continue;
592
33
        }
593
        // update disk metrics
594
        // reads_completed: 4 reads completed successfully
595
1
        it->second->disk_reads_completed->set_value(values[0]);
596
        // bytes_read: 6 sectors read * 512; 5 reads merged is ignored
597
1
        it->second->disk_bytes_read->set_value(values[2] * 512);
598
        // read_time_ms: 7 time spent reading (ms)
599
1
        it->second->disk_read_time_ms->set_value(values[3]);
600
        // writes_completed: 8 writes completed
601
1
        it->second->disk_writes_completed->set_value(values[4]);
602
        // bytes_written: 10 sectors write * 512; 9 writes merged is ignored
603
1
        it->second->disk_bytes_written->set_value(values[6] * 512);
604
        // write_time_ms: 11 time spent writing (ms)
605
1
        it->second->disk_write_time_ms->set_value(values[7]);
606
        // io_time_ms: 13 time spent doing I/Os (ms)
607
1
        it->second->disk_io_time_ms->set_value(values[9]);
608
        // io_time_weigthed: 14 - weighted time spent doing I/Os (ms)
609
1
        it->second->disk_io_time_weigthed->set_value(values[10]);
610
1
    }
611
1
    if (ferror(fp) != 0) {
612
0
        char buf[64];
613
0
        LOG(WARNING) << "getline failed, errno=" << errno
614
0
                     << ", message=" << strerror_r(errno, buf, 64);
615
0
    }
616
1
    fclose(fp);
617
1
}
618
619
2
void SystemMetrics::_install_net_metrics(const std::vector<std::string>& interfaces) {
620
2
    for (auto& interface : interfaces) {
621
2
        auto interface_entity = _registry->register_entity(
622
2
                std::string("network_metrics.") + interface, {{"device", interface}});
623
2
        NetworkMetrics* metrics = new NetworkMetrics(interface_entity.get());
624
2
        _network_metrics.emplace(interface, metrics);
625
2
    }
626
2
}
627
628
2
// Creates the SnmpMetrics counters on `entity` and stores them in _snmp_metrics,
// replacing whatever was held before.
void SystemMetrics::_install_snmp_metrics(MetricEntity* entity) {
    // make_unique instead of reset(new ...): no naked `new`, same resulting state.
    _snmp_metrics = std::make_unique<SnmpMetrics>(entity);
}
631
632
1
void SystemMetrics::_update_net_metrics() {
633
1
#ifdef BE_TEST
634
    // to mock proc
635
1
    FILE* fp = fopen(k_ut_net_dev_path, "r");
636
#else
637
    FILE* fp = fopen("/proc/net/dev", "r");
638
#endif
639
1
    if (fp == nullptr) {
640
0
        char buf[64];
641
0
        LOG(WARNING) << "open /proc/net/dev failed, errno=" << errno
642
0
                     << ", message=" << strerror_r(errno, buf, 64);
643
0
        return;
644
0
    }
645
646
    // Ignore header
647
1
    if (getline(&_line_ptr, &_line_buf_size, fp) < 0 ||
648
1
        getline(&_line_ptr, &_line_buf_size, fp) < 0) {
649
0
        char buf[64];
650
0
        LOG(WARNING) << "read /proc/net/dev first two line failed, errno=" << errno
651
0
                     << ", message=" << strerror_r(errno, buf, 64);
652
0
        fclose(fp);
653
0
        return;
654
0
    }
655
1
    if (_proc_net_dev_version == 0) {
656
1
        if (strstr(_line_ptr, "compressed") != nullptr) {
657
1
            _proc_net_dev_version = 3;
658
1
        } else if (strstr(_line_ptr, "bytes") != nullptr) {
659
0
            _proc_net_dev_version = 2;
660
0
        } else {
661
0
            _proc_net_dev_version = 1;
662
0
        }
663
1
    }
664
665
5
    while (getline(&_line_ptr, &_line_buf_size, fp) > 0) {
666
4
        char* ptr = strrchr(_line_ptr, ':');
667
4
        if (ptr == nullptr) {
668
0
            continue;
669
0
        }
670
4
        char* start = _line_ptr;
671
13
        while (isspace(*start)) {
672
9
            start++;
673
9
        }
674
4
        std::string interface(start, ptr - start);
675
4
        auto it = _network_metrics.find(interface);
676
4
        if (it == _network_metrics.end()) {
677
3
            continue;
678
3
        }
679
1
        ptr++;
680
1
        int64_t receive_bytes = 0;
681
1
        int64_t receive_packets = 0;
682
1
        int64_t send_bytes = 0;
683
1
        int64_t send_packets = 0;
684
1
        switch (_proc_net_dev_version) {
685
1
        case 3:
686
            // receive: bytes packets errs drop fifo frame compressed multicast
687
            // send:    bytes packets errs drop fifo colls carrier compressed
688
1
            sscanf(ptr,
689
1
                   " %" PRId64 " %" PRId64
690
1
                   " %*d %*d %*d %*d %*d %*d"
691
1
                   " %" PRId64 " %" PRId64 " %*d %*d %*d %*d %*d %*d",
692
1
                   &receive_bytes, &receive_packets, &send_bytes, &send_packets);
693
1
            break;
694
0
        case 2:
695
            // receive: bytes packets errs drop fifo frame
696
            // send:    bytes packets errs drop fifo colls carrier
697
0
            sscanf(ptr,
698
0
                   " %" PRId64 " %" PRId64
699
0
                   " %*d %*d %*d %*d"
700
0
                   " %" PRId64 " %" PRId64 " %*d %*d %*d %*d %*d",
701
0
                   &receive_bytes, &receive_packets, &send_bytes, &send_packets);
702
0
            break;
703
0
        case 1:
704
            // receive: packets errs drop fifo frame
705
            // send: packets errs drop fifo colls carrier
706
0
            sscanf(ptr,
707
0
                   " %" PRId64
708
0
                   " %*d %*d %*d %*d"
709
0
                   " %" PRId64 " %*d %*d %*d %*d %*d",
710
0
                   &receive_packets, &send_packets);
711
0
            break;
712
0
        default:
713
0
            break;
714
1
        }
715
1
        it->second->network_receive_bytes->set_value(receive_bytes);
716
1
        it->second->network_receive_packets->set_value(receive_packets);
717
1
        it->second->network_send_bytes->set_value(send_bytes);
718
1
        it->second->network_send_packets->set_value(send_packets);
719
1
    }
720
1
    if (ferror(fp) != 0) {
721
0
        char buf[64];
722
0
        LOG(WARNING) << "getline failed, errno=" << errno
723
0
                     << ", message=" << strerror_r(errno, buf, 64);
724
0
    }
725
1
    fclose(fp);
726
1
}
727
728
1
void SystemMetrics::_update_snmp_metrics() {
729
1
#ifdef BE_TEST
730
    // to mock proc
731
1
    FILE* fp = fopen(k_ut_net_snmp_path, "r");
732
#else
733
    FILE* fp = fopen("/proc/net/snmp", "r");
734
#endif
735
1
    if (fp == nullptr) {
736
0
        char buf[64];
737
0
        LOG(WARNING) << "open /proc/net/snmp failed, errno=" << errno
738
0
                     << ", message=" << strerror_r(errno, buf, 64);
739
0
        return;
740
0
    }
741
742
    // We only care about Tcp lines, so skip other lines in front of Tcp line
743
1
    int res = 0;
744
7
    while ((res = getline(&_line_ptr, &_line_buf_size, fp)) > 0) {
745
7
        if (strstr(_line_ptr, "Tcp") != nullptr) {
746
1
            break;
747
1
        }
748
7
    }
749
1
    if (res <= 0) {
750
0
        char buf[64];
751
0
        LOG(WARNING) << "failed to skip lines of /proc/net/snmp, errno=" << errno
752
0
                     << ", message=" << strerror_r(errno, buf, 64);
753
0
        fclose(fp);
754
0
        return;
755
0
    }
756
757
    // parse the Tcp header
758
    // Tcp: RtoAlgorithm RtoMin RtoMax MaxConn ActiveOpens PassiveOpens AttemptFails EstabResets CurrEstab InSegs OutSegs RetransSegs InErrs OutRsts InCsumErrors
759
1
    std::vector<std::string> headers = strings::Split(_line_ptr, " ");
760
1
    std::unordered_map<std::string, int32_t> header_map;
761
1
    int32_t pos = 0;
762
16
    for (auto& h : headers) {
763
16
        header_map.emplace(h, pos++);
764
16
    }
765
766
    // read the metrics of TCP
767
1
    if (getline(&_line_ptr, &_line_buf_size, fp) < 0) {
768
0
        char buf[64];
769
0
        LOG(WARNING) << "failed to skip Tcp header line of /proc/net/snmp, errno=" << errno
770
0
                     << ", message=" << strerror_r(errno, buf, 64);
771
0
        fclose(fp);
772
0
        return;
773
0
    }
774
775
    // metric line looks like:
776
    // Tcp: 1 200 120000 -1 47849374 38601877 3353843 2320314 276 1033354613 1166025166 825439 12694 23238924 0
777
1
    std::vector<std::string> metrics = strings::Split(_line_ptr, " ");
778
1
    if (metrics.size() != headers.size()) {
779
0
        LOG(WARNING) << "invalid tcp metrics line: " << _line_ptr;
780
0
        fclose(fp);
781
0
        return;
782
0
    }
783
1
    int64_t retrans_segs = atoi64(metrics[header_map["RetransSegs"]]);
784
1
    int64_t in_errs = atoi64(metrics[header_map["InErrs"]]);
785
1
    int64_t in_segs = atoi64(metrics[header_map["InSegs"]]);
786
1
    int64_t out_segs = atoi64(metrics[header_map["OutSegs"]]);
787
1
    _snmp_metrics->snmp_tcp_retrans_segs->set_value(retrans_segs);
788
1
    _snmp_metrics->snmp_tcp_in_errs->set_value(in_errs);
789
1
    _snmp_metrics->snmp_tcp_in_segs->set_value(in_segs);
790
1
    _snmp_metrics->snmp_tcp_out_segs->set_value(out_segs);
791
792
1
    if (ferror(fp) != 0) {
793
0
        char buf[64];
794
0
        LOG(WARNING) << "getline failed, errno=" << errno
795
0
                     << ", message=" << strerror_r(errno, buf, 64);
796
0
    }
797
1
    fclose(fp);
798
1
}
799
800
2
// Creates the file-descriptor metric set bound to `entity` and stores it in
// _fd_metrics, replacing whatever was held before.
void SystemMetrics::_install_fd_metrics(MetricEntity* entity) {
    auto* fd_metrics = new FileDescriptorMetrics(entity);
    _fd_metrics.reset(fd_metrics);
}
803
804
1
void SystemMetrics::_update_fd_metrics() {
805
1
#ifdef BE_TEST
806
1
    FILE* fp = fopen(k_ut_fd_path, "r");
807
#else
808
    FILE* fp = fopen("/proc/sys/fs/file-nr", "r");
809
#endif
810
1
    if (fp == nullptr) {
811
0
        char buf[64];
812
0
        LOG(WARNING) << "open /proc/sys/fs/file-nr failed, errno=" << errno
813
0
                     << ", message=" << strerror_r(errno, buf, 64);
814
0
        return;
815
0
    }
816
817
    // /proc/sys/fs/file-nr: https://www.kernel.org/doc/Documentation/sysctl/fs.txt
818
    // 1 - the number of allocated file handles
819
    // 2 - the number of allocated but unused file handles
820
    // 3 - the maximum number of file handles
821
822
1
    int64_t values[3];
823
1
    if (getline(&_line_ptr, &_line_buf_size, fp) > 0) {
824
1
        memset(values, 0, sizeof(values));
825
1
        int num = sscanf(_line_ptr, "%" PRId64 " %" PRId64 " %" PRId64, &values[0], &values[1],
826
1
                         &values[2]);
827
1
        if (num == 3) {
828
1
            _fd_metrics->fd_num_limit->set_value(values[2]);
829
1
            _fd_metrics->fd_num_used->set_value(values[0] - values[1]);
830
1
        }
831
1
    }
832
833
1
    if (ferror(fp) != 0) {
834
0
        char buf[64];
835
0
        LOG(WARNING) << "getline failed, errno=" << errno
836
0
                     << ", message=" << strerror_r(errno, buf, 64);
837
0
    }
838
1
    fclose(fp);
839
1
}
840
841
2
// Creates the load-average metric set bound to `entity` and stores it in
// _load_average_metrics, replacing whatever was held before.
void SystemMetrics::_install_load_avg_metrics(MetricEntity* entity) {
    auto* load_metrics = new LoadAverageMetrics(entity);
    _load_average_metrics.reset(load_metrics);
}
844
845
1
void SystemMetrics::_update_load_avg_metrics() {
846
1
#ifdef BE_TEST
847
1
    FILE* fp = fopen(k_ut_load_avg_path, "r");
848
#else
849
    FILE* fp = fopen("/proc/loadavg", "r");
850
#endif
851
1
    if (fp == nullptr) {
852
0
        char buf[64];
853
0
        LOG(WARNING) << "open /proc/loadavg failed, errno=" << errno
854
0
                     << ", message=" << strerror_r(errno, buf, 64);
855
0
        return;
856
0
    }
857
858
1
    double values[3];
859
1
    if (getline(&_line_ptr, &_line_buf_size, fp) > 0) {
860
1
        memset(values, 0, sizeof(values));
861
1
        int num = sscanf(_line_ptr, "%lf %lf %lf", &values[0], &values[1], &values[2]);
862
1
        if (num == 3) {
863
1
            _load_average_metrics->load_average_1_minutes->set_value(values[0]);
864
1
            _load_average_metrics->load_average_5_minutes->set_value(values[1]);
865
1
            _load_average_metrics->load_average_15_minutes->set_value(values[2]);
866
1
        }
867
1
    }
868
869
1
    if (ferror(fp) != 0) {
870
0
        char buf[64];
871
0
        LOG(WARNING) << "getline failed, errno=" << errno
872
0
                     << ", message=" << strerror_r(errno, buf, 64);
873
0
    }
874
1
    fclose(fp);
875
1
}
876
877
int64_t SystemMetrics::get_max_io_util(const std::map<std::string, int64_t>& lst_value,
878
0
                                       int64_t interval_sec) {
879
0
    int64_t max = 0;
880
0
    for (auto& it : _disk_metrics) {
881
0
        int64_t cur = it.second->disk_io_time_ms->value();
882
0
        const auto find = lst_value.find(it.first);
883
0
        if (find == lst_value.end()) {
884
0
            continue;
885
0
        }
886
0
        int64_t incr = cur - find->second;
887
0
        if (incr > max) max = incr;
888
0
    }
889
0
    return max / interval_sec / 10;
890
0
}
891
892
0
void SystemMetrics::get_disks_io_time(std::map<std::string, int64_t>* map) {
893
0
    map->clear();
894
0
    for (auto& it : _disk_metrics) {
895
0
        map->emplace(it.first, it.second->disk_io_time_ms->value());
896
0
    }
897
0
}
898
899
0
double SystemMetrics::get_load_average_1_min() {
900
0
    if (_load_average_metrics) {
901
0
        return _load_average_metrics->load_average_1_minutes->value();
902
0
    } else {
903
0
        return 0;
904
0
    }
905
0
}
906
907
void SystemMetrics::get_network_traffic(std::map<std::string, int64_t>* send_map,
908
0
                                        std::map<std::string, int64_t>* rcv_map) {
909
0
    send_map->clear();
910
0
    rcv_map->clear();
911
0
    for (auto& it : _network_metrics) {
912
0
        if (it.first == "lo") {
913
0
            continue;
914
0
        }
915
0
        send_map->emplace(it.first, it.second->network_send_bytes->value());
916
0
        rcv_map->emplace(it.first, it.second->network_receive_bytes->value());
917
0
    }
918
0
}
919
920
void SystemMetrics::get_max_net_traffic(const std::map<std::string, int64_t>& lst_send_map,
921
                                        const std::map<std::string, int64_t>& lst_rcv_map,
922
                                        int64_t interval_sec, int64_t* send_rate,
923
0
                                        int64_t* rcv_rate) {
924
0
    int64_t max_send = 0;
925
0
    int64_t max_rcv = 0;
926
0
    for (auto& it : _network_metrics) {
927
0
        int64_t cur_send = it.second->network_send_bytes->value();
928
0
        int64_t cur_rcv = it.second->network_receive_bytes->value();
929
930
0
        const auto find_send = lst_send_map.find(it.first);
931
0
        if (find_send != lst_send_map.end()) {
932
0
            int64_t incr = cur_send - find_send->second;
933
0
            if (incr > max_send) max_send = incr;
934
0
        }
935
0
        const auto find_rcv = lst_rcv_map.find(it.first);
936
0
        if (find_rcv != lst_rcv_map.end()) {
937
0
            int64_t incr = cur_rcv - find_rcv->second;
938
0
            if (incr > max_rcv) max_rcv = incr;
939
0
        }
940
0
    }
941
942
0
    *send_rate = max_send / interval_sec;
943
0
    *rcv_rate = max_rcv / interval_sec;
944
0
}
945
946
void SystemMetrics::update_max_disk_io_util_percent(const std::map<std::string, int64_t>& lst_value,
947
0
                                                    int64_t interval_sec) {
948
0
    max_disk_io_util_percent->set_value(get_max_io_util(lst_value, interval_sec));
949
0
}
950
951
0
void SystemMetrics::update_max_network_send_bytes_rate(int64_t max_send_bytes_rate) {
952
0
    max_network_send_bytes_rate->set_value(max_send_bytes_rate);
953
0
}
954
955
0
void SystemMetrics::update_max_network_receive_bytes_rate(int64_t max_receive_bytes_rate) {
956
0
    max_network_receive_bytes_rate->set_value(max_receive_bytes_rate);
957
0
}
958
959
2
// Creates the /proc/stat-derived metric set bound to `entity` and stores it in
// _proc_metrics, replacing whatever was held before.
void SystemMetrics::_install_proc_metrics(MetricEntity* entity) {
    auto* proc_metrics = new ProcMetrics(entity);
    _proc_metrics.reset(proc_metrics);
}
962
963
1
void SystemMetrics::_update_proc_metrics() {
964
1
#ifdef BE_TEST
965
1
    FILE* fp = fopen(k_ut_stat_path, "r");
966
#else
967
    FILE* fp = fopen("/proc/stat", "r");
968
#endif
969
1
    if (fp == nullptr) {
970
0
        char buf[64];
971
0
        LOG(WARNING) << "open /proc/stat failed, errno=" << errno
972
0
                     << ", message=" << strerror_r(errno, buf, 64);
973
0
        return;
974
0
    }
975
976
1
    uint64_t inter = 0, ctxt = 0, procs_r = 0, procs_b = 0;
977
41
    while (getline(&_line_ptr, &_line_buf_size, fp) > 0) {
978
40
        char* start_pos = nullptr;
979
40
        start_pos = strstr(_line_ptr, "intr ");
980
40
        if (start_pos) {
981
1
            sscanf(start_pos, "intr %" PRIu64, &inter);
982
1
            _proc_metrics->proc_interrupt->set_value(inter);
983
1
        }
984
985
40
        start_pos = strstr(_line_ptr, "ctxt ");
986
40
        if (start_pos) {
987
1
            sscanf(start_pos, "ctxt %" PRIu64, &ctxt);
988
1
            _proc_metrics->proc_ctxt_switch->set_value(ctxt);
989
1
        }
990
991
40
        start_pos = strstr(_line_ptr, "procs_running ");
992
40
        if (start_pos) {
993
1
            sscanf(start_pos, "procs_running %" PRIu64, &procs_r);
994
1
            _proc_metrics->proc_procs_running->set_value(procs_r);
995
1
        }
996
997
40
        start_pos = strstr(_line_ptr, "procs_blocked ");
998
40
        if (start_pos) {
999
1
            sscanf(start_pos, "procs_blocked %" PRIu64, &procs_b);
1000
1
            _proc_metrics->proc_procs_blocked->set_value(procs_b);
1001
1
        }
1002
40
    }
1003
1004
1
    if (ferror(fp) != 0) {
1005
0
        char buf[64];
1006
0
        LOG(WARNING) << "getline failed, errno=" << errno
1007
0
                     << ", message=" << strerror_r(errno, buf, 64);
1008
0
    }
1009
1010
1
    fclose(fp);
1011
1
}
1012
1013
0
void SystemMetrics::update_be_avail_cpu_num() {
1014
0
    int64_t physical_cpu_num = _cpu_num_metrics->host_cpu_num->value();
1015
0
    if (physical_cpu_num > 0) {
1016
0
        physical_cpu_num = CGroupUtil::get_cgroup_limited_cpu_number(physical_cpu_num);
1017
0
        _cpu_num_metrics->avail_cpu_num->set_value(physical_cpu_num);
1018
0
    }
1019
0
}
1020
1021
1
void SystemMetrics::get_metrics_from_proc_vmstat() {
1022
1
#ifdef BE_TEST
1023
1
    FILE* fp = fopen(k_ut_vmstat_path, "r");
1024
#else
1025
    FILE* fp = fopen("/proc/vmstat", "r");
1026
#endif
1027
1
    if (fp == nullptr) {
1028
0
        char buf[64];
1029
0
        LOG(WARNING) << "open /proc/vmstat failed, errno=" << errno
1030
0
                     << ", message=" << strerror_r(errno, buf, 64);
1031
0
        return;
1032
0
    }
1033
1034
13
    while (getline(&_line_ptr, &_line_buf_size, fp) > 0) {
1035
12
        uint64_t value;
1036
12
        char name[64];
1037
12
        int num = sscanf(_line_ptr, "%s %" PRIu64, name, &value);
1038
12
        if (num < 2) {
1039
0
            continue;
1040
0
        }
1041
1042
12
        if (strcmp(name, "pgpgin") == 0) {
1043
1
            _memory_metrics->memory_pgpgin->set_value(value);
1044
11
        } else if (strcmp(name, "pgpgout") == 0) {
1045
1
            _memory_metrics->memory_pgpgout->set_value(value);
1046
10
        } else if (strcmp(name, "pswpin") == 0) {
1047
1
            _memory_metrics->memory_pswpin->set_value(value);
1048
9
        } else if (strcmp(name, "pswpout") == 0) {
1049
1
            _memory_metrics->memory_pswpout->set_value(value);
1050
1
        }
1051
12
    }
1052
1053
1
    if (ferror(fp) != 0) {
1054
0
        char buf[64];
1055
0
        LOG(WARNING) << "getline failed, errno=" << errno
1056
0
                     << ", message=" << strerror_r(errno, buf, 64);
1057
0
    }
1058
1059
1
    fclose(fp);
1060
1
}
1061
1062
2
void SystemMetrics::get_cpu_name() {
1063
2
#ifdef BE_TEST
1064
2
    FILE* fp = fopen(k_ut_stat_path, "r");
1065
#else
1066
    FILE* fp = fopen("/proc/stat", "r");
1067
#endif
1068
2
    if (fp == nullptr) {
1069
1
        char buf[64];
1070
1
        LOG(WARNING) << "open /proc/stat failed, errno=" << errno
1071
1
                     << ", message=" << strerror_r(errno, buf, 64);
1072
1
        return;
1073
1
    }
1074
1075
41
    while (getline(&_line_ptr, &_line_buf_size, fp) > 0) {
1076
40
        char cpu[16];
1077
40
        char* start_pos = nullptr;
1078
40
        start_pos = strstr(_line_ptr, "cpu");
1079
40
        if (start_pos) {
1080
33
            sscanf(_line_ptr, "%15s", cpu);
1081
33
            std::string cpu_name(cpu);
1082
33
            _cpu_names.push_back(cpu_name);
1083
33
        }
1084
40
    }
1085
1086
1
    if (ferror(fp) != 0) {
1087
0
        char buf[64];
1088
0
        LOG(WARNING) << "getline failed, errno=" << errno
1089
0
                     << ", message=" << strerror_r(errno, buf, 64);
1090
0
    }
1091
1092
1
    fclose(fp);
1093
1
}
1094
1095
} // namespace doris