Coverage Report

Created: 2025-03-11 14:37

/root/doris/be/src/util/system_metrics.cpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "util/system_metrics.h"
19
20
#include <ctype.h>
21
// IWYU pragma: no_include <bthread/errno.h>
22
#include <errno.h> // IWYU pragma: keep
23
#include <glog/logging.h>
24
#include <inttypes.h>
25
#include <stdio.h>
26
#include <stdlib.h>
27
#include <string.h>
28
29
#include <functional>
30
#include <ostream>
31
#include <unordered_map>
32
#include <utility>
33
34
#include "gutil/strings/split.h" // for string split
35
#include "gutil/strtoint.h"      //  for atoi64
36
#include "runtime/memory/jemalloc_control.h"
37
#include "util/cgroup_util.h"
38
#include "util/perf_counters.h"
39
40
namespace doris {
41
42
DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(avail_cpu_num, MetricUnit::NOUNIT);
43
44
DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(host_cpu_num, MetricUnit::NOUNIT);
45
struct CpuNumberMetrics {
46
2
    CpuNumberMetrics(MetricEntity* ent) : entity(ent) {
47
2
        INT_COUNTER_METRIC_REGISTER(entity, host_cpu_num);
48
2
        INT_COUNTER_METRIC_REGISTER(entity, avail_cpu_num);
49
2
    }
50
51
    IntCounter* host_cpu_num {nullptr};
52
    IntCounter* avail_cpu_num {nullptr};
53
    MetricEntity* entity = nullptr;
54
};
55
56
#define DEFINE_CPU_COUNTER_METRIC(metric)                                            \
57
    DEFINE_COUNTER_METRIC_PROTOTYPE_5ARG(cpu_##metric, MetricUnit::PERCENT, "", cpu, \
58
                                         Labels({{"mode", #metric}}));
59
DEFINE_CPU_COUNTER_METRIC(user);
60
DEFINE_CPU_COUNTER_METRIC(nice);
61
DEFINE_CPU_COUNTER_METRIC(system);
62
DEFINE_CPU_COUNTER_METRIC(idle);
63
DEFINE_CPU_COUNTER_METRIC(iowait);
64
DEFINE_CPU_COUNTER_METRIC(irq);
65
DEFINE_CPU_COUNTER_METRIC(soft_irq);
66
DEFINE_CPU_COUNTER_METRIC(steal);
67
DEFINE_CPU_COUNTER_METRIC(guest);
68
DEFINE_CPU_COUNTER_METRIC(guest_nice);
69
70
// /proc/stat: http://www.linuxhowtos.org/System/procstat.htm
71
struct CpuMetrics {
72
33
    CpuMetrics(MetricEntity* ent) : entity(ent) {
73
33
        INT_COUNTER_METRIC_REGISTER(entity, cpu_user);
74
33
        INT_COUNTER_METRIC_REGISTER(entity, cpu_nice);
75
33
        INT_COUNTER_METRIC_REGISTER(entity, cpu_system);
76
33
        INT_COUNTER_METRIC_REGISTER(entity, cpu_idle);
77
33
        INT_COUNTER_METRIC_REGISTER(entity, cpu_iowait);
78
33
        INT_COUNTER_METRIC_REGISTER(entity, cpu_irq);
79
33
        INT_COUNTER_METRIC_REGISTER(entity, cpu_soft_irq);
80
33
        INT_COUNTER_METRIC_REGISTER(entity, cpu_steal);
81
33
        INT_COUNTER_METRIC_REGISTER(entity, cpu_guest);
82
33
        INT_COUNTER_METRIC_REGISTER(entity, cpu_guest_nice);
83
84
33
        metrics[0] = cpu_user;
85
33
        metrics[1] = cpu_nice;
86
33
        metrics[2] = cpu_system;
87
33
        metrics[3] = cpu_idle;
88
33
        metrics[4] = cpu_iowait;
89
33
        metrics[5] = cpu_irq;
90
33
        metrics[6] = cpu_soft_irq;
91
33
        metrics[7] = cpu_steal;
92
33
        metrics[8] = cpu_guest;
93
33
        metrics[9] = cpu_guest_nice;
94
33
    }
95
96
    static constexpr int cpu_num_metrics = 10;
97
98
    MetricEntity* entity = nullptr;
99
    IntCounter* cpu_user;
100
    IntCounter* cpu_nice;
101
    IntCounter* cpu_system;
102
    IntCounter* cpu_idle;
103
    IntCounter* cpu_iowait;
104
    IntCounter* cpu_irq;
105
    IntCounter* cpu_soft_irq;
106
    IntCounter* cpu_steal;
107
    IntCounter* cpu_guest;
108
    IntCounter* cpu_guest_nice;
109
110
    IntCounter* metrics[cpu_num_metrics];
111
};
112
113
#define DEFINE_MEMORY_GAUGE_METRIC(metric, unit) \
114
    DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(memory_##metric, unit);
115
DEFINE_MEMORY_GAUGE_METRIC(allocated_bytes, MetricUnit::BYTES);
116
DEFINE_MEMORY_GAUGE_METRIC(pgpgin, MetricUnit::NOUNIT);
117
DEFINE_MEMORY_GAUGE_METRIC(pgpgout, MetricUnit::NOUNIT);
118
DEFINE_MEMORY_GAUGE_METRIC(pswpin, MetricUnit::NOUNIT);
119
DEFINE_MEMORY_GAUGE_METRIC(pswpout, MetricUnit::NOUNIT);
120
#ifndef USE_JEMALLOC
121
DEFINE_MEMORY_GAUGE_METRIC(tcmalloc_allocated_bytes, MetricUnit::BYTES);
122
DEFINE_MEMORY_GAUGE_METRIC(tcmalloc_total_thread_cache_bytes, MetricUnit::BYTES);
123
DEFINE_MEMORY_GAUGE_METRIC(tcmalloc_central_cache_free_bytes, MetricUnit::BYTES);
124
DEFINE_MEMORY_GAUGE_METRIC(tcmalloc_transfer_cache_free_bytes, MetricUnit::BYTES);
125
DEFINE_MEMORY_GAUGE_METRIC(tcmalloc_thread_cache_free_bytes, MetricUnit::BYTES);
126
DEFINE_MEMORY_GAUGE_METRIC(tcmalloc_pageheap_free_bytes, MetricUnit::BYTES);
127
DEFINE_MEMORY_GAUGE_METRIC(tcmalloc_pageheap_unmapped_bytes, MetricUnit::BYTES);
128
#else
129
DEFINE_MEMORY_GAUGE_METRIC(jemalloc_allocated_bytes, MetricUnit::BYTES);
130
DEFINE_MEMORY_GAUGE_METRIC(jemalloc_active_bytes, MetricUnit::BYTES);
131
DEFINE_MEMORY_GAUGE_METRIC(jemalloc_metadata_bytes, MetricUnit::BYTES);
132
DEFINE_MEMORY_GAUGE_METRIC(jemalloc_resident_bytes, MetricUnit::BYTES);
133
DEFINE_MEMORY_GAUGE_METRIC(jemalloc_mapped_bytes, MetricUnit::BYTES);
134
DEFINE_MEMORY_GAUGE_METRIC(jemalloc_retained_bytes, MetricUnit::BYTES);
135
DEFINE_MEMORY_GAUGE_METRIC(jemalloc_tcache_bytes, MetricUnit::BYTES);
136
DEFINE_MEMORY_GAUGE_METRIC(jemalloc_pactive_num, MetricUnit::NOUNIT);
137
DEFINE_MEMORY_GAUGE_METRIC(jemalloc_pdirty_num, MetricUnit::NOUNIT);
138
DEFINE_MEMORY_GAUGE_METRIC(jemalloc_pmuzzy_num, MetricUnit::NOUNIT);
139
DEFINE_MEMORY_GAUGE_METRIC(jemalloc_dirty_purged_num, MetricUnit::NOUNIT);
140
DEFINE_MEMORY_GAUGE_METRIC(jemalloc_muzzy_purged_num, MetricUnit::NOUNIT);
141
#endif
142
143
struct MemoryMetrics {
144
2
    MemoryMetrics(MetricEntity* ent) : entity(ent) {
145
2
        INT_GAUGE_METRIC_REGISTER(entity, memory_allocated_bytes);
146
2
        INT_GAUGE_METRIC_REGISTER(entity, memory_pgpgin);
147
2
        INT_GAUGE_METRIC_REGISTER(entity, memory_pgpgout);
148
2
        INT_GAUGE_METRIC_REGISTER(entity, memory_pswpin);
149
2
        INT_GAUGE_METRIC_REGISTER(entity, memory_pswpout);
150
151
2
#ifndef USE_JEMALLOC
152
2
        INT_GAUGE_METRIC_REGISTER(entity, memory_tcmalloc_allocated_bytes);
153
2
        INT_GAUGE_METRIC_REGISTER(entity, memory_tcmalloc_total_thread_cache_bytes);
154
2
        INT_GAUGE_METRIC_REGISTER(entity, memory_tcmalloc_central_cache_free_bytes);
155
2
        INT_GAUGE_METRIC_REGISTER(entity, memory_tcmalloc_transfer_cache_free_bytes);
156
2
        INT_GAUGE_METRIC_REGISTER(entity, memory_tcmalloc_thread_cache_free_bytes);
157
2
        INT_GAUGE_METRIC_REGISTER(entity, memory_tcmalloc_pageheap_free_bytes);
158
2
        INT_GAUGE_METRIC_REGISTER(entity, memory_tcmalloc_pageheap_unmapped_bytes);
159
#else
160
        INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_allocated_bytes);
161
        INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_active_bytes);
162
        INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_metadata_bytes);
163
        INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_resident_bytes);
164
        INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_mapped_bytes);
165
        INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_retained_bytes);
166
        INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_tcache_bytes);
167
        INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_pactive_num);
168
        INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_pdirty_num);
169
        INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_pmuzzy_num);
170
        INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_dirty_purged_num);
171
        INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_muzzy_purged_num);
172
#endif
173
2
    }
174
175
    MetricEntity* entity = nullptr;
176
    IntGauge* memory_allocated_bytes;
177
    IntGauge* memory_pgpgin;
178
    IntGauge* memory_pgpgout;
179
    IntGauge* memory_pswpin;
180
    IntGauge* memory_pswpout;
181
182
#ifndef USE_JEMALLOC
183
    IntGauge* memory_tcmalloc_allocated_bytes;
184
    IntGauge* memory_tcmalloc_total_thread_cache_bytes;
185
    IntGauge* memory_tcmalloc_central_cache_free_bytes;
186
    IntGauge* memory_tcmalloc_transfer_cache_free_bytes;
187
    IntGauge* memory_tcmalloc_thread_cache_free_bytes;
188
    IntGauge* memory_tcmalloc_pageheap_free_bytes;
189
    IntGauge* memory_tcmalloc_pageheap_unmapped_bytes;
190
#else
191
    IntGauge* memory_jemalloc_allocated_bytes;
192
    IntGauge* memory_jemalloc_active_bytes;
193
    IntGauge* memory_jemalloc_metadata_bytes;
194
    IntGauge* memory_jemalloc_resident_bytes;
195
    IntGauge* memory_jemalloc_mapped_bytes;
196
    IntGauge* memory_jemalloc_retained_bytes;
197
    IntGauge* memory_jemalloc_tcache_bytes;
198
    IntGauge* memory_jemalloc_pactive_num;
199
    IntGauge* memory_jemalloc_pdirty_num;
200
    IntGauge* memory_jemalloc_pmuzzy_num;
201
    IntGauge* memory_jemalloc_dirty_purged_num;
202
    IntGauge* memory_jemalloc_muzzy_purged_num;
203
#endif
204
};
205
206
#define DEFINE_DISK_COUNTER_METRIC(metric, unit) \
207
    DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(disk_##metric, unit);
208
DEFINE_DISK_COUNTER_METRIC(reads_completed, MetricUnit::OPERATIONS);
209
DEFINE_DISK_COUNTER_METRIC(bytes_read, MetricUnit::BYTES);
210
DEFINE_DISK_COUNTER_METRIC(read_time_ms, MetricUnit::MILLISECONDS);
211
DEFINE_DISK_COUNTER_METRIC(writes_completed, MetricUnit::OPERATIONS);
212
DEFINE_DISK_COUNTER_METRIC(bytes_written, MetricUnit::BYTES);
213
DEFINE_DISK_COUNTER_METRIC(write_time_ms, MetricUnit::MILLISECONDS);
214
DEFINE_DISK_COUNTER_METRIC(io_time_ms, MetricUnit::MILLISECONDS);
215
DEFINE_DISK_COUNTER_METRIC(io_time_weigthed, MetricUnit::MILLISECONDS);
216
217
struct DiskMetrics {
218
2
    DiskMetrics(MetricEntity* ent) : entity(ent) {
219
2
        INT_COUNTER_METRIC_REGISTER(entity, disk_reads_completed);
220
2
        INT_COUNTER_METRIC_REGISTER(entity, disk_bytes_read);
221
2
        INT_COUNTER_METRIC_REGISTER(entity, disk_read_time_ms);
222
2
        INT_COUNTER_METRIC_REGISTER(entity, disk_writes_completed);
223
2
        INT_COUNTER_METRIC_REGISTER(entity, disk_bytes_written);
224
2
        INT_COUNTER_METRIC_REGISTER(entity, disk_write_time_ms);
225
2
        INT_COUNTER_METRIC_REGISTER(entity, disk_io_time_ms);
226
2
        INT_COUNTER_METRIC_REGISTER(entity, disk_io_time_weigthed);
227
2
    }
228
229
    MetricEntity* entity = nullptr;
230
    IntCounter* disk_reads_completed;
231
    IntCounter* disk_bytes_read;
232
    IntCounter* disk_read_time_ms;
233
    IntCounter* disk_writes_completed;
234
    IntCounter* disk_bytes_written;
235
    IntCounter* disk_write_time_ms;
236
    IntCounter* disk_io_time_ms;
237
    IntCounter* disk_io_time_weigthed;
238
};
239
240
#define DEFINE_NETWORK_COUNTER_METRIC(metric, unit) \
241
    DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(network_##metric, unit);
242
DEFINE_NETWORK_COUNTER_METRIC(receive_bytes, MetricUnit::BYTES);
243
DEFINE_NETWORK_COUNTER_METRIC(receive_packets, MetricUnit::PACKETS);
244
DEFINE_NETWORK_COUNTER_METRIC(send_bytes, MetricUnit::BYTES);
245
DEFINE_NETWORK_COUNTER_METRIC(send_packets, MetricUnit::PACKETS);
246
247
struct NetworkMetrics {
248
2
    NetworkMetrics(MetricEntity* ent) : entity(ent) {
249
2
        INT_COUNTER_METRIC_REGISTER(entity, network_receive_bytes);
250
2
        INT_COUNTER_METRIC_REGISTER(entity, network_receive_packets);
251
2
        INT_COUNTER_METRIC_REGISTER(entity, network_send_bytes);
252
2
        INT_COUNTER_METRIC_REGISTER(entity, network_send_packets);
253
2
    }
254
255
    MetricEntity* entity = nullptr;
256
    IntCounter* network_receive_bytes;
257
    IntCounter* network_receive_packets;
258
    IntCounter* network_send_bytes;
259
    IntCounter* network_send_packets;
260
};
261
262
#define DEFINE_SNMP_COUNTER_METRIC(metric, unit, desc) \
263
    DEFINE_COUNTER_METRIC_PROTOTYPE_3ARG(snmp_##metric, unit, desc);
264
DEFINE_SNMP_COUNTER_METRIC(tcp_in_errs, MetricUnit::NOUNIT,
265
                           "The number of all problematic TCP packets received");
266
DEFINE_SNMP_COUNTER_METRIC(tcp_retrans_segs, MetricUnit::NOUNIT, "All TCP packets retransmitted");
267
DEFINE_SNMP_COUNTER_METRIC(tcp_in_segs, MetricUnit::NOUNIT, "All received TCP packets");
268
DEFINE_SNMP_COUNTER_METRIC(tcp_out_segs, MetricUnit::NOUNIT, "All send TCP packets with RST mark");
269
270
// metrics read from /proc/net/snmp
271
struct SnmpMetrics {
272
2
    SnmpMetrics(MetricEntity* ent) : entity(ent) {
273
2
        INT_COUNTER_METRIC_REGISTER(entity, snmp_tcp_in_errs);
274
2
        INT_COUNTER_METRIC_REGISTER(entity, snmp_tcp_retrans_segs);
275
2
        INT_COUNTER_METRIC_REGISTER(entity, snmp_tcp_in_segs);
276
2
        INT_COUNTER_METRIC_REGISTER(entity, snmp_tcp_out_segs);
277
2
    }
278
279
    MetricEntity* entity = nullptr;
280
    IntCounter* snmp_tcp_in_errs;
281
    IntCounter* snmp_tcp_retrans_segs;
282
    IntCounter* snmp_tcp_in_segs;
283
    IntCounter* snmp_tcp_out_segs;
284
};
285
286
#define DEFINE_FD_COUNTER_METRIC(metric, unit) \
287
    DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(fd_##metric, unit);
288
DEFINE_FD_COUNTER_METRIC(num_limit, MetricUnit::NOUNIT);
289
DEFINE_FD_COUNTER_METRIC(num_used, MetricUnit::NOUNIT);
290
291
struct FileDescriptorMetrics {
292
2
    FileDescriptorMetrics(MetricEntity* ent) : entity(ent) {
293
2
        INT_GAUGE_METRIC_REGISTER(entity, fd_num_limit);
294
2
        INT_GAUGE_METRIC_REGISTER(entity, fd_num_used);
295
2
    }
296
297
    MetricEntity* entity = nullptr;
298
    IntGauge* fd_num_limit;
299
    IntGauge* fd_num_used;
300
};
301
302
#define DEFINE_LOAD_AVERAGE_DOUBLE_METRIC(metric)                                     \
303
    DEFINE_GAUGE_METRIC_PROTOTYPE_5ARG(load_average_##metric, MetricUnit::NOUNIT, "", \
304
                                       load_average, Labels({{"mode", #metric}}));
305
DEFINE_LOAD_AVERAGE_DOUBLE_METRIC(1_minutes);
306
DEFINE_LOAD_AVERAGE_DOUBLE_METRIC(5_minutes);
307
DEFINE_LOAD_AVERAGE_DOUBLE_METRIC(15_minutes);
308
309
struct LoadAverageMetrics {
310
2
    LoadAverageMetrics(MetricEntity* ent) : entity(ent) {
311
2
        DOUBLE_GAUGE_METRIC_REGISTER(entity, load_average_1_minutes);
312
2
        DOUBLE_GAUGE_METRIC_REGISTER(entity, load_average_5_minutes);
313
2
        DOUBLE_GAUGE_METRIC_REGISTER(entity, load_average_15_minutes);
314
2
    }
315
316
    MetricEntity* entity = nullptr;
317
    DoubleGauge* load_average_1_minutes;
318
    DoubleGauge* load_average_5_minutes;
319
    DoubleGauge* load_average_15_minutes;
320
};
321
322
#define DEFINE_PROC_STAT_COUNTER_METRIC(metric)                                       \
323
    DEFINE_COUNTER_METRIC_PROTOTYPE_5ARG(proc_##metric, MetricUnit::NOUNIT, "", proc, \
324
                                         Labels({{"mode", #metric}}));
325
DEFINE_PROC_STAT_COUNTER_METRIC(interrupt);
326
DEFINE_PROC_STAT_COUNTER_METRIC(ctxt_switch);
327
DEFINE_PROC_STAT_COUNTER_METRIC(procs_running);
328
DEFINE_PROC_STAT_COUNTER_METRIC(procs_blocked);
329
330
struct ProcMetrics {
331
2
    ProcMetrics(MetricEntity* ent) : entity(ent) {
332
2
        INT_COUNTER_METRIC_REGISTER(entity, proc_interrupt);
333
2
        INT_COUNTER_METRIC_REGISTER(entity, proc_ctxt_switch);
334
2
        INT_COUNTER_METRIC_REGISTER(entity, proc_procs_running);
335
2
        INT_COUNTER_METRIC_REGISTER(entity, proc_procs_blocked);
336
2
    }
337
338
    MetricEntity* entity = nullptr;
339
340
    IntCounter* proc_interrupt;
341
    IntCounter* proc_ctxt_switch;
342
    IntCounter* proc_procs_running;
343
    IntCounter* proc_procs_blocked;
344
};
345
346
// Prototypes of the three server-wide "max" gauges; the SystemMetrics
// constructor registers them on the server entity.
DEFINE_GAUGE_CORE_METRIC_PROTOTYPE_2ARG(max_disk_io_util_percent, MetricUnit::PERCENT);
347
DEFINE_GAUGE_CORE_METRIC_PROTOTYPE_2ARG(max_network_send_bytes_rate, MetricUnit::BYTES);
348
DEFINE_GAUGE_CORE_METRIC_PROTOTYPE_2ARG(max_network_receive_bytes_rate, MetricUnit::BYTES);
349
350
// Name under which SystemMetrics::update is registered as a hook on the server
// entity, and under which it is deregistered again in the destructor.
const char* SystemMetrics::_s_hook_name = "system_metrics";
351
352
// Registers the "server" entity on `registry`, hooks update() onto it, and
// installs every metric family.
//
// registry            must not be null (checked with DCHECK).
// disk_devices        device names for which per-disk counters are created.
// network_interfaces  interface names for which per-interface counters are created.
SystemMetrics::SystemMetrics(MetricRegistry* registry, const std::set<std::string>& disk_devices,
                             const std::vector<std::string>& network_interfaces) {
    DCHECK(registry != nullptr);
    _registry = registry;

    _server_entity = _registry->register_entity("server");
    DCHECK(_server_entity != nullptr);
    // The hook captures `this`; the destructor removes it again.
    _server_entity->register_hook(_s_hook_name, [this] { update(); });

    MetricEntity* const server = _server_entity.get();

    // Per-cpu, per-disk and per-interface families create their own entities;
    // every other family lives directly on the server entity.
    _install_cpu_metrics();
    _install_memory_metrics(server);
    _install_disk_metrics(disk_devices);
    _install_net_metrics(network_interfaces);
    _install_fd_metrics(server);
    _install_snmp_metrics(server);
    _install_load_avg_metrics(server);
    _install_proc_metrics(server);

    INT_GAUGE_METRIC_REGISTER(server, max_disk_io_util_percent);
    INT_GAUGE_METRIC_REGISTER(server, max_network_send_bytes_rate);
    INT_GAUGE_METRIC_REGISTER(server, max_network_receive_bytes_rate);
}
372
373
2
// Removes the update hook from the server entity and releases everything this
// object owns: the per-cpu / per-disk / per-interface metric bundles and the
// line buffer used with getline().
SystemMetrics::~SystemMetrics() {
    DCHECK(_server_entity != nullptr);
    _server_entity->deregister_hook(_s_hook_name);

    // Each table maps a name to a bundle allocated with `new`.
    auto destroy_bundles = [](auto& table) {
        for (auto& entry : table) {
            delete entry.second;
        }
    };
    destroy_bundles(_cpu_metrics);
    destroy_bundles(_disk_metrics);
    destroy_bundles(_network_metrics);

    // free() accepts a null pointer, so no explicit check is required.
    free(_line_ptr);
}
390
391
1
// Refreshes every metric family in turn. The constructor registers this
// function as the hook named _s_hook_name on the server entity.
void SystemMetrics::update() {
392
1
    _update_cpu_metrics();
393
1
    _update_memory_metrics();
394
1
    _update_disk_metrics();
395
1
    _update_net_metrics();
396
1
    _update_fd_metrics();
397
1
    _update_snmp_metrics();
398
1
    _update_load_avg_metrics();
399
1
    _update_proc_metrics();
400
1
}
401
402
2
void SystemMetrics::_install_cpu_metrics() {
403
2
    get_cpu_name();
404
405
2
    int cpu_num = 0;
406
33
    for (auto cpu_name : _cpu_names) {
407
        // NOTE: cpu_name comes from /proc/stat which named 'cpu' is not a real cpu name, it should be skipped.
408
33
        if (cpu_name != "cpu") {
409
32
            cpu_num++;
410
32
        }
411
33
        auto cpu_entity = _registry->register_entity(cpu_name, {{"device", cpu_name}});
412
33
        CpuMetrics* metrics = new CpuMetrics(cpu_entity.get());
413
33
        _cpu_metrics.emplace(cpu_name, metrics);
414
33
    }
415
416
2
    auto cpu_num_entity = _registry->register_entity("doris_be_host_cpu_num");
417
2
    _cpu_num_metrics = std::make_unique<CpuNumberMetrics>(cpu_num_entity.get());
418
419
2
    _cpu_num_metrics->host_cpu_num->set_value(cpu_num);
420
2
}
421
422
// Test-only file paths. In BE_TEST builds the _update_*_metrics functions open
// these paths in place of the real /proc files. They have no initialiser here,
// so a test must assign them before update() runs.
#ifdef BE_TEST
423
const char* k_ut_stat_path;
424
const char* k_ut_diskstats_path;
425
const char* k_ut_net_dev_path;
426
const char* k_ut_fd_path;
427
const char* k_ut_net_snmp_path;
428
const char* k_ut_load_avg_path;
429
const char* k_ut_vmstat_path;
430
#endif
431
432
1
void SystemMetrics::_update_cpu_metrics() {
433
1
#ifdef BE_TEST
434
1
    FILE* fp = fopen(k_ut_stat_path, "r");
435
#else
436
    FILE* fp = fopen("/proc/stat", "r");
437
#endif
438
1
    if (fp == nullptr) {
439
0
        char buf[64];
440
0
        LOG(WARNING) << "open /proc/stat failed, errno=" << errno
441
0
                     << ", message=" << strerror_r(errno, buf, 64);
442
0
        return;
443
0
    }
444
445
41
    while (getline(&_line_ptr, &_line_buf_size, fp) > 0) {
446
40
        char cpu[16];
447
40
        int64_t values[CpuMetrics::cpu_num_metrics];
448
40
        memset(values, 0, sizeof(values));
449
40
        int num = sscanf(_line_ptr,
450
40
                         "%15s"
451
40
                         " %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64
452
40
                         " %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64,
453
40
                         cpu, &values[0], &values[1], &values[2], &values[3], &values[4],
454
40
                         &values[5], &values[6], &values[7], &values[8], &values[9]);
455
40
        if (num < 4) {
456
5
            continue;
457
5
        }
458
459
35
        std::string cpu_name(cpu);
460
35
        auto it = _cpu_metrics.find(cpu_name);
461
35
        if (it == _cpu_metrics.end()) {
462
2
            continue;
463
2
        }
464
465
363
        for (int i = 0; i < CpuMetrics::cpu_num_metrics; ++i) {
466
330
            it->second->metrics[i]->set_value(values[i]);
467
330
        }
468
33
    }
469
470
1
    if (ferror(fp) != 0) {
471
0
        char buf[64];
472
0
        LOG(WARNING) << "getline failed, errno=" << errno
473
0
                     << ", message=" << strerror_r(errno, buf, 64);
474
0
    }
475
476
1
    fclose(fp);
477
1
}
478
479
2
// Creates the memory gauge bundle on `entity`, replacing any previous bundle.
void SystemMetrics::_install_memory_metrics(MetricEntity* entity) {
    _memory_metrics = std::make_unique<MemoryMetrics>(entity);
}
482
483
1
void SystemMetrics::_update_memory_metrics() {
484
1
    _memory_metrics->memory_allocated_bytes->set_value(PerfCounters::get_vm_rss());
485
1
    get_metrics_from_proc_vmstat();
486
1
}
487
488
0
void SystemMetrics::update_allocator_metrics() {
489
0
#if defined(ADDRESS_SANITIZER) || defined(LEAK_SANITIZER) || defined(THREAD_SANITIZER)
490
0
    LOG(INFO) << "Memory tracking is not available with address sanitizer builds.";
491
#elif defined(USE_JEMALLOC)
492
    _memory_metrics->memory_jemalloc_allocated_bytes->set_value(
493
            JemallocControl::get_jemallctl_value<int64_t>("stats.allocated"));
494
    _memory_metrics->memory_jemalloc_active_bytes->set_value(
495
            JemallocControl::get_jemallctl_value<int64_t>("stats.active"));
496
    _memory_metrics->memory_jemalloc_metadata_bytes->set_value(
497
            JemallocControl::get_jemallctl_value<int64_t>("stats.metadata"));
498
    _memory_metrics->memory_jemalloc_resident_bytes->set_value(
499
            JemallocControl::get_jemallctl_value<int64_t>("stats.resident"));
500
    _memory_metrics->memory_jemalloc_mapped_bytes->set_value(
501
            JemallocControl::get_jemallctl_value<int64_t>("stats.mapped"));
502
    _memory_metrics->memory_jemalloc_retained_bytes->set_value(
503
            JemallocControl::get_jemallctl_value<int64_t>("stats.retained"));
504
    _memory_metrics->memory_jemalloc_tcache_bytes->set_value(
505
            JemallocControl::get_je_all_arena_metrics("tcache_bytes"));
506
    _memory_metrics->memory_jemalloc_pactive_num->set_value(
507
            JemallocControl::get_je_all_arena_metrics("pactive"));
508
    _memory_metrics->memory_jemalloc_pdirty_num->set_value(
509
            JemallocControl::get_je_all_arena_metrics("pdirty"));
510
    _memory_metrics->memory_jemalloc_pmuzzy_num->set_value(
511
            JemallocControl::get_je_all_arena_metrics("pmuzzy"));
512
    _memory_metrics->memory_jemalloc_dirty_purged_num->set_value(
513
            JemallocControl::get_je_all_arena_metrics("dirty_purged"));
514
    _memory_metrics->memory_jemalloc_muzzy_purged_num->set_value(
515
            JemallocControl::get_je_all_arena_metrics("muzzy_purged"));
516
#else
517
    _memory_metrics->memory_tcmalloc_allocated_bytes->set_value(
518
            JemallocControl::get_tc_metrics("generic.total_physical_bytes"));
519
    _memory_metrics->memory_tcmalloc_total_thread_cache_bytes->set_value(
520
            JemallocControl::je_cache_bytes());
521
    _memory_metrics->memory_tcmalloc_central_cache_free_bytes->set_value(
522
            JemallocControl::get_tc_metrics("tcmalloc.central_cache_free_bytes"));
523
    _memory_metrics->memory_tcmalloc_transfer_cache_free_bytes->set_value(
524
            JemallocControl::get_tc_metrics("tcmalloc.transfer_cache_free_bytes"));
525
    _memory_metrics->memory_tcmalloc_thread_cache_free_bytes->set_value(
526
            JemallocControl::get_tc_metrics("tcmalloc.thread_cache_free_bytes"));
527
    _memory_metrics->memory_tcmalloc_pageheap_free_bytes->set_value(
528
            JemallocControl::get_tc_metrics("tcmalloc.pageheap_free_bytes"));
529
    _memory_metrics->memory_tcmalloc_pageheap_unmapped_bytes->set_value(
530
            JemallocControl::get_tc_metrics("tcmalloc.pageheap_unmapped_bytes"));
531
#endif
532
0
}
533
534
2
void SystemMetrics::_install_disk_metrics(const std::set<std::string>& disk_devices) {
535
2
    for (auto& disk_device : disk_devices) {
536
2
        auto disk_entity = _registry->register_entity(std::string("disk_metrics.") + disk_device,
537
2
                                                      {{"device", disk_device}});
538
2
        DiskMetrics* metrics = new DiskMetrics(disk_entity.get());
539
2
        _disk_metrics.emplace(disk_device, metrics);
540
2
    }
541
2
}
542
543
1
void SystemMetrics::_update_disk_metrics() {
544
1
#ifdef BE_TEST
545
1
    FILE* fp = fopen(k_ut_diskstats_path, "r");
546
#else
547
    FILE* fp = fopen("/proc/diskstats", "r");
548
#endif
549
1
    if (fp == nullptr) {
550
0
        char buf[64];
551
0
        LOG(WARNING) << "open /proc/diskstats failed, errno=" << errno
552
0
                     << ", message=" << strerror_r(errno, buf, 64);
553
0
        return;
554
0
    }
555
556
    // /proc/diskstats: https://www.kernel.org/doc/Documentation/ABI/testing/procfs-diskstats
557
    // 1 - major number
558
    // 2 - minor mumber
559
    // 3 - device name
560
    // 4 - reads completed successfully
561
    // 5 - reads merged
562
    // 6 - sectors read
563
    // 7 - time spent reading (ms)
564
    // 8 - writes completed
565
    // 9 - writes merged
566
    // 10 - sectors written
567
    // 11 - time spent writing (ms)
568
    // 12 - I/Os currently in progress
569
    // 13 - time spent doing I/Os (ms)
570
    // 14 - weighted time spent doing I/Os (ms)
571
    // I think 1024 is enough for device name
572
1
    int major = 0;
573
1
    int minor = 0;
574
1
    char device[1024];
575
1
    int64_t values[11];
576
35
    while (getline(&_line_ptr, &_line_buf_size, fp) > 0) {
577
34
        memset(values, 0, sizeof(values));
578
34
        int num = sscanf(_line_ptr,
579
34
                         "%d %d %1023s"
580
34
                         " %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64
581
34
                         " %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64,
582
34
                         &major, &minor, device, &values[0], &values[1], &values[2], &values[3],
583
34
                         &values[4], &values[5], &values[6], &values[7], &values[8], &values[9],
584
34
                         &values[10]);
585
34
        if (num < 4) {
586
0
            continue;
587
0
        }
588
34
        auto it = _disk_metrics.find(device);
589
34
        if (it == _disk_metrics.end()) {
590
33
            continue;
591
33
        }
592
        // update disk metrics
593
        // reads_completed: 4 reads completed successfully
594
1
        it->second->disk_reads_completed->set_value(values[0]);
595
        // bytes_read: 6 sectors read * 512; 5 reads merged is ignored
596
1
        it->second->disk_bytes_read->set_value(values[2] * 512);
597
        // read_time_ms: 7 time spent reading (ms)
598
1
        it->second->disk_read_time_ms->set_value(values[3]);
599
        // writes_completed: 8 writes completed
600
1
        it->second->disk_writes_completed->set_value(values[4]);
601
        // bytes_written: 10 sectors write * 512; 9 writes merged is ignored
602
1
        it->second->disk_bytes_written->set_value(values[6] * 512);
603
        // write_time_ms: 11 time spent writing (ms)
604
1
        it->second->disk_write_time_ms->set_value(values[7]);
605
        // io_time_ms: 13 time spent doing I/Os (ms)
606
1
        it->second->disk_io_time_ms->set_value(values[9]);
607
        // io_time_weigthed: 14 - weighted time spent doing I/Os (ms)
608
1
        it->second->disk_io_time_weigthed->set_value(values[10]);
609
1
    }
610
1
    if (ferror(fp) != 0) {
611
0
        char buf[64];
612
0
        LOG(WARNING) << "getline failed, errno=" << errno
613
0
                     << ", message=" << strerror_r(errno, buf, 64);
614
0
    }
615
1
    fclose(fp);
616
1
}
617
618
2
void SystemMetrics::_install_net_metrics(const std::vector<std::string>& interfaces) {
619
2
    for (auto& interface : interfaces) {
620
2
        auto interface_entity = _registry->register_entity(
621
2
                std::string("network_metrics.") + interface, {{"device", interface}});
622
2
        NetworkMetrics* metrics = new NetworkMetrics(interface_entity.get());
623
2
        _network_metrics.emplace(interface, metrics);
624
2
    }
625
2
}
626
627
2
// Creates the SNMP counter bundle on `entity`, replacing any previous bundle.
void SystemMetrics::_install_snmp_metrics(MetricEntity* entity) {
    _snmp_metrics = std::make_unique<SnmpMetrics>(entity);
}
630
631
1
void SystemMetrics::_update_net_metrics() {
632
1
#ifdef BE_TEST
633
    // to mock proc
634
1
    FILE* fp = fopen(k_ut_net_dev_path, "r");
635
#else
636
    FILE* fp = fopen("/proc/net/dev", "r");
637
#endif
638
1
    if (fp == nullptr) {
639
0
        char buf[64];
640
0
        LOG(WARNING) << "open /proc/net/dev failed, errno=" << errno
641
0
                     << ", message=" << strerror_r(errno, buf, 64);
642
0
        return;
643
0
    }
644
645
    // Ignore header
646
1
    if (getline(&_line_ptr, &_line_buf_size, fp) < 0 ||
647
1
        getline(&_line_ptr, &_line_buf_size, fp) < 0) {
648
0
        char buf[64];
649
0
        LOG(WARNING) << "read /proc/net/dev first two line failed, errno=" << errno
650
0
                     << ", message=" << strerror_r(errno, buf, 64);
651
0
        fclose(fp);
652
0
        return;
653
0
    }
654
1
    if (_proc_net_dev_version == 0) {
655
1
        if (strstr(_line_ptr, "compressed") != nullptr) {
656
1
            _proc_net_dev_version = 3;
657
1
        } else if (strstr(_line_ptr, "bytes") != nullptr) {
658
0
            _proc_net_dev_version = 2;
659
0
        } else {
660
0
            _proc_net_dev_version = 1;
661
0
        }
662
1
    }
663
664
5
    while (getline(&_line_ptr, &_line_buf_size, fp) > 0) {
665
4
        char* ptr = strrchr(_line_ptr, ':');
666
4
        if (ptr == nullptr) {
667
0
            continue;
668
0
        }
669
4
        char* start = _line_ptr;
670
13
        while (isspace(*start)) {
671
9
            start++;
672
9
        }
673
4
        std::string interface(start, ptr - start);
674
4
        auto it = _network_metrics.find(interface);
675
4
        if (it == _network_metrics.end()) {
676
3
            continue;
677
3
        }
678
1
        ptr++;
679
1
        int64_t receive_bytes = 0;
680
1
        int64_t receive_packets = 0;
681
1
        int64_t send_bytes = 0;
682
1
        int64_t send_packets = 0;
683
1
        switch (_proc_net_dev_version) {
684
1
        case 3:
685
            // receive: bytes packets errs drop fifo frame compressed multicast
686
            // send:    bytes packets errs drop fifo colls carrier compressed
687
1
            sscanf(ptr,
688
1
                   " %" PRId64 " %" PRId64
689
1
                   " %*d %*d %*d %*d %*d %*d"
690
1
                   " %" PRId64 " %" PRId64 " %*d %*d %*d %*d %*d %*d",
691
1
                   &receive_bytes, &receive_packets, &send_bytes, &send_packets);
692
1
            break;
693
0
        case 2:
694
            // receive: bytes packets errs drop fifo frame
695
            // send:    bytes packets errs drop fifo colls carrier
696
0
            sscanf(ptr,
697
0
                   " %" PRId64 " %" PRId64
698
0
                   " %*d %*d %*d %*d"
699
0
                   " %" PRId64 " %" PRId64 " %*d %*d %*d %*d %*d",
700
0
                   &receive_bytes, &receive_packets, &send_bytes, &send_packets);
701
0
            break;
702
0
        case 1:
703
            // receive: packets errs drop fifo frame
704
            // send: packets errs drop fifo colls carrier
705
0
            sscanf(ptr,
706
0
                   " %" PRId64
707
0
                   " %*d %*d %*d %*d"
708
0
                   " %" PRId64 " %*d %*d %*d %*d %*d",
709
0
                   &receive_packets, &send_packets);
710
0
            break;
711
0
        default:
712
0
            break;
713
1
        }
714
1
        it->second->network_receive_bytes->set_value(receive_bytes);
715
1
        it->second->network_receive_packets->set_value(receive_packets);
716
1
        it->second->network_send_bytes->set_value(send_bytes);
717
1
        it->second->network_send_packets->set_value(send_packets);
718
1
    }
719
1
    if (ferror(fp) != 0) {
720
0
        char buf[64];
721
0
        LOG(WARNING) << "getline failed, errno=" << errno
722
0
                     << ", message=" << strerror_r(errno, buf, 64);
723
0
    }
724
1
    fclose(fp);
725
1
}
726
727
1
void SystemMetrics::_update_snmp_metrics() {
728
1
#ifdef BE_TEST
729
    // to mock proc
730
1
    FILE* fp = fopen(k_ut_net_snmp_path, "r");
731
#else
732
    FILE* fp = fopen("/proc/net/snmp", "r");
733
#endif
734
1
    if (fp == nullptr) {
735
0
        char buf[64];
736
0
        LOG(WARNING) << "open /proc/net/snmp failed, errno=" << errno
737
0
                     << ", message=" << strerror_r(errno, buf, 64);
738
0
        return;
739
0
    }
740
741
    // We only care about Tcp lines, so skip other lines in front of Tcp line
742
1
    int res = 0;
743
7
    while ((res = getline(&_line_ptr, &_line_buf_size, fp)) > 0) {
744
7
        if (strstr(_line_ptr, "Tcp") != nullptr) {
745
1
            break;
746
1
        }
747
7
    }
748
1
    if (res <= 0) {
749
0
        char buf[64];
750
0
        LOG(WARNING) << "failed to skip lines of /proc/net/snmp, errno=" << errno
751
0
                     << ", message=" << strerror_r(errno, buf, 64);
752
0
        fclose(fp);
753
0
        return;
754
0
    }
755
756
    // parse the Tcp header
757
    // Tcp: RtoAlgorithm RtoMin RtoMax MaxConn ActiveOpens PassiveOpens AttemptFails EstabResets CurrEstab InSegs OutSegs RetransSegs InErrs OutRsts InCsumErrors
758
1
    std::vector<std::string> headers = strings::Split(_line_ptr, " ");
759
1
    std::unordered_map<std::string, int32_t> header_map;
760
1
    int32_t pos = 0;
761
16
    for (auto& h : headers) {
762
16
        header_map.emplace(h, pos++);
763
16
    }
764
765
    // read the metrics of TCP
766
1
    if (getline(&_line_ptr, &_line_buf_size, fp) < 0) {
767
0
        char buf[64];
768
0
        LOG(WARNING) << "failed to skip Tcp header line of /proc/net/snmp, errno=" << errno
769
0
                     << ", message=" << strerror_r(errno, buf, 64);
770
0
        fclose(fp);
771
0
        return;
772
0
    }
773
774
    // metric line looks like:
775
    // Tcp: 1 200 120000 -1 47849374 38601877 3353843 2320314 276 1033354613 1166025166 825439 12694 23238924 0
776
1
    std::vector<std::string> metrics = strings::Split(_line_ptr, " ");
777
1
    if (metrics.size() != headers.size()) {
778
0
        LOG(WARNING) << "invalid tcp metrics line: " << _line_ptr;
779
0
        fclose(fp);
780
0
        return;
781
0
    }
782
1
    int64_t retrans_segs = atoi64(metrics[header_map["RetransSegs"]]);
783
1
    int64_t in_errs = atoi64(metrics[header_map["InErrs"]]);
784
1
    int64_t in_segs = atoi64(metrics[header_map["InSegs"]]);
785
1
    int64_t out_segs = atoi64(metrics[header_map["OutSegs"]]);
786
1
    _snmp_metrics->snmp_tcp_retrans_segs->set_value(retrans_segs);
787
1
    _snmp_metrics->snmp_tcp_in_errs->set_value(in_errs);
788
1
    _snmp_metrics->snmp_tcp_in_segs->set_value(in_segs);
789
1
    _snmp_metrics->snmp_tcp_out_segs->set_value(out_segs);
790
791
1
    if (ferror(fp) != 0) {
792
0
        char buf[64];
793
0
        LOG(WARNING) << "getline failed, errno=" << errno
794
0
                     << ", message=" << strerror_r(errno, buf, 64);
795
0
    }
796
1
    fclose(fp);
797
1
}
798
799
2
// Creates the file-descriptor metric set registered on `entity` and stores it
// in _fd_metrics, replacing any previous instance.
void SystemMetrics::_install_fd_metrics(MetricEntity* entity) {
    auto* fresh_metrics = new FileDescriptorMetrics(entity);
    _fd_metrics.reset(fresh_metrics);
}
802
803
1
void SystemMetrics::_update_fd_metrics() {
804
1
#ifdef BE_TEST
805
1
    FILE* fp = fopen(k_ut_fd_path, "r");
806
#else
807
    FILE* fp = fopen("/proc/sys/fs/file-nr", "r");
808
#endif
809
1
    if (fp == nullptr) {
810
0
        char buf[64];
811
0
        LOG(WARNING) << "open /proc/sys/fs/file-nr failed, errno=" << errno
812
0
                     << ", message=" << strerror_r(errno, buf, 64);
813
0
        return;
814
0
    }
815
816
    // /proc/sys/fs/file-nr: https://www.kernel.org/doc/Documentation/sysctl/fs.txt
817
    // 1 - the number of allocated file handles
818
    // 2 - the number of allocated but unused file handles
819
    // 3 - the maximum number of file handles
820
821
1
    int64_t values[3];
822
1
    if (getline(&_line_ptr, &_line_buf_size, fp) > 0) {
823
1
        memset(values, 0, sizeof(values));
824
1
        int num = sscanf(_line_ptr, "%" PRId64 " %" PRId64 " %" PRId64, &values[0], &values[1],
825
1
                         &values[2]);
826
1
        if (num == 3) {
827
1
            _fd_metrics->fd_num_limit->set_value(values[2]);
828
1
            _fd_metrics->fd_num_used->set_value(values[0] - values[1]);
829
1
        }
830
1
    }
831
832
1
    if (ferror(fp) != 0) {
833
0
        char buf[64];
834
0
        LOG(WARNING) << "getline failed, errno=" << errno
835
0
                     << ", message=" << strerror_r(errno, buf, 64);
836
0
    }
837
1
    fclose(fp);
838
1
}
839
840
2
// Creates the load-average metric set registered on `entity` and stores it in
// _load_average_metrics, replacing any previous instance.
void SystemMetrics::_install_load_avg_metrics(MetricEntity* entity) {
    auto* fresh_metrics = new LoadAverageMetrics(entity);
    _load_average_metrics.reset(fresh_metrics);
}
843
844
1
void SystemMetrics::_update_load_avg_metrics() {
845
1
#ifdef BE_TEST
846
1
    FILE* fp = fopen(k_ut_load_avg_path, "r");
847
#else
848
    FILE* fp = fopen("/proc/loadavg", "r");
849
#endif
850
1
    if (fp == nullptr) {
851
0
        char buf[64];
852
0
        LOG(WARNING) << "open /proc/loadavg failed, errno=" << errno
853
0
                     << ", message=" << strerror_r(errno, buf, 64);
854
0
        return;
855
0
    }
856
857
1
    double values[3];
858
1
    if (getline(&_line_ptr, &_line_buf_size, fp) > 0) {
859
1
        memset(values, 0, sizeof(values));
860
1
        int num = sscanf(_line_ptr, "%lf %lf %lf", &values[0], &values[1], &values[2]);
861
1
        if (num == 3) {
862
1
            _load_average_metrics->load_average_1_minutes->set_value(values[0]);
863
1
            _load_average_metrics->load_average_5_minutes->set_value(values[1]);
864
1
            _load_average_metrics->load_average_15_minutes->set_value(values[2]);
865
1
        }
866
1
    }
867
868
1
    if (ferror(fp) != 0) {
869
0
        char buf[64];
870
0
        LOG(WARNING) << "getline failed, errno=" << errno
871
0
                     << ", message=" << strerror_r(errno, buf, 64);
872
0
    }
873
1
    fclose(fp);
874
1
}
875
876
int64_t SystemMetrics::get_max_io_util(const std::map<std::string, int64_t>& lst_value,
877
0
                                       int64_t interval_sec) {
878
0
    int64_t max = 0;
879
0
    for (auto& it : _disk_metrics) {
880
0
        int64_t cur = it.second->disk_io_time_ms->value();
881
0
        const auto find = lst_value.find(it.first);
882
0
        if (find == lst_value.end()) {
883
0
            continue;
884
0
        }
885
0
        int64_t incr = cur - find->second;
886
0
        if (incr > max) max = incr;
887
0
    }
888
0
    return max / interval_sec / 10;
889
0
}
890
891
0
void SystemMetrics::get_disks_io_time(std::map<std::string, int64_t>* map) {
892
0
    map->clear();
893
0
    for (auto& it : _disk_metrics) {
894
0
        map->emplace(it.first, it.second->disk_io_time_ms->value());
895
0
    }
896
0
}
897
898
0
double SystemMetrics::get_load_average_1_min() {
899
0
    if (_load_average_metrics) {
900
0
        return _load_average_metrics->load_average_1_minutes->value();
901
0
    } else {
902
0
        return 0;
903
0
    }
904
0
}
905
906
void SystemMetrics::get_network_traffic(std::map<std::string, int64_t>* send_map,
907
0
                                        std::map<std::string, int64_t>* rcv_map) {
908
0
    send_map->clear();
909
0
    rcv_map->clear();
910
0
    for (auto& it : _network_metrics) {
911
0
        if (it.first == "lo") {
912
0
            continue;
913
0
        }
914
0
        send_map->emplace(it.first, it.second->network_send_bytes->value());
915
0
        rcv_map->emplace(it.first, it.second->network_receive_bytes->value());
916
0
    }
917
0
}
918
919
void SystemMetrics::get_max_net_traffic(const std::map<std::string, int64_t>& lst_send_map,
920
                                        const std::map<std::string, int64_t>& lst_rcv_map,
921
                                        int64_t interval_sec, int64_t* send_rate,
922
0
                                        int64_t* rcv_rate) {
923
0
    int64_t max_send = 0;
924
0
    int64_t max_rcv = 0;
925
0
    for (auto& it : _network_metrics) {
926
0
        int64_t cur_send = it.second->network_send_bytes->value();
927
0
        int64_t cur_rcv = it.second->network_receive_bytes->value();
928
929
0
        const auto find_send = lst_send_map.find(it.first);
930
0
        if (find_send != lst_send_map.end()) {
931
0
            int64_t incr = cur_send - find_send->second;
932
0
            if (incr > max_send) max_send = incr;
933
0
        }
934
0
        const auto find_rcv = lst_rcv_map.find(it.first);
935
0
        if (find_rcv != lst_rcv_map.end()) {
936
0
            int64_t incr = cur_rcv - find_rcv->second;
937
0
            if (incr > max_rcv) max_rcv = incr;
938
0
        }
939
0
    }
940
941
0
    *send_rate = max_send / interval_sec;
942
0
    *rcv_rate = max_rcv / interval_sec;
943
0
}
944
945
void SystemMetrics::update_max_disk_io_util_percent(const std::map<std::string, int64_t>& lst_value,
946
0
                                                    int64_t interval_sec) {
947
0
    max_disk_io_util_percent->set_value(get_max_io_util(lst_value, interval_sec));
948
0
}
949
950
0
void SystemMetrics::update_max_network_send_bytes_rate(int64_t max_send_bytes_rate) {
951
0
    max_network_send_bytes_rate->set_value(max_send_bytes_rate);
952
0
}
953
954
0
void SystemMetrics::update_max_network_receive_bytes_rate(int64_t max_receive_bytes_rate) {
955
0
    max_network_receive_bytes_rate->set_value(max_receive_bytes_rate);
956
0
}
957
958
2
// Creates the process/scheduler metric set registered on `entity` and stores
// it in _proc_metrics, replacing any previous instance.
void SystemMetrics::_install_proc_metrics(MetricEntity* entity) {
    auto* fresh_metrics = new ProcMetrics(entity);
    _proc_metrics.reset(fresh_metrics);
}
961
962
1
void SystemMetrics::_update_proc_metrics() {
963
1
#ifdef BE_TEST
964
1
    FILE* fp = fopen(k_ut_stat_path, "r");
965
#else
966
    FILE* fp = fopen("/proc/stat", "r");
967
#endif
968
1
    if (fp == nullptr) {
969
0
        char buf[64];
970
0
        LOG(WARNING) << "open /proc/stat failed, errno=" << errno
971
0
                     << ", message=" << strerror_r(errno, buf, 64);
972
0
        return;
973
0
    }
974
975
1
    uint64_t inter = 0, ctxt = 0, procs_r = 0, procs_b = 0;
976
41
    while (getline(&_line_ptr, &_line_buf_size, fp) > 0) {
977
40
        char* start_pos = nullptr;
978
40
        start_pos = strstr(_line_ptr, "intr ");
979
40
        if (start_pos) {
980
1
            sscanf(start_pos, "intr %" PRIu64, &inter);
981
1
            _proc_metrics->proc_interrupt->set_value(inter);
982
1
        }
983
984
40
        start_pos = strstr(_line_ptr, "ctxt ");
985
40
        if (start_pos) {
986
1
            sscanf(start_pos, "ctxt %" PRIu64, &ctxt);
987
1
            _proc_metrics->proc_ctxt_switch->set_value(ctxt);
988
1
        }
989
990
40
        start_pos = strstr(_line_ptr, "procs_running ");
991
40
        if (start_pos) {
992
1
            sscanf(start_pos, "procs_running %" PRIu64, &procs_r);
993
1
            _proc_metrics->proc_procs_running->set_value(procs_r);
994
1
        }
995
996
40
        start_pos = strstr(_line_ptr, "procs_blocked ");
997
40
        if (start_pos) {
998
1
            sscanf(start_pos, "procs_blocked %" PRIu64, &procs_b);
999
1
            _proc_metrics->proc_procs_blocked->set_value(procs_b);
1000
1
        }
1001
40
    }
1002
1003
1
    if (ferror(fp) != 0) {
1004
0
        char buf[64];
1005
0
        LOG(WARNING) << "getline failed, errno=" << errno
1006
0
                     << ", message=" << strerror_r(errno, buf, 64);
1007
0
    }
1008
1009
1
    fclose(fp);
1010
1
}
1011
1012
0
void SystemMetrics::update_be_avail_cpu_num() {
1013
0
    int64_t physical_cpu_num = _cpu_num_metrics->host_cpu_num->value();
1014
0
    if (physical_cpu_num > 0) {
1015
0
        physical_cpu_num = CGroupUtil::get_cgroup_limited_cpu_number(physical_cpu_num);
1016
0
        _cpu_num_metrics->avail_cpu_num->set_value(physical_cpu_num);
1017
0
    }
1018
0
}
1019
1020
1
void SystemMetrics::get_metrics_from_proc_vmstat() {
1021
1
#ifdef BE_TEST
1022
1
    FILE* fp = fopen(k_ut_vmstat_path, "r");
1023
#else
1024
    FILE* fp = fopen("/proc/vmstat", "r");
1025
#endif
1026
1
    if (fp == nullptr) {
1027
0
        char buf[64];
1028
0
        LOG(WARNING) << "open /proc/vmstat failed, errno=" << errno
1029
0
                     << ", message=" << strerror_r(errno, buf, 64);
1030
0
        return;
1031
0
    }
1032
1033
13
    while (getline(&_line_ptr, &_line_buf_size, fp) > 0) {
1034
12
        uint64_t value;
1035
12
        char name[64];
1036
12
        int num = sscanf(_line_ptr, "%s %" PRIu64, name, &value);
1037
12
        if (num < 2) {
1038
0
            continue;
1039
0
        }
1040
1041
12
        if (strcmp(name, "pgpgin") == 0) {
1042
1
            _memory_metrics->memory_pgpgin->set_value(value);
1043
11
        } else if (strcmp(name, "pgpgout") == 0) {
1044
1
            _memory_metrics->memory_pgpgout->set_value(value);
1045
10
        } else if (strcmp(name, "pswpin") == 0) {
1046
1
            _memory_metrics->memory_pswpin->set_value(value);
1047
9
        } else if (strcmp(name, "pswpout") == 0) {
1048
1
            _memory_metrics->memory_pswpout->set_value(value);
1049
1
        }
1050
12
    }
1051
1052
1
    if (ferror(fp) != 0) {
1053
0
        char buf[64];
1054
0
        LOG(WARNING) << "getline failed, errno=" << errno
1055
0
                     << ", message=" << strerror_r(errno, buf, 64);
1056
0
    }
1057
1058
1
    fclose(fp);
1059
1
}
1060
1061
2
void SystemMetrics::get_cpu_name() {
1062
2
#ifdef BE_TEST
1063
2
    FILE* fp = fopen(k_ut_stat_path, "r");
1064
#else
1065
    FILE* fp = fopen("/proc/stat", "r");
1066
#endif
1067
2
    if (fp == nullptr) {
1068
1
        char buf[64];
1069
1
        LOG(WARNING) << "open /proc/stat failed, errno=" << errno
1070
1
                     << ", message=" << strerror_r(errno, buf, 64);
1071
1
        return;
1072
1
    }
1073
1074
41
    while (getline(&_line_ptr, &_line_buf_size, fp) > 0) {
1075
40
        char cpu[16];
1076
40
        char* start_pos = nullptr;
1077
40
        start_pos = strstr(_line_ptr, "cpu");
1078
40
        if (start_pos) {
1079
33
            sscanf(_line_ptr, "%15s", cpu);
1080
33
            std::string cpu_name(cpu);
1081
33
            _cpu_names.push_back(cpu_name);
1082
33
        }
1083
40
    }
1084
1085
1
    if (ferror(fp) != 0) {
1086
0
        char buf[64];
1087
0
        LOG(WARNING) << "getline failed, errno=" << errno
1088
0
                     << ", message=" << strerror_r(errno, buf, 64);
1089
0
    }
1090
1091
1
    fclose(fp);
1092
1
}
1093
1094
} // namespace doris