Coverage Report

Created: 2026-04-13 10:38

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/common/metrics/system_metrics.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "common/metrics/system_metrics.h"
19
20
#include <absl/strings/str_split.h>
21
#include <glog/logging.h>
22
23
#include <functional>
24
#include <ostream>
25
#include <unordered_map>
26
#include <utility>
27
28
#include "common/cast_set.h"
29
#include "runtime/memory/jemalloc_control.h"
30
#include "util/cgroup_util.h"
31
#include "util/perf_counters.h"
32
33
namespace doris {
34
DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(avail_cpu_num, MetricUnit::NOUNIT);
35
36
DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(host_cpu_num, MetricUnit::NOUNIT);
37
struct CpuNumberMetrics {
38
4
    CpuNumberMetrics(MetricEntity* ent) : entity(ent) {
39
4
        INT_COUNTER_METRIC_REGISTER(entity, host_cpu_num);
40
4
        INT_COUNTER_METRIC_REGISTER(entity, avail_cpu_num);
41
4
    }
42
43
    IntCounter* host_cpu_num {nullptr};
44
    IntCounter* avail_cpu_num {nullptr};
45
    MetricEntity* entity = nullptr;
46
};
47
48
#define DEFINE_CPU_COUNTER_METRIC(metric)                                            \
49
    DEFINE_COUNTER_METRIC_PROTOTYPE_5ARG(cpu_##metric, MetricUnit::PERCENT, "", cpu, \
50
                                         Labels({{"mode", #metric}}));
51
DEFINE_CPU_COUNTER_METRIC(user);
52
DEFINE_CPU_COUNTER_METRIC(nice);
53
DEFINE_CPU_COUNTER_METRIC(system);
54
DEFINE_CPU_COUNTER_METRIC(idle);
55
DEFINE_CPU_COUNTER_METRIC(iowait);
56
DEFINE_CPU_COUNTER_METRIC(irq);
57
DEFINE_CPU_COUNTER_METRIC(soft_irq);
58
DEFINE_CPU_COUNTER_METRIC(steal);
59
DEFINE_CPU_COUNTER_METRIC(guest);
60
DEFINE_CPU_COUNTER_METRIC(guest_nice);
61
62
// /proc/stat: http://www.linuxhowtos.org/System/procstat.htm
63
struct CpuMetrics {
64
67
    CpuMetrics(MetricEntity* ent) : entity(ent) {
65
67
        INT_COUNTER_METRIC_REGISTER(entity, cpu_user);
66
67
        INT_COUNTER_METRIC_REGISTER(entity, cpu_nice);
67
67
        INT_COUNTER_METRIC_REGISTER(entity, cpu_system);
68
67
        INT_COUNTER_METRIC_REGISTER(entity, cpu_idle);
69
67
        INT_COUNTER_METRIC_REGISTER(entity, cpu_iowait);
70
67
        INT_COUNTER_METRIC_REGISTER(entity, cpu_irq);
71
67
        INT_COUNTER_METRIC_REGISTER(entity, cpu_soft_irq);
72
67
        INT_COUNTER_METRIC_REGISTER(entity, cpu_steal);
73
67
        INT_COUNTER_METRIC_REGISTER(entity, cpu_guest);
74
67
        INT_COUNTER_METRIC_REGISTER(entity, cpu_guest_nice);
75
76
67
        metrics[0] = cpu_user;
77
67
        metrics[1] = cpu_nice;
78
67
        metrics[2] = cpu_system;
79
67
        metrics[3] = cpu_idle;
80
67
        metrics[4] = cpu_iowait;
81
67
        metrics[5] = cpu_irq;
82
67
        metrics[6] = cpu_soft_irq;
83
67
        metrics[7] = cpu_steal;
84
67
        metrics[8] = cpu_guest;
85
67
        metrics[9] = cpu_guest_nice;
86
67
    }
87
88
    static constexpr int cpu_num_metrics = 10;
89
90
    MetricEntity* entity = nullptr;
91
    IntCounter* cpu_user;
92
    IntCounter* cpu_nice;
93
    IntCounter* cpu_system;
94
    IntCounter* cpu_idle;
95
    IntCounter* cpu_iowait;
96
    IntCounter* cpu_irq;
97
    IntCounter* cpu_soft_irq;
98
    IntCounter* cpu_steal;
99
    IntCounter* cpu_guest;
100
    IntCounter* cpu_guest_nice;
101
102
    IntCounter* metrics[cpu_num_metrics];
103
};
104
105
#define DEFINE_MEMORY_GAUGE_METRIC(metric, unit) \
106
    DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(memory_##metric, unit);
107
DEFINE_MEMORY_GAUGE_METRIC(allocated_bytes, MetricUnit::BYTES);
108
DEFINE_MEMORY_GAUGE_METRIC(pgpgin, MetricUnit::NOUNIT);
109
DEFINE_MEMORY_GAUGE_METRIC(pgpgout, MetricUnit::NOUNIT);
110
DEFINE_MEMORY_GAUGE_METRIC(pswpin, MetricUnit::NOUNIT);
111
DEFINE_MEMORY_GAUGE_METRIC(pswpout, MetricUnit::NOUNIT);
112
#ifndef USE_JEMALLOC
113
DEFINE_MEMORY_GAUGE_METRIC(tcmalloc_allocated_bytes, MetricUnit::BYTES);
114
DEFINE_MEMORY_GAUGE_METRIC(tcmalloc_total_thread_cache_bytes, MetricUnit::BYTES);
115
DEFINE_MEMORY_GAUGE_METRIC(tcmalloc_central_cache_free_bytes, MetricUnit::BYTES);
116
DEFINE_MEMORY_GAUGE_METRIC(tcmalloc_transfer_cache_free_bytes, MetricUnit::BYTES);
117
DEFINE_MEMORY_GAUGE_METRIC(tcmalloc_thread_cache_free_bytes, MetricUnit::BYTES);
118
DEFINE_MEMORY_GAUGE_METRIC(tcmalloc_pageheap_free_bytes, MetricUnit::BYTES);
119
DEFINE_MEMORY_GAUGE_METRIC(tcmalloc_pageheap_unmapped_bytes, MetricUnit::BYTES);
120
#else
121
DEFINE_MEMORY_GAUGE_METRIC(jemalloc_allocated_bytes, MetricUnit::BYTES);
122
DEFINE_MEMORY_GAUGE_METRIC(jemalloc_active_bytes, MetricUnit::BYTES);
123
DEFINE_MEMORY_GAUGE_METRIC(jemalloc_metadata_bytes, MetricUnit::BYTES);
124
DEFINE_MEMORY_GAUGE_METRIC(jemalloc_resident_bytes, MetricUnit::BYTES);
125
DEFINE_MEMORY_GAUGE_METRIC(jemalloc_mapped_bytes, MetricUnit::BYTES);
126
DEFINE_MEMORY_GAUGE_METRIC(jemalloc_retained_bytes, MetricUnit::BYTES);
127
DEFINE_MEMORY_GAUGE_METRIC(jemalloc_tcache_bytes, MetricUnit::BYTES);
128
DEFINE_MEMORY_GAUGE_METRIC(jemalloc_pactive_num, MetricUnit::NOUNIT);
129
DEFINE_MEMORY_GAUGE_METRIC(jemalloc_pdirty_num, MetricUnit::NOUNIT);
130
DEFINE_MEMORY_GAUGE_METRIC(jemalloc_pmuzzy_num, MetricUnit::NOUNIT);
131
DEFINE_MEMORY_GAUGE_METRIC(jemalloc_dirty_purged_num, MetricUnit::NOUNIT);
132
DEFINE_MEMORY_GAUGE_METRIC(jemalloc_muzzy_purged_num, MetricUnit::NOUNIT);
133
#endif
134
135
struct MemoryMetrics {
136
4
    MemoryMetrics(MetricEntity* ent) : entity(ent) {
137
4
        INT_GAUGE_METRIC_REGISTER(entity, memory_allocated_bytes);
138
4
        INT_GAUGE_METRIC_REGISTER(entity, memory_pgpgin);
139
4
        INT_GAUGE_METRIC_REGISTER(entity, memory_pgpgout);
140
4
        INT_GAUGE_METRIC_REGISTER(entity, memory_pswpin);
141
4
        INT_GAUGE_METRIC_REGISTER(entity, memory_pswpout);
142
143
4
#ifndef USE_JEMALLOC
144
4
        INT_GAUGE_METRIC_REGISTER(entity, memory_tcmalloc_allocated_bytes);
145
4
        INT_GAUGE_METRIC_REGISTER(entity, memory_tcmalloc_total_thread_cache_bytes);
146
4
        INT_GAUGE_METRIC_REGISTER(entity, memory_tcmalloc_central_cache_free_bytes);
147
4
        INT_GAUGE_METRIC_REGISTER(entity, memory_tcmalloc_transfer_cache_free_bytes);
148
4
        INT_GAUGE_METRIC_REGISTER(entity, memory_tcmalloc_thread_cache_free_bytes);
149
4
        INT_GAUGE_METRIC_REGISTER(entity, memory_tcmalloc_pageheap_free_bytes);
150
4
        INT_GAUGE_METRIC_REGISTER(entity, memory_tcmalloc_pageheap_unmapped_bytes);
151
#else
152
        INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_allocated_bytes);
153
        INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_active_bytes);
154
        INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_metadata_bytes);
155
        INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_resident_bytes);
156
        INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_mapped_bytes);
157
        INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_retained_bytes);
158
        INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_tcache_bytes);
159
        INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_pactive_num);
160
        INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_pdirty_num);
161
        INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_pmuzzy_num);
162
        INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_dirty_purged_num);
163
        INT_GAUGE_METRIC_REGISTER(entity, memory_jemalloc_muzzy_purged_num);
164
#endif
165
4
    }
166
167
    MetricEntity* entity = nullptr;
168
    IntGauge* memory_allocated_bytes;
169
    IntGauge* memory_pgpgin;
170
    IntGauge* memory_pgpgout;
171
    IntGauge* memory_pswpin;
172
    IntGauge* memory_pswpout;
173
174
#ifndef USE_JEMALLOC
175
    IntGauge* memory_tcmalloc_allocated_bytes;
176
    IntGauge* memory_tcmalloc_total_thread_cache_bytes;
177
    IntGauge* memory_tcmalloc_central_cache_free_bytes;
178
    IntGauge* memory_tcmalloc_transfer_cache_free_bytes;
179
    IntGauge* memory_tcmalloc_thread_cache_free_bytes;
180
    IntGauge* memory_tcmalloc_pageheap_free_bytes;
181
    IntGauge* memory_tcmalloc_pageheap_unmapped_bytes;
182
#else
183
    IntGauge* memory_jemalloc_allocated_bytes;
184
    IntGauge* memory_jemalloc_active_bytes;
185
    IntGauge* memory_jemalloc_metadata_bytes;
186
    IntGauge* memory_jemalloc_resident_bytes;
187
    IntGauge* memory_jemalloc_mapped_bytes;
188
    IntGauge* memory_jemalloc_retained_bytes;
189
    IntGauge* memory_jemalloc_tcache_bytes;
190
    IntGauge* memory_jemalloc_pactive_num;
191
    IntGauge* memory_jemalloc_pdirty_num;
192
    IntGauge* memory_jemalloc_pmuzzy_num;
193
    IntGauge* memory_jemalloc_dirty_purged_num;
194
    IntGauge* memory_jemalloc_muzzy_purged_num;
195
#endif
196
};
197
198
#define DEFINE_DISK_COUNTER_METRIC(metric, unit) \
199
    DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(disk_##metric, unit);
200
DEFINE_DISK_COUNTER_METRIC(reads_completed, MetricUnit::OPERATIONS);
201
DEFINE_DISK_COUNTER_METRIC(bytes_read, MetricUnit::BYTES);
202
DEFINE_DISK_COUNTER_METRIC(read_time_ms, MetricUnit::MILLISECONDS);
203
DEFINE_DISK_COUNTER_METRIC(writes_completed, MetricUnit::OPERATIONS);
204
DEFINE_DISK_COUNTER_METRIC(bytes_written, MetricUnit::BYTES);
205
DEFINE_DISK_COUNTER_METRIC(write_time_ms, MetricUnit::MILLISECONDS);
206
DEFINE_DISK_COUNTER_METRIC(io_time_ms, MetricUnit::MILLISECONDS);
207
DEFINE_DISK_COUNTER_METRIC(io_time_weigthed, MetricUnit::MILLISECONDS);
208
209
struct DiskMetrics {
210
4
    DiskMetrics(MetricEntity* ent) : entity(ent) {
211
4
        INT_COUNTER_METRIC_REGISTER(entity, disk_reads_completed);
212
4
        INT_COUNTER_METRIC_REGISTER(entity, disk_bytes_read);
213
4
        INT_COUNTER_METRIC_REGISTER(entity, disk_read_time_ms);
214
4
        INT_COUNTER_METRIC_REGISTER(entity, disk_writes_completed);
215
4
        INT_COUNTER_METRIC_REGISTER(entity, disk_bytes_written);
216
4
        INT_COUNTER_METRIC_REGISTER(entity, disk_write_time_ms);
217
4
        INT_COUNTER_METRIC_REGISTER(entity, disk_io_time_ms);
218
4
        INT_COUNTER_METRIC_REGISTER(entity, disk_io_time_weigthed);
219
4
    }
220
221
    MetricEntity* entity = nullptr;
222
    IntCounter* disk_reads_completed;
223
    IntCounter* disk_bytes_read;
224
    IntCounter* disk_read_time_ms;
225
    IntCounter* disk_writes_completed;
226
    IntCounter* disk_bytes_written;
227
    IntCounter* disk_write_time_ms;
228
    IntCounter* disk_io_time_ms;
229
    IntCounter* disk_io_time_weigthed;
230
};
231
232
#define DEFINE_NETWORK_COUNTER_METRIC(metric, unit) \
233
    DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(network_##metric, unit);
234
DEFINE_NETWORK_COUNTER_METRIC(receive_bytes, MetricUnit::BYTES);
235
DEFINE_NETWORK_COUNTER_METRIC(receive_packets, MetricUnit::PACKETS);
236
DEFINE_NETWORK_COUNTER_METRIC(send_bytes, MetricUnit::BYTES);
237
DEFINE_NETWORK_COUNTER_METRIC(send_packets, MetricUnit::PACKETS);
238
239
struct NetworkMetrics {
240
12
    NetworkMetrics(MetricEntity* ent) : entity(ent) {
241
12
        INT_COUNTER_METRIC_REGISTER(entity, network_receive_bytes);
242
12
        INT_COUNTER_METRIC_REGISTER(entity, network_receive_packets);
243
12
        INT_COUNTER_METRIC_REGISTER(entity, network_send_bytes);
244
12
        INT_COUNTER_METRIC_REGISTER(entity, network_send_packets);
245
12
    }
246
247
    MetricEntity* entity = nullptr;
248
    IntCounter* network_receive_bytes;
249
    IntCounter* network_receive_packets;
250
    IntCounter* network_send_bytes;
251
    IntCounter* network_send_packets;
252
};
253
254
#define DEFINE_SNMP_COUNTER_METRIC(metric, unit, desc) \
255
    DEFINE_COUNTER_METRIC_PROTOTYPE_3ARG(snmp_##metric, unit, desc);
256
DEFINE_SNMP_COUNTER_METRIC(tcp_in_errs, MetricUnit::NOUNIT,
257
                           "The number of all problematic TCP packets received");
258
DEFINE_SNMP_COUNTER_METRIC(tcp_retrans_segs, MetricUnit::NOUNIT, "All TCP packets retransmitted");
259
DEFINE_SNMP_COUNTER_METRIC(tcp_in_segs, MetricUnit::NOUNIT, "All received TCP packets");
260
DEFINE_SNMP_COUNTER_METRIC(tcp_out_segs, MetricUnit::NOUNIT, "All send TCP packets with RST mark");
261
262
// metrics read from /proc/net/snmp
263
struct SnmpMetrics {
264
4
    SnmpMetrics(MetricEntity* ent) : entity(ent) {
265
4
        INT_COUNTER_METRIC_REGISTER(entity, snmp_tcp_in_errs);
266
4
        INT_COUNTER_METRIC_REGISTER(entity, snmp_tcp_retrans_segs);
267
4
        INT_COUNTER_METRIC_REGISTER(entity, snmp_tcp_in_segs);
268
4
        INT_COUNTER_METRIC_REGISTER(entity, snmp_tcp_out_segs);
269
4
    }
270
271
    MetricEntity* entity = nullptr;
272
    IntCounter* snmp_tcp_in_errs;
273
    IntCounter* snmp_tcp_retrans_segs;
274
    IntCounter* snmp_tcp_in_segs;
275
    IntCounter* snmp_tcp_out_segs;
276
};
277
278
#define DEFINE_FD_COUNTER_METRIC(metric, unit) \
279
    DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(fd_##metric, unit);
280
DEFINE_FD_COUNTER_METRIC(num_limit, MetricUnit::NOUNIT);
281
DEFINE_FD_COUNTER_METRIC(num_used, MetricUnit::NOUNIT);
282
283
struct FileDescriptorMetrics {
284
4
    FileDescriptorMetrics(MetricEntity* ent) : entity(ent) {
285
4
        INT_GAUGE_METRIC_REGISTER(entity, fd_num_limit);
286
4
        INT_GAUGE_METRIC_REGISTER(entity, fd_num_used);
287
4
    }
288
289
    MetricEntity* entity = nullptr;
290
    IntGauge* fd_num_limit;
291
    IntGauge* fd_num_used;
292
};
293
294
#define DEFINE_LOAD_AVERAGE_DOUBLE_METRIC(metric)                                     \
295
    DEFINE_GAUGE_METRIC_PROTOTYPE_5ARG(load_average_##metric, MetricUnit::NOUNIT, "", \
296
                                       load_average, Labels({{"mode", #metric}}));
297
DEFINE_LOAD_AVERAGE_DOUBLE_METRIC(1_minutes);
298
DEFINE_LOAD_AVERAGE_DOUBLE_METRIC(5_minutes);
299
DEFINE_LOAD_AVERAGE_DOUBLE_METRIC(15_minutes);
300
301
struct LoadAverageMetrics {
302
4
    LoadAverageMetrics(MetricEntity* ent) : entity(ent) {
303
4
        DOUBLE_GAUGE_METRIC_REGISTER(entity, load_average_1_minutes);
304
4
        DOUBLE_GAUGE_METRIC_REGISTER(entity, load_average_5_minutes);
305
4
        DOUBLE_GAUGE_METRIC_REGISTER(entity, load_average_15_minutes);
306
4
    }
307
308
    MetricEntity* entity = nullptr;
309
    DoubleGauge* load_average_1_minutes;
310
    DoubleGauge* load_average_5_minutes;
311
    DoubleGauge* load_average_15_minutes;
312
};
313
314
#define DEFINE_PROC_STAT_COUNTER_METRIC(metric)                                       \
315
    DEFINE_COUNTER_METRIC_PROTOTYPE_5ARG(proc_##metric, MetricUnit::NOUNIT, "", proc, \
316
                                         Labels({{"mode", #metric}}));
317
DEFINE_PROC_STAT_COUNTER_METRIC(interrupt);
318
DEFINE_PROC_STAT_COUNTER_METRIC(ctxt_switch);
319
DEFINE_PROC_STAT_COUNTER_METRIC(procs_running);
320
DEFINE_PROC_STAT_COUNTER_METRIC(procs_blocked);
321
322
struct ProcMetrics {
323
4
    ProcMetrics(MetricEntity* ent) : entity(ent) {
324
4
        INT_COUNTER_METRIC_REGISTER(entity, proc_interrupt);
325
4
        INT_COUNTER_METRIC_REGISTER(entity, proc_ctxt_switch);
326
4
        INT_COUNTER_METRIC_REGISTER(entity, proc_procs_running);
327
4
        INT_COUNTER_METRIC_REGISTER(entity, proc_procs_blocked);
328
4
    }
329
330
    MetricEntity* entity = nullptr;
331
332
    IntCounter* proc_interrupt;
333
    IntCounter* proc_ctxt_switch;
334
    IntCounter* proc_procs_running;
335
    IntCounter* proc_procs_blocked;
336
};
337
338
DEFINE_GAUGE_CORE_METRIC_PROTOTYPE_2ARG(max_disk_io_util_percent, MetricUnit::PERCENT);
339
DEFINE_GAUGE_CORE_METRIC_PROTOTYPE_2ARG(max_network_send_bytes_rate, MetricUnit::BYTES);
340
DEFINE_GAUGE_CORE_METRIC_PROTOTYPE_2ARG(max_network_receive_bytes_rate, MetricUnit::BYTES);
341
342
const char* SystemMetrics::_s_hook_name = "system_metrics";
343
344
SystemMetrics::SystemMetrics(MetricRegistry* registry, const std::set<std::string>& disk_devices,
345
4
                             const std::vector<std::string>& network_interfaces) {
346
4
    DCHECK(registry != nullptr);
347
4
    _registry = registry;
348
4
    _server_entity = _registry->register_entity("server");
349
4
    DCHECK(_server_entity != nullptr);
350
4
    _server_entity->register_hook(_s_hook_name, std::bind(&SystemMetrics::update, this));
351
4
    _install_cpu_metrics();
352
4
    _install_memory_metrics(_server_entity.get());
353
4
    _install_disk_metrics(disk_devices);
354
4
    _install_net_metrics(network_interfaces);
355
4
    _install_fd_metrics(_server_entity.get());
356
4
    _install_snmp_metrics(_server_entity.get());
357
4
    _install_load_avg_metrics(_server_entity.get());
358
4
    _install_proc_metrics(_server_entity.get());
359
360
4
    INT_GAUGE_METRIC_REGISTER(_server_entity.get(), max_disk_io_util_percent);
361
4
    INT_GAUGE_METRIC_REGISTER(_server_entity.get(), max_network_send_bytes_rate);
362
4
    INT_GAUGE_METRIC_REGISTER(_server_entity.get(), max_network_receive_bytes_rate);
363
4
}
364
365
3
// Deregisters the update hook and releases everything this object owns: the
// per-device metric structs and the getline() buffer.
SystemMetrics::~SystemMetrics() {
    DCHECK(_server_entity != nullptr);
    _server_entity->deregister_hook(_s_hook_name);

    // The maps hold raw owning pointers created with `new` in the
    // _install_*_metrics functions.
    for (auto& entry : _cpu_metrics) {
        delete entry.second;
    }
    for (auto& entry : _disk_metrics) {
        delete entry.second;
    }
    for (auto& entry : _network_metrics) {
        delete entry.second;
    }

    // _line_ptr is filled by getline(), so it is released with free();
    // free(nullptr) is a no-op, which covers the never-read case.
    free(_line_ptr);
}
382
383
593
void SystemMetrics::update() {
384
593
    _update_cpu_metrics();
385
593
    _update_memory_metrics();
386
593
    _update_disk_metrics();
387
593
    _update_net_metrics();
388
593
    _update_fd_metrics();
389
593
    _update_snmp_metrics();
390
593
    _update_load_avg_metrics();
391
593
    _update_proc_metrics();
392
593
}
393
394
4
void SystemMetrics::_install_cpu_metrics() {
395
4
    get_cpu_name();
396
397
4
    int cpu_num = 0;
398
67
    for (auto cpu_name : _cpu_names) {
399
        // NOTE: cpu_name comes from /proc/stat which named 'cpu' is not a real cpu name, it should be skipped.
400
67
        if (cpu_name != "cpu") {
401
64
            cpu_num++;
402
64
        }
403
67
        auto cpu_entity = _registry->register_entity(cpu_name, {{"device", cpu_name}});
404
67
        CpuMetrics* metrics = new CpuMetrics(cpu_entity.get());
405
67
        _cpu_metrics.emplace(cpu_name, metrics);
406
67
    }
407
408
4
    auto cpu_num_entity = _registry->register_entity("doris_be_host_cpu_num");
409
4
    _cpu_num_metrics = std::make_unique<CpuNumberMetrics>(cpu_num_entity.get());
410
411
4
    _cpu_num_metrics->host_cpu_num->set_value(cpu_num);
412
4
}
413
414
#ifdef BE_TEST
// Paths of the mock files that replace the /proc files in unit-test builds.
// Each one is passed to fopen() by the matching _update_* function, so it must
// be assigned before that function runs.
const char* k_ut_stat_path = nullptr;
const char* k_ut_diskstats_path = nullptr;
const char* k_ut_net_dev_path = nullptr;
const char* k_ut_fd_path = nullptr;
const char* k_ut_net_snmp_path = nullptr;
const char* k_ut_load_avg_path = nullptr;
const char* k_ut_vmstat_path = nullptr;
#endif
423
424
593
void SystemMetrics::_update_cpu_metrics() {
425
#ifdef BE_TEST
426
    FILE* fp = fopen(k_ut_stat_path, "r");
427
#else
428
593
    FILE* fp = fopen("/proc/stat", "r");
429
593
#endif
430
593
    if (fp == nullptr) {
431
0
        char buf[64];
432
0
        LOG(WARNING) << "open /proc/stat failed, errno=" << errno
433
0
                     << ", message=" << strerror_r(errno, buf, 64);
434
0
        return;
435
0
    }
436
437
14.8k
    while (getline(&_line_ptr, &_line_buf_size, fp) > 0) {
438
14.2k
        char cpu[16];
439
14.2k
        int64_t values[CpuMetrics::cpu_num_metrics];
440
14.2k
        memset(values, 0, sizeof(values));
441
14.2k
        int num = sscanf(_line_ptr,
442
14.2k
                         "%15s"
443
14.2k
                         " %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64
444
14.2k
                         " %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64,
445
14.2k
                         cpu, &values[0], &values[1], &values[2], &values[3], &values[4],
446
14.2k
                         &values[5], &values[6], &values[7], &values[8], &values[9]);
447
14.2k
        if (num < 4) {
448
2.96k
            continue;
449
2.96k
        }
450
451
11.2k
        std::string cpu_name(cpu);
452
11.2k
        auto it = _cpu_metrics.find(cpu_name);
453
11.2k
        if (it == _cpu_metrics.end()) {
454
1.18k
            continue;
455
1.18k
        }
456
457
111k
        for (int i = 0; i < CpuMetrics::cpu_num_metrics; ++i) {
458
100k
            it->second->metrics[i]->set_value(values[i]);
459
100k
        }
460
10.0k
    }
461
462
593
    if (ferror(fp) != 0) {
463
0
        char buf[64];
464
0
        LOG(WARNING) << "getline failed, errno=" << errno
465
0
                     << ", message=" << strerror_r(errno, buf, 64);
466
0
    }
467
468
593
    fclose(fp);
469
593
}
470
471
4
// Creates the MemoryMetrics group, registering its gauges on `entity`.
// Uses std::make_unique instead of a raw `new`, as _install_cpu_metrics does.
void SystemMetrics::_install_memory_metrics(MetricEntity* entity) {
    _memory_metrics = std::make_unique<MemoryMetrics>(entity);
}
474
475
593
void SystemMetrics::_update_memory_metrics() {
476
593
    _memory_metrics->memory_allocated_bytes->set_value(PerfCounters::get_vm_rss());
477
593
    get_metrics_from_proc_vmstat();
478
593
}
479
480
0
void SystemMetrics::update_allocator_metrics() {
481
0
#if defined(ADDRESS_SANITIZER) || defined(LEAK_SANITIZER) || defined(THREAD_SANITIZER)
482
0
    LOG(INFO) << "Memory tracking is not available with address sanitizer builds.";
483
#elif defined(USE_JEMALLOC)
484
    _memory_metrics->memory_jemalloc_allocated_bytes->set_value(
485
            JemallocControl::get_jemallctl_value<int64_t>("stats.allocated"));
486
    _memory_metrics->memory_jemalloc_active_bytes->set_value(
487
            JemallocControl::get_jemallctl_value<int64_t>("stats.active"));
488
    _memory_metrics->memory_jemalloc_metadata_bytes->set_value(
489
            JemallocControl::get_jemallctl_value<int64_t>("stats.metadata"));
490
    _memory_metrics->memory_jemalloc_resident_bytes->set_value(
491
            JemallocControl::get_jemallctl_value<int64_t>("stats.resident"));
492
    _memory_metrics->memory_jemalloc_mapped_bytes->set_value(
493
            JemallocControl::get_jemallctl_value<int64_t>("stats.mapped"));
494
    _memory_metrics->memory_jemalloc_retained_bytes->set_value(
495
            JemallocControl::get_jemallctl_value<int64_t>("stats.retained"));
496
    _memory_metrics->memory_jemalloc_tcache_bytes->set_value(
497
            JemallocControl::get_je_all_arena_metrics("tcache_bytes"));
498
    _memory_metrics->memory_jemalloc_pactive_num->set_value(
499
            JemallocControl::get_je_all_arena_metrics("pactive"));
500
    _memory_metrics->memory_jemalloc_pdirty_num->set_value(
501
            JemallocControl::get_je_all_arena_metrics("pdirty"));
502
    _memory_metrics->memory_jemalloc_pmuzzy_num->set_value(
503
            JemallocControl::get_je_all_arena_metrics("pmuzzy"));
504
    _memory_metrics->memory_jemalloc_dirty_purged_num->set_value(
505
            JemallocControl::get_je_all_arena_metrics("dirty_purged"));
506
    _memory_metrics->memory_jemalloc_muzzy_purged_num->set_value(
507
            JemallocControl::get_je_all_arena_metrics("muzzy_purged"));
508
#else
509
    _memory_metrics->memory_tcmalloc_allocated_bytes->set_value(
510
            JemallocControl::get_tc_metrics("generic.total_physical_bytes"));
511
    _memory_metrics->memory_tcmalloc_total_thread_cache_bytes->set_value(
512
            JemallocControl::je_cache_bytes());
513
    _memory_metrics->memory_tcmalloc_central_cache_free_bytes->set_value(
514
            JemallocControl::get_tc_metrics("tcmalloc.central_cache_free_bytes"));
515
    _memory_metrics->memory_tcmalloc_transfer_cache_free_bytes->set_value(
516
            JemallocControl::get_tc_metrics("tcmalloc.transfer_cache_free_bytes"));
517
    _memory_metrics->memory_tcmalloc_thread_cache_free_bytes->set_value(
518
            JemallocControl::get_tc_metrics("tcmalloc.thread_cache_free_bytes"));
519
    _memory_metrics->memory_tcmalloc_pageheap_free_bytes->set_value(
520
            JemallocControl::get_tc_metrics("tcmalloc.pageheap_free_bytes"));
521
    _memory_metrics->memory_tcmalloc_pageheap_unmapped_bytes->set_value(
522
            JemallocControl::get_tc_metrics("tcmalloc.pageheap_unmapped_bytes"));
523
#endif
524
0
}
525
526
4
void SystemMetrics::_install_disk_metrics(const std::set<std::string>& disk_devices) {
527
4
    for (auto& disk_device : disk_devices) {
528
4
        auto disk_entity = _registry->register_entity(std::string("disk_metrics.") + disk_device,
529
4
                                                      {{"device", disk_device}});
530
4
        DiskMetrics* metrics = new DiskMetrics(disk_entity.get());
531
4
        _disk_metrics.emplace(disk_device, metrics);
532
4
    }
533
4
}
534
535
593
void SystemMetrics::_update_disk_metrics() {
536
#ifdef BE_TEST
537
    FILE* fp = fopen(k_ut_diskstats_path, "r");
538
#else
539
593
    FILE* fp = fopen("/proc/diskstats", "r");
540
593
#endif
541
593
    if (fp == nullptr) {
542
0
        char buf[64];
543
0
        LOG(WARNING) << "open /proc/diskstats failed, errno=" << errno
544
0
                     << ", message=" << strerror_r(errno, buf, 64);
545
0
        return;
546
0
    }
547
548
    // /proc/diskstats: https://www.kernel.org/doc/Documentation/ABI/testing/procfs-diskstats
549
    // 1 - major number
550
    // 2 - minor mumber
551
    // 3 - device name
552
    // 4 - reads completed successfully
553
    // 5 - reads merged
554
    // 6 - sectors read
555
    // 7 - time spent reading (ms)
556
    // 8 - writes completed
557
    // 9 - writes merged
558
    // 10 - sectors written
559
    // 11 - time spent writing (ms)
560
    // 12 - I/Os currently in progress
561
    // 13 - time spent doing I/Os (ms)
562
    // 14 - weighted time spent doing I/Os (ms)
563
    // I think 1024 is enough for device name
564
593
    int major = 0;
565
593
    int minor = 0;
566
593
    char device[1024];
567
593
    int64_t values[11];
568
7.13k
    while (getline(&_line_ptr, &_line_buf_size, fp) > 0) {
569
6.54k
        memset(values, 0, sizeof(values));
570
6.54k
        int num = sscanf(_line_ptr,
571
6.54k
                         "%d %d %1023s"
572
6.54k
                         " %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64
573
6.54k
                         " %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64 " %" PRId64,
574
6.54k
                         &major, &minor, device, &values[0], &values[1], &values[2], &values[3],
575
6.54k
                         &values[4], &values[5], &values[6], &values[7], &values[8], &values[9],
576
6.54k
                         &values[10]);
577
6.54k
        if (num < 4) {
578
0
            continue;
579
0
        }
580
6.54k
        auto it = _disk_metrics.find(device);
581
6.54k
        if (it == _disk_metrics.end()) {
582
5.95k
            continue;
583
5.95k
        }
584
        // update disk metrics
585
        // reads_completed: 4 reads completed successfully
586
593
        it->second->disk_reads_completed->set_value(values[0]);
587
        // bytes_read: 6 sectors read * 512; 5 reads merged is ignored
588
593
        it->second->disk_bytes_read->set_value(values[2] * 512);
589
        // read_time_ms: 7 time spent reading (ms)
590
593
        it->second->disk_read_time_ms->set_value(values[3]);
591
        // writes_completed: 8 writes completed
592
593
        it->second->disk_writes_completed->set_value(values[4]);
593
        // bytes_written: 10 sectors write * 512; 9 writes merged is ignored
594
593
        it->second->disk_bytes_written->set_value(values[6] * 512);
595
        // write_time_ms: 11 time spent writing (ms)
596
593
        it->second->disk_write_time_ms->set_value(values[7]);
597
        // io_time_ms: 13 time spent doing I/Os (ms)
598
593
        it->second->disk_io_time_ms->set_value(values[9]);
599
        // io_time_weigthed: 14 - weighted time spent doing I/Os (ms)
600
593
        it->second->disk_io_time_weigthed->set_value(values[10]);
601
593
    }
602
593
    if (ferror(fp) != 0) {
603
0
        char buf[64];
604
0
        LOG(WARNING) << "getline failed, errno=" << errno
605
0
                     << ", message=" << strerror_r(errno, buf, 64);
606
0
    }
607
593
    fclose(fp);
608
593
}
609
610
4
void SystemMetrics::_install_net_metrics(const std::vector<std::string>& interfaces) {
611
12
    for (auto& interface : interfaces) {
612
12
        auto interface_entity = _registry->register_entity(
613
12
                std::string("network_metrics.") + interface, {{"device", interface}});
614
12
        NetworkMetrics* metrics = new NetworkMetrics(interface_entity.get());
615
12
        _network_metrics.emplace(interface, metrics);
616
12
    }
617
4
}
618
619
4
// Creates the SnmpMetrics group, registering its counters on `entity`.
// Uses std::make_unique instead of a raw `new`, as _install_cpu_metrics does.
void SystemMetrics::_install_snmp_metrics(MetricEntity* entity) {
    _snmp_metrics = std::make_unique<SnmpMetrics>(entity);
}
622
623
593
void SystemMetrics::_update_net_metrics() {
624
#ifdef BE_TEST
625
    // to mock proc
626
    FILE* fp = fopen(k_ut_net_dev_path, "r");
627
#else
628
593
    FILE* fp = fopen("/proc/net/dev", "r");
629
593
#endif
630
593
    if (fp == nullptr) {
631
0
        char buf[64];
632
0
        LOG(WARNING) << "open /proc/net/dev failed, errno=" << errno
633
0
                     << ", message=" << strerror_r(errno, buf, 64);
634
0
        return;
635
0
    }
636
637
    // Ignore header
638
593
    if (getline(&_line_ptr, &_line_buf_size, fp) < 0 ||
639
593
        getline(&_line_ptr, &_line_buf_size, fp) < 0) {
640
0
        char buf[64];
641
0
        LOG(WARNING) << "read /proc/net/dev first two line failed, errno=" << errno
642
0
                     << ", message=" << strerror_r(errno, buf, 64);
643
0
        fclose(fp);
644
0
        return;
645
0
    }
646
593
    if (_proc_net_dev_version == 0) {
647
3
        if (strstr(_line_ptr, "compressed") != nullptr) {
648
3
            _proc_net_dev_version = 3;
649
3
        } else if (strstr(_line_ptr, "bytes") != nullptr) {
650
0
            _proc_net_dev_version = 2;
651
0
        } else {
652
0
            _proc_net_dev_version = 1;
653
0
        }
654
3
    }
655
656
23.2k
    while (getline(&_line_ptr, &_line_buf_size, fp) > 0) {
657
22.6k
        char* ptr = strrchr(_line_ptr, ':');
658
22.6k
        if (ptr == nullptr) {
659
0
            continue;
660
0
        }
661
22.6k
        char* start = _line_ptr;
662
26.2k
        while (isspace(*start)) {
663
3.56k
            start++;
664
3.56k
        }
665
22.6k
        std::string interface(start, ptr - start);
666
22.6k
        auto it = _network_metrics.find(interface);
667
22.6k
        if (it == _network_metrics.end()) {
668
19.6k
            continue;
669
19.6k
        }
670
2.96k
        ptr++;
671
2.96k
        int64_t receive_bytes = 0;
672
2.96k
        int64_t receive_packets = 0;
673
2.96k
        int64_t send_bytes = 0;
674
2.96k
        int64_t send_packets = 0;
675
2.96k
        switch (_proc_net_dev_version) {
676
2.96k
        case 3:
677
            // receive: bytes packets errs drop fifo frame compressed multicast
678
            // send:    bytes packets errs drop fifo colls carrier compressed
679
2.96k
            sscanf(ptr,
680
2.96k
                   " %" PRId64 " %" PRId64
681
2.96k
                   " %*d %*d %*d %*d %*d %*d"
682
2.96k
                   " %" PRId64 " %" PRId64 " %*d %*d %*d %*d %*d %*d",
683
2.96k
                   &receive_bytes, &receive_packets, &send_bytes, &send_packets);
684
2.96k
            break;
685
0
        case 2:
686
            // receive: bytes packets errs drop fifo frame
687
            // send:    bytes packets errs drop fifo colls carrier
688
0
            sscanf(ptr,
689
0
                   " %" PRId64 " %" PRId64
690
0
                   " %*d %*d %*d %*d"
691
0
                   " %" PRId64 " %" PRId64 " %*d %*d %*d %*d %*d",
692
0
                   &receive_bytes, &receive_packets, &send_bytes, &send_packets);
693
0
            break;
694
0
        case 1:
695
            // receive: packets errs drop fifo frame
696
            // send: packets errs drop fifo colls carrier
697
0
            sscanf(ptr,
698
0
                   " %" PRId64
699
0
                   " %*d %*d %*d %*d"
700
0
                   " %" PRId64 " %*d %*d %*d %*d %*d",
701
0
                   &receive_packets, &send_packets);
702
0
            break;
703
0
        default:
704
0
            break;
705
2.96k
        }
706
2.96k
        it->second->network_receive_bytes->set_value(receive_bytes);
707
2.96k
        it->second->network_receive_packets->set_value(receive_packets);
708
2.96k
        it->second->network_send_bytes->set_value(send_bytes);
709
2.96k
        it->second->network_send_packets->set_value(send_packets);
710
2.96k
    }
711
593
    if (ferror(fp) != 0) {
712
0
        char buf[64];
713
0
        LOG(WARNING) << "getline failed, errno=" << errno
714
0
                     << ", message=" << strerror_r(errno, buf, 64);
715
0
    }
716
593
    fclose(fp);
717
593
}
718
719
593
void SystemMetrics::_update_snmp_metrics() {
720
#ifdef BE_TEST
721
    // to mock proc
722
    FILE* fp = fopen(k_ut_net_snmp_path, "r");
723
#else
724
593
    FILE* fp = fopen("/proc/net/snmp", "r");
725
593
#endif
726
593
    if (fp == nullptr) {
727
0
        char buf[64];
728
0
        LOG(WARNING) << "open /proc/net/snmp failed, errno=" << errno
729
0
                     << ", message=" << strerror_r(errno, buf, 64);
730
0
        return;
731
0
    }
732
733
    // We only care about Tcp lines, so skip other lines in front of Tcp line
734
593
    int64_t res = 0;
735
4.15k
    while ((res = getline(&_line_ptr, &_line_buf_size, fp)) > 0) {
736
4.15k
        if (strstr(_line_ptr, "Tcp") != nullptr) {
737
593
            break;
738
593
        }
739
4.15k
    }
740
593
    if (res <= 0) {
741
0
        char buf[64];
742
0
        LOG(WARNING) << "failed to skip lines of /proc/net/snmp, errno=" << errno
743
0
                     << ", message=" << strerror_r(errno, buf, 64);
744
0
        fclose(fp);
745
0
        return;
746
0
    }
747
748
    // parse the Tcp header
749
    // Tcp: RtoAlgorithm RtoMin RtoMax MaxConn ActiveOpens PassiveOpens AttemptFails EstabResets CurrEstab InSegs OutSegs RetransSegs InErrs OutRsts InCsumErrors
750
593
    std::vector<std::string> headers = absl::StrSplit(_line_ptr, " ");
751
593
    std::unordered_map<std::string, int32_t> header_map;
752
593
    int32_t pos = 0;
753
9.48k
    for (auto& h : headers) {
754
9.48k
        header_map.emplace(h, pos++);
755
9.48k
    }
756
757
    // read the metrics of TCP
758
593
    if (getline(&_line_ptr, &_line_buf_size, fp) < 0) {
759
0
        char buf[64];
760
0
        LOG(WARNING) << "failed to skip Tcp header line of /proc/net/snmp, errno=" << errno
761
0
                     << ", message=" << strerror_r(errno, buf, 64);
762
0
        fclose(fp);
763
0
        return;
764
0
    }
765
766
    // metric line looks like:
767
    // Tcp: 1 200 120000 -1 47849374 38601877 3353843 2320314 276 1033354613 1166025166 825439 12694 23238924 0
768
593
    std::vector<std::string> metrics = absl::StrSplit(_line_ptr, " ");
769
593
    if (metrics.size() != headers.size()) {
770
0
        LOG(WARNING) << "invalid tcp metrics line: " << _line_ptr;
771
0
        fclose(fp);
772
0
        return;
773
0
    }
774
593
    int64_t retrans_segs = std::stoll(metrics[header_map["RetransSegs"]]);
775
593
    int64_t in_errs = std::stoll(metrics[header_map["InErrs"]]);
776
593
    int64_t in_segs = std::stoll(metrics[header_map["InSegs"]]);
777
593
    int64_t out_segs = std::stoll(metrics[header_map["OutSegs"]]);
778
593
    _snmp_metrics->snmp_tcp_retrans_segs->set_value(retrans_segs);
779
593
    _snmp_metrics->snmp_tcp_in_errs->set_value(in_errs);
780
593
    _snmp_metrics->snmp_tcp_in_segs->set_value(in_segs);
781
593
    _snmp_metrics->snmp_tcp_out_segs->set_value(out_segs);
782
783
593
    if (ferror(fp) != 0) {
784
0
        char buf[64];
785
0
        LOG(WARNING) << "getline failed, errno=" << errno
786
0
                     << ", message=" << strerror_r(errno, buf, 64);
787
0
    }
788
593
    fclose(fp);
789
593
}
790
791
4
// Creates the FileDescriptorMetrics for `entity` and stores it in _fd_metrics,
// replacing whatever instance was held before.
void SystemMetrics::_install_fd_metrics(MetricEntity* entity) {
    _fd_metrics = std::make_unique<FileDescriptorMetrics>(entity);
}
794
795
593
void SystemMetrics::_update_fd_metrics() {
796
#ifdef BE_TEST
797
    FILE* fp = fopen(k_ut_fd_path, "r");
798
#else
799
593
    FILE* fp = fopen("/proc/sys/fs/file-nr", "r");
800
593
#endif
801
593
    if (fp == nullptr) {
802
0
        char buf[64];
803
0
        LOG(WARNING) << "open /proc/sys/fs/file-nr failed, errno=" << errno
804
0
                     << ", message=" << strerror_r(errno, buf, 64);
805
0
        return;
806
0
    }
807
808
    // /proc/sys/fs/file-nr: https://www.kernel.org/doc/Documentation/sysctl/fs.txt
809
    // 1 - the number of allocated file handles
810
    // 2 - the number of allocated but unused file handles
811
    // 3 - the maximum number of file handles
812
813
593
    int64_t values[3];
814
593
    if (getline(&_line_ptr, &_line_buf_size, fp) > 0) {
815
593
        memset(values, 0, sizeof(values));
816
593
        int num = sscanf(_line_ptr, "%" PRId64 " %" PRId64 " %" PRId64, &values[0], &values[1],
817
593
                         &values[2]);
818
593
        if (num == 3) {
819
593
            _fd_metrics->fd_num_limit->set_value(values[2]);
820
593
            _fd_metrics->fd_num_used->set_value(values[0] - values[1]);
821
593
        }
822
593
    }
823
824
593
    if (ferror(fp) != 0) {
825
0
        char buf[64];
826
0
        LOG(WARNING) << "getline failed, errno=" << errno
827
0
                     << ", message=" << strerror_r(errno, buf, 64);
828
0
    }
829
593
    fclose(fp);
830
593
}
831
832
4
// Creates the LoadAverageMetrics for `entity` and stores it in
// _load_average_metrics, replacing whatever instance was held before.
void SystemMetrics::_install_load_avg_metrics(MetricEntity* entity) {
    _load_average_metrics = std::make_unique<LoadAverageMetrics>(entity);
}
835
836
593
void SystemMetrics::_update_load_avg_metrics() {
837
#ifdef BE_TEST
838
    FILE* fp = fopen(k_ut_load_avg_path, "r");
839
#else
840
593
    FILE* fp = fopen("/proc/loadavg", "r");
841
593
#endif
842
593
    if (fp == nullptr) {
843
0
        char buf[64];
844
0
        LOG(WARNING) << "open /proc/loadavg failed, errno=" << errno
845
0
                     << ", message=" << strerror_r(errno, buf, 64);
846
0
        return;
847
0
    }
848
849
593
    double values[3];
850
593
    if (getline(&_line_ptr, &_line_buf_size, fp) > 0) {
851
593
        memset(values, 0, sizeof(values));
852
593
        int num = sscanf(_line_ptr, "%lf %lf %lf", &values[0], &values[1], &values[2]);
853
593
        if (num == 3) {
854
593
            _load_average_metrics->load_average_1_minutes->set_value(values[0]);
855
593
            _load_average_metrics->load_average_5_minutes->set_value(values[1]);
856
593
            _load_average_metrics->load_average_15_minutes->set_value(values[2]);
857
593
        }
858
593
    }
859
860
593
    if (ferror(fp) != 0) {
861
0
        char buf[64];
862
0
        LOG(WARNING) << "getline failed, errno=" << errno
863
0
                     << ", message=" << strerror_r(errno, buf, 64);
864
0
    }
865
593
    fclose(fp);
866
593
}
867
868
int64_t SystemMetrics::get_max_io_util(const std::map<std::string, int64_t>& lst_value,
869
1.48k
                                       int64_t interval_sec) {
870
1.48k
    int64_t max = 0;
871
1.48k
    for (auto& it : _disk_metrics) {
872
1.48k
        int64_t cur = it.second->disk_io_time_ms->value();
873
1.48k
        const auto find = lst_value.find(it.first);
874
1.48k
        if (find == lst_value.end()) {
875
2
            continue;
876
2
        }
877
1.47k
        int64_t incr = cur - find->second;
878
1.47k
        if (incr > max) max = incr;
879
1.47k
    }
880
1.48k
    return max / interval_sec / 10;
881
1.48k
}
882
883
1.48k
void SystemMetrics::get_disks_io_time(std::map<std::string, int64_t>* map) {
884
1.48k
    map->clear();
885
1.48k
    for (auto& it : _disk_metrics) {
886
1.48k
        map->emplace(it.first, it.second->disk_io_time_ms->value());
887
1.48k
    }
888
1.48k
}
889
890
9.41k
double SystemMetrics::get_load_average_1_min() {
891
9.41k
    if (_load_average_metrics) {
892
9.41k
        return _load_average_metrics->load_average_1_minutes->value();
893
9.41k
    } else {
894
0
        return 0;
895
0
    }
896
9.41k
}
897
898
void SystemMetrics::get_network_traffic(std::map<std::string, int64_t>* send_map,
899
592
                                        std::map<std::string, int64_t>* rcv_map) {
900
592
    send_map->clear();
901
592
    rcv_map->clear();
902
2.96k
    for (auto& it : _network_metrics) {
903
2.96k
        if (it.first == "lo") {
904
592
            continue;
905
592
        }
906
2.36k
        send_map->emplace(it.first, it.second->network_send_bytes->value());
907
2.36k
        rcv_map->emplace(it.first, it.second->network_receive_bytes->value());
908
2.36k
    }
909
592
}
910
911
void SystemMetrics::get_max_net_traffic(const std::map<std::string, int64_t>& lst_send_map,
912
                                        const std::map<std::string, int64_t>& lst_rcv_map,
913
                                        int64_t interval_sec, int64_t* send_rate,
914
590
                                        int64_t* rcv_rate) {
915
590
    int64_t max_send = 0;
916
590
    int64_t max_rcv = 0;
917
2.95k
    for (auto& it : _network_metrics) {
918
2.95k
        int64_t cur_send = it.second->network_send_bytes->value();
919
2.95k
        int64_t cur_rcv = it.second->network_receive_bytes->value();
920
921
2.95k
        const auto find_send = lst_send_map.find(it.first);
922
2.95k
        if (find_send != lst_send_map.end()) {
923
2.36k
            int64_t incr = cur_send - find_send->second;
924
2.36k
            if (incr > max_send) max_send = incr;
925
2.36k
        }
926
2.95k
        const auto find_rcv = lst_rcv_map.find(it.first);
927
2.95k
        if (find_rcv != lst_rcv_map.end()) {
928
2.36k
            int64_t incr = cur_rcv - find_rcv->second;
929
2.36k
            if (incr > max_rcv) max_rcv = incr;
930
2.36k
        }
931
2.95k
    }
932
933
590
    *send_rate = max_send / interval_sec;
934
590
    *rcv_rate = max_rcv / interval_sec;
935
590
}
936
937
void SystemMetrics::update_max_disk_io_util_percent(const std::map<std::string, int64_t>& lst_value,
938
590
                                                    int64_t interval_sec) {
939
590
    max_disk_io_util_percent->set_value(get_max_io_util(lst_value, interval_sec));
940
590
}
941
942
590
void SystemMetrics::update_max_network_send_bytes_rate(int64_t max_send_bytes_rate) {
943
590
    max_network_send_bytes_rate->set_value(max_send_bytes_rate);
944
590
}
945
946
590
void SystemMetrics::update_max_network_receive_bytes_rate(int64_t max_receive_bytes_rate) {
947
590
    max_network_receive_bytes_rate->set_value(max_receive_bytes_rate);
948
590
}
949
950
4
// Creates the ProcMetrics for `entity` and stores it in _proc_metrics, replacing
// whatever instance was held before.
void SystemMetrics::_install_proc_metrics(MetricEntity* entity) {
    _proc_metrics = std::make_unique<ProcMetrics>(entity);
}
953
954
593
void SystemMetrics::_update_proc_metrics() {
955
#ifdef BE_TEST
956
    FILE* fp = fopen(k_ut_stat_path, "r");
957
#else
958
593
    FILE* fp = fopen("/proc/stat", "r");
959
593
#endif
960
593
    if (fp == nullptr) {
961
0
        char buf[64];
962
0
        LOG(WARNING) << "open /proc/stat failed, errno=" << errno
963
0
                     << ", message=" << strerror_r(errno, buf, 64);
964
0
        return;
965
0
    }
966
967
593
    uint64_t inter = 0, ctxt = 0, procs_r = 0, procs_b = 0;
968
14.8k
    while (getline(&_line_ptr, &_line_buf_size, fp) > 0) {
969
14.2k
        char* start_pos = nullptr;
970
14.2k
        start_pos = strstr(_line_ptr, "intr ");
971
14.2k
        if (start_pos) {
972
593
            sscanf(start_pos, "intr %" PRIu64, &inter);
973
593
            _proc_metrics->proc_interrupt->set_value(inter);
974
593
        }
975
976
14.2k
        start_pos = strstr(_line_ptr, "ctxt ");
977
14.2k
        if (start_pos) {
978
593
            sscanf(start_pos, "ctxt %" PRIu64, &ctxt);
979
593
            _proc_metrics->proc_ctxt_switch->set_value(ctxt);
980
593
        }
981
982
14.2k
        start_pos = strstr(_line_ptr, "procs_running ");
983
14.2k
        if (start_pos) {
984
593
            sscanf(start_pos, "procs_running %" PRIu64, &procs_r);
985
593
            _proc_metrics->proc_procs_running->set_value(procs_r);
986
593
        }
987
988
14.2k
        start_pos = strstr(_line_ptr, "procs_blocked ");
989
14.2k
        if (start_pos) {
990
593
            sscanf(start_pos, "procs_blocked %" PRIu64, &procs_b);
991
593
            _proc_metrics->proc_procs_blocked->set_value(procs_b);
992
593
        }
993
14.2k
    }
994
995
593
    if (ferror(fp) != 0) {
996
0
        char buf[64];
997
0
        LOG(WARNING) << "getline failed, errno=" << errno
998
0
                     << ", message=" << strerror_r(errno, buf, 64);
999
0
    }
1000
1001
593
    fclose(fp);
1002
593
}
1003
1004
590
void SystemMetrics::update_be_avail_cpu_num() {
1005
590
    int64_t physical_cpu_num = _cpu_num_metrics->host_cpu_num->value();
1006
590
    if (physical_cpu_num > 0) {
1007
590
        physical_cpu_num =
1008
590
                CGroupUtil::get_cgroup_limited_cpu_number(cast_set<int32_t>(physical_cpu_num));
1009
590
        _cpu_num_metrics->avail_cpu_num->set_value(physical_cpu_num);
1010
590
    }
1011
590
}
1012
1013
593
void SystemMetrics::get_metrics_from_proc_vmstat() {
1014
#ifdef BE_TEST
1015
    FILE* fp = fopen(k_ut_vmstat_path, "r");
1016
#else
1017
593
    FILE* fp = fopen("/proc/vmstat", "r");
1018
593
#endif
1019
593
    if (fp == nullptr) {
1020
0
        char buf[64];
1021
0
        LOG(WARNING) << "open /proc/vmstat failed, errno=" << errno
1022
0
                     << ", message=" << strerror_r(errno, buf, 64);
1023
0
        return;
1024
0
    }
1025
1026
82.3k
    while (getline(&_line_ptr, &_line_buf_size, fp) > 0) {
1027
81.7k
        uint64_t value;
1028
81.7k
        char name[64];
1029
81.7k
        int num = sscanf(_line_ptr, "%s %" PRIu64, name, &value);
1030
81.7k
        if (num < 2) {
1031
0
            continue;
1032
0
        }
1033
1034
81.7k
        if (strcmp(name, "pgpgin") == 0) {
1035
593
            _memory_metrics->memory_pgpgin->set_value(value);
1036
81.1k
        } else if (strcmp(name, "pgpgout") == 0) {
1037
593
            _memory_metrics->memory_pgpgout->set_value(value);
1038
80.5k
        } else if (strcmp(name, "pswpin") == 0) {
1039
593
            _memory_metrics->memory_pswpin->set_value(value);
1040
79.9k
        } else if (strcmp(name, "pswpout") == 0) {
1041
593
            _memory_metrics->memory_pswpout->set_value(value);
1042
593
        }
1043
81.7k
    }
1044
1045
593
    if (ferror(fp) != 0) {
1046
0
        char buf[64];
1047
0
        LOG(WARNING) << "getline failed, errno=" << errno
1048
0
                     << ", message=" << strerror_r(errno, buf, 64);
1049
0
    }
1050
1051
593
    fclose(fp);
1052
593
}
1053
1054
4
void SystemMetrics::get_cpu_name() {
1055
#ifdef BE_TEST
1056
    FILE* fp = fopen(k_ut_stat_path, "r");
1057
#else
1058
4
    FILE* fp = fopen("/proc/stat", "r");
1059
4
#endif
1060
4
    if (fp == nullptr) {
1061
1
        char buf[64];
1062
1
        LOG(WARNING) << "open /proc/stat failed, errno=" << errno
1063
1
                     << ", message=" << strerror_r(errno, buf, 64);
1064
1
        return;
1065
1
    }
1066
1067
91
    while (getline(&_line_ptr, &_line_buf_size, fp) > 0) {
1068
88
        char cpu[16];
1069
88
        char* start_pos = nullptr;
1070
88
        start_pos = strstr(_line_ptr, "cpu");
1071
88
        if (start_pos) {
1072
67
            sscanf(_line_ptr, "%15s", cpu);
1073
67
            std::string cpu_name(cpu);
1074
67
            _cpu_names.push_back(cpu_name);
1075
67
        }
1076
88
    }
1077
1078
3
    if (ferror(fp) != 0) {
1079
0
        char buf[64];
1080
0
        LOG(WARNING) << "getline failed, errno=" << errno
1081
0
                     << ", message=" << strerror_r(errno, buf, 64);
1082
0
    }
1083
1084
3
    fclose(fp);
1085
3
}
1086
1087
} // namespace doris