Coverage Report

Created: 2025-06-08 15:11

/root/doris/be/src/exec/scan_node.h
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exec/scan-node.h
19
// and modified by Doris
20
21
#pragma once
22
23
#include <functional>
24
#include <string>
25
#include <vector>
26
27
#include "common/status.h"
28
#include "exec/exec_node.h"
29
#include "util/runtime_profile.h"
30
31
namespace doris {
32
33
class DescriptorTbl; // forward declarations: this header only names these types in signatures
34
class ObjectPool;
35
class RuntimeState;
36
class TPlanNode;
37
class TScanRangeParams;
38
39
// Abstract base class of all scan nodes; introduces set_scan_ranges().
40
//
41
// Includes ScanNode common counters:
42
//   BytesRead - total bytes read by this scan node
43
//
44
//   TotalRawHdfsReadTime - it measures the total time spent in the disk-io-mgr's reading
45
//     threads for this node. For example, if we have 3 reading threads and each spent
46
//     1 sec, this counter will report 3 sec.
47
//
48
//   TotalReadThroughput - BytesRead divided by the total time spent in this node
49
//     (from Open to Close). For IO bounded queries, this should be very close to the
50
//     total throughput of all the disks.
51
//
52
//   PerDiskRawHdfsThroughput - the read throughput for each disk. If all the data reside
53
//     on disk, this should be the read throughput of the disk, regardless of whether the
54
//     query is IO bounded or not.
55
//
56
//   NumDisksAccessed - number of disks accessed.
57
//
58
//   AverageIoMgrQueueCapacity - the average queue capacity in the io mgr for this node.
59
//   AverageIoMgrQueueSize - the average queue size (for ready buffers) in the io mgr
60
//     for this node.
61
//
62
//   AverageScannerThreadConcurrency - the average number of active scanner threads. A
63
//     scanner thread is considered active if it is not blocked by IO. This number would
64
//     be low (less than 1) for IO bounded queries. For cpu bounded queries, this number
65
//     would be close to the max scanner threads allowed.
66
//
67
//   AverageHdfsReadThreadConcurrency - the average number of active hdfs reading threads
68
//     reading for this scan node. For IO bound queries, this should be close to the
69
//     number of disks.
70
//
71
//     HdfsReadThreadConcurrencyCount=<i> - the number of samples taken when the hdfs read
72
//       thread concurrency is <i>.
73
//
74
//   ScanRangesComplete - number of scan ranges completed
75
//
76
class ScanNode : public ExecNode {
77
public:
78
    ScanNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs)
79
0
            : ExecNode(pool, tnode, descs) {} // forwards pool, tnode and descs to ExecNode
80
81
    // Set up counters (overrides the base-class prepare(); only declared in this header)
82
    Status prepare(RuntimeState* state) override;
83
84
    // Convert scan_ranges into node-specific scan restrictions.  This should be
85
    // called after prepare(). Pure virtual: every concrete scan node must implement it.
86
    virtual Status set_scan_ranges(RuntimeState* state,
87
                                   const std::vector<TScanRangeParams>& scan_ranges) = 0;
88
89
0
    bool is_scan_node() const override { return true; } // always reports true
90
91
0
    RuntimeProfile::Counter* bytes_read_counter() const { return _bytes_read_counter; } // accessors return the members as-is (each defaults to nullptr)
92
0
    RuntimeProfile::Counter* rows_read_counter() const { return _rows_read_counter; }
93
0
    RuntimeProfile::Counter* total_throughput_counter() const { return _total_throughput_counter; }
94
95
    // Names of the ScanNode common counters (declared here; no initializer in this header)
96
    static const std::string _s_bytes_read_counter;
97
    static const std::string _s_rows_read_counter;
98
    static const std::string _s_total_throughput_counter;
99
    static const std::string _s_num_disks_accessed_counter;
100
101
protected:
102
    RuntimeProfile::Counter* _bytes_read_counter = nullptr; // number of bytes read from the scanner
103
    RuntimeProfile::Counter* _rows_read_counter = nullptr;
104
    // Aggregate read throughput based on wall time [bytes/sec]
105
    RuntimeProfile::Counter* _total_throughput_counter = nullptr;
106
    RuntimeProfile::Counter* _num_disks_accessed_counter = nullptr;
107
};
108
109
} // namespace doris