Coverage Report

Created: 2026-03-14 13:33

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exec/scan/parallel_scanner_builder.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <memory>
21
#include <string>
22
#include <unordered_map>
23
#include <utility>
24
25
#include "exec/scan/olap_scanner.h"
26
#include "storage/rowset/rowset_fwd.h"
27
#include "storage/segment/row_ranges.h"
28
#include "storage/segment/segment_loader.h"
29
#include "storage/tablet/base_tablet.h"
30
#include "storage/tablet/tablet.h"
31
32
namespace doris {
33
34
// Forward declaration: within this header the type is only used through a raw
// pointer (constructor argument and the _parent member).
class OlapScanLocalState;
35
36
// Forward declaration: within this header the type is only used through
// std::shared_ptr (see ScannerSPtr below).
class Scanner;
37
38
// Shared-ownership handle to a Scanner; the element type of the scanner lists
// taken by the ParallelScannerBuilder build functions.
using ScannerSPtr = std::shared_ptr<Scanner>;
39
40
// Builds the list of scanners for an OLAP scan over a set of tablets.
//
// Two private build strategies are declared: by row id and one scanner per
// segment. The definitions of all non-inline members live outside this header.
//
// NOTE(review): this class uses std::list, std::map and std::vector, but the
// header only directly includes <memory>, <string>, <unordered_map> and
// <utility>; the others are presumably pulled in transitively by the project
// headers -- consider including them explicitly.
class ParallelScannerBuilder {
41
public:
42
    // Captures the inputs needed to build scanners.
    //
    // `tablets` and `key_ranges` are copied into the builder (for `key_ranges`
    // only the pointers are copied, not the pointed-to OlapScanRange objects).
    // `read_sources` is stored as a reference, so the caller's vector must
    // outlive this builder. `parent` and `state` are stored as raw pointers.
    ParallelScannerBuilder(OlapScanLocalState* parent,
43
                           const std::vector<TabletWithVersion>& tablets,
44
                           std::vector<TabletReadSource>& read_sources,
45
                           const std::shared_ptr<RuntimeProfile>& profile,
46
                           const std::vector<OlapScanRange*>& key_ranges, RuntimeState* state,
47
                           int64_t limit, bool is_dup_mow_key, bool is_preaggregation)
48
122k
            : _parent(parent),
49
122k
              _scanner_profile(profile),
50
122k
              _state(state),
51
122k
              _limit(limit),
52
122k
              _is_dup_mow_key(is_dup_mow_key),
53
122k
              _is_preaggregation(is_preaggregation),
54
122k
              _tablets(tablets.cbegin(), tablets.cend()),
55
122k
              _key_ranges(key_ranges.cbegin(), key_ranges.cend()),
56
122k
              _read_sources(read_sources) {}
57
58
    // Public entry point for building scanners; defined outside this header.
    // NOTE(review): presumably appends the built scanners to `scanners` and
    // picks between the by-rowid and per-segment strategies -- confirm against
    // the implementation.
    Status build_scanners(std::list<ScannerSPtr>& scanners);
59
60
122k
    // Overrides the maximum number of scanners to build (default: 16).
    void set_max_scanners_count(size_t count) { _max_scanners_count = count; }
61
62
123k
    // Overrides the minimum number of rows per scanner (default: 2 * 1024 * 1024).
    // NOTE(review): `size` is signed but the member is size_t, so a negative
    // argument converts to a very large unsigned value. This setter also does
    // not update _rows_per_scanner.
    void set_min_rows_per_scanner(int64_t size) { _min_rows_per_scanner = size; }
63
64
39
    // Enables or disables the one-scanner-per-segment mode (default: disabled).
    void set_scan_parallelism_by_per_segment(bool v) { _scan_parallelism_by_per_segment = v; }
65
66
123k
    // Returns a pointer to the builder-owned statistics object; the pointer is
    // only valid while this builder is alive.
    const OlapReaderStatistics* builder_stats() const { return &_builder_stats; }
67
68
private:
69
    // Defined outside this header.
    // NOTE(review): presumably loads segment/row information used to split the
    // work (e.g. fills _all_segments_rows and _total_rows) -- confirm.
    Status _load();
70
71
    // Row-id based build strategy; defined outside this header.
    Status _build_scanners_by_rowid(std::list<ScannerSPtr>& scanners);
72
73
    // Build scanners so that each segment is handled by its own scanner.
    Status _build_scanners_by_per_segment(std::list<ScannerSPtr>& scanners);
74
75
    // Creates a single OlapScanner for `tablet` at `version` over `key_ranges`.
    // `read_source` is taken by rvalue reference, so callers hand it over with
    // std::move (or pass a temporary).
    std::shared_ptr<OlapScanner> _build_scanner(BaseTabletSPtr tablet, int64_t version,
76
                                                const std::vector<OlapScanRange*>& key_ranges,
77
                                                TabletReadSource&& read_source);
78
79
    // Non-owning pointer to the scan operator's local state.
    OlapScanLocalState* _parent;
80
81
    /// Upper limit on the number of scanners to build.
    size_t _max_scanners_count {16};
82
83
    /// Minimum number of rows handled by one scanner.
    size_t _min_rows_per_scanner {2 * 1024 * 1024};
84
85
    // NOTE(review): presumably the total row count across all segments -- confirm.
    size_t _total_rows {};
86
87
    // Initialized at construction from the default value of
    // _min_rows_per_scanner (declared above, so it is already initialized here).
    // NOTE(review): presumably recomputed by the implementation before use -- confirm.
    size_t _rows_per_scanner {_min_rows_per_scanner};
88
89
    // NOTE(review): presumably the per-segment row counts of each rowset,
    // keyed by rowset id -- confirm.
    std::map<RowsetId, std::vector<size_t>> _all_segments_rows;
90
91
    // Force building one scanner per segment when true.
    bool _scan_parallelism_by_per_segment {false};
92
93
    // Profile passed in by the caller; shared ownership.
    std::shared_ptr<RuntimeProfile> _scanner_profile;
94
    // Statistics owned by the builder, exposed through builder_stats().
    OlapReaderStatistics _builder_stats;
95
    // Non-owning pointer to the runtime state passed in by the caller.
    RuntimeState* _state;
96
    // NOTE(review): presumably the query row limit handed to each scanner -- confirm.
    int64_t _limit;
97
    bool _is_dup_mow_key;
98
    // Whether the storage layer may return pre-aggregated (i.e. partially aggregated) data.
99
    // PreAgg ON: The storage layer returns partially aggregated data without additional processing. (Fast data reading)
100
    // For example, suppose a table is defined as: select userid, count(*) from base table,
101
    // and the user sends a query like: select userid, count(*) from base table group by userid.
102
    // Then the storage layer does not need to aggregate; it can return the partial agg data, because the compute layer will do the aggregation.
103
    // PreAgg OFF: The storage layer must complete pre-aggregation and return fully aggregated data. (Slow data reading)
104
    bool _is_preaggregation;
105
    // Builder-owned copy of the caller's tablet list.
    std::vector<TabletWithVersion> _tablets;
106
    // Builder-owned copy of the key-range pointers; the ranges themselves are not owned.
    std::vector<OlapScanRange*> _key_ranges;
107
    // NOTE(review): presumably read sources keyed by tablet id -- confirm.
    std::unordered_map<int64_t, TabletReadSource> _all_read_sources;
108
    // Reference to the caller's vector; it must outlive this builder.
    std::vector<TabletReadSource>& _read_sources;
109
};
114
115
} // namespace doris