Coverage Report

Created: 2026-03-19 12:20

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/format/generic_reader.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <gen_cpp/PlanNodes_types.h>
21
22
#include "common/status.h"
23
#include "exprs/vexpr_fwd.h"
24
#include "runtime/descriptors.h"
25
#include "storage/predicate/block_column_predicate.h"
26
#include "util/profile_collector.h"
27
28
namespace doris {
29
class ColumnPredicate;
30
} // namespace doris
31
32
namespace doris {
33
#include "common/compile_check_begin.h"
34
35
class Block;
36
class VSlotRef;
37
38
// Context passed from FileScanner to readers for condition cache integration.
39
// On MISS: readers populate filter_result per-granule during predicate evaluation.
40
// On HIT: readers skip granules where filter_result[granule] == false.
41
struct ConditionCacheContext {
42
    bool is_hit = false; // HIT/MISS flag for the two modes described above; defaults to false (MISS)
43
    std::shared_ptr<std::vector<bool>> filter_result; // per-granule: true = has surviving rows
44
    int64_t base_granule = 0; // global granule index of the first granule in filter_result
45
    static constexpr int GRANULE_SIZE = 2048; // presumably the number of rows per granule -- TODO confirm
46
};
47
48
// This is a reader interface for all file readers.
49
// A GenericReader is responsible for reading a file and returning
50
// a set of blocks with the specified schema.
51
class GenericReader : public ProfileCollector {
52
public:
53
3.53k
    GenericReader() : _push_down_agg_type(TPushAggOp::type::NONE) {}
54
2.88k
    void set_push_down_agg_type(TPushAggOp::type push_down_agg_type) {
55
2.88k
        _push_down_agg_type = push_down_agg_type;
56
2.88k
    }
57
58
    virtual Status get_next_block(Block* block, size_t* read_rows, bool* eof) = 0;
59
60
    // Type is always nullable to process illegal values.
    // The default implementation returns NotSupported.
61
    virtual Status get_columns(std::unordered_map<std::string, DataTypePtr>* name_to_type,
62
0
                               std::unordered_set<std::string>* missing_cols) {
63
0
        return Status::NotSupported("get_columns is not implemented");
64
0
    }
65
66
    // This method is responsible for initializing the resources for parsing the schema.
67
    // It will be called before `get_parsed_schema`. The default implementation returns NotSupported.
68
0
    virtual Status init_schema_reader() {
69
0
        return Status::NotSupported("init_schema_reader is not implemented for this reader.");
70
0
    }
71
    // `col_types` is always nullable to process illegal values.
    // The default implementation returns NotSupported.
72
    virtual Status get_parsed_schema(std::vector<std::string>* col_names,
73
0
                                     std::vector<DataTypePtr>* col_types) {
74
0
        return Status::NotSupported("get_parsed_schema is not implemented for this reader.");
75
0
    }
76
3.53k
    ~GenericReader() override = default;
77
78
    /// If the underlying FileReader has filled the partition & missing columns,
79
    /// the FileScanner does not need to fill them.
80
5.88k
    virtual bool fill_all_columns() const { return _fill_all_columns; }
81
82
    /// Tell the underlying FileReader the partition & missing columns,
83
    /// and the FileReader determines whether to fill the columns or not.
84
    /// Implementations should set _fill_all_columns = true if they fill the columns.
    /// The default implementation ignores both arguments and returns OK.
85
    virtual Status set_fill_columns(
86
            const std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>&
87
                    partition_columns,
88
2.47k
            const std::unordered_map<std::string, VExprContextSPtr>& missing_columns) {
89
2.47k
        return Status::OK();
90
2.47k
    }
91
92
566
    // The default implementation does nothing and returns OK.
    virtual Status close() { return Status::OK(); }
93
94
54
    // Marks the reader as reading by rows, stores a copy of `row_ids`,
    // and then delegates to `_set_read_one_line_impl()`, whose status is returned.
    Status read_by_rows(const std::list<int64_t>& row_ids) {
95
54
        _read_by_rows = true;
96
54
        _row_ids = row_ids;
97
54
        return _set_read_one_line_impl();
98
54
    }
99
100
    /// The reader is responsible for counting the number of rows read,
101
    /// because some readers, such as parquet/orc,
102
    /// can skip some pages/rowgroups through indexes.
    /// The default implementation returns false.
103
5.55k
    virtual bool count_read_rows() { return false; }
104
105
protected:
106
0
    // Hook invoked by `read_by_rows()` after the row ids are recorded.
    // The default implementation returns NotSupported.
    virtual Status _set_read_one_line_impl() {
107
0
        return Status::NotSupported("read_by_rows is not implemented for this reader.");
108
0
    }
109
110
    const size_t _MIN_BATCH_SIZE = 4064; // 4096 - 32 (padding)
111
112
    /// Whether the underlying FileReader has filled the partition & missing columns
113
    bool _fill_all_columns = false;
114
    TPushAggOp::type _push_down_agg_type {};
115
116
public:
117
    // Pass condition cache context to the reader for HIT/MISS tracking.
    // The default implementation is a no-op.
118
0
    virtual void set_condition_cache_context(std::shared_ptr<ConditionCacheContext> ctx) {}
119
120
    // Returns the total number of rows the reader will produce.
121
    // Used to pre-allocate condition cache with the correct number of granules.
    // The default implementation returns 0.
122
2
    virtual int64_t get_total_rows() const { return 0; }
123
124
    // Returns true if this reader has delete operations (e.g. Iceberg position/equality deletes,
125
    // Hive ACID deletes). Used to disable condition cache when deletes are present, since cached
126
    // granule results may become stale if delete files change between queries.
    // The default implementation returns false.
127
2
    virtual bool has_delete_operations() const { return false; }
128
129
protected:
130
    bool _read_by_rows = false; // set to true by read_by_rows()
131
    std::list<int64_t> _row_ids; // copy of the row ids passed to read_by_rows()
132
133
    // Cache for common parts such as the file footer.
134
    // May be null if not used.
135
    FileMetaCache* _meta_cache = nullptr;
136
};
137
138
#include "common/compile_check_end.h"
139
} // namespace doris