be/src/format/parquet/vparquet_reader.h

Source
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include <gen_cpp/parquet_types.h>

#include <cstddef>
#include <cstdint>
#include <functional>
#include <list>
#include <memory>
#include <optional>
#include <set>
#include <string>
#include <tuple>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

#include "common/status.h"
#include "format/parquet/parquet_common.h"
#include "format/parquet/parquet_predicate.h"
#include "format/parquet/vparquet_column_reader.h"
#include "format/parquet/vparquet_group_reader.h"
#include "format/table/table_format_reader.h"
#include "format/table/table_schema_change_helper.h"
#include "io/file_factory.h"
#include "io/fs/file_meta_cache.h"
#include "io/fs/file_reader.h"
#include "io/fs/file_reader_writer_fwd.h"
#include "runtime/runtime_profile.h"
#include "storage/olap_scan_common.h"
#include "util/obj_lru_cache.h"

namespace cctz {
class time_zone;
} // namespace cctz
namespace doris {
class RowDescriptor;
class RuntimeState;
class SlotDescriptor;
class TFileRangeDesc;
class TFileScanRangeParams;
class TupleDescriptor;

namespace io {
class FileSystem;
struct IOContext;
} // namespace io
class Block;
class FileMetaData;
class PageIndex;
class ShardedKVCache;
class VExprContext;
struct RowLineageColumns;
} // namespace doris

namespace doris {

/// Parquet-specific initialization context.
/// Extends ReaderInitContext with predicate pushdown fields.
///
/// Every member below is either a raw pointer or a flag. `conjuncts` and
/// `slot_id_to_predicates` default to the shared empty containers declared here;
/// the remaining pointers default to nullptr.
struct ParquetInitContext final : public ReaderInitContext {
    // Safe defaults for standalone readers (delete file readers, push handler)
    // that don't have conjuncts/predicates. Dereferenced by _do_init_reader.
    static inline const VExprContextSPtrs EMPTY_CONJUNCTS {};
    // NOTE(review): unlike EMPTY_CONJUNCTS this default is not const and is reached
    // through a non-const pointer, so a write through the default would be visible to
    // every context still pointing at it — confirm that readers only read it.
    static inline phmap::flat_hash_map<int, std::vector<std::shared_ptr<ColumnPredicate>>>
            EMPTY_SLOT_PREDICATES {};

    // Points at EMPTY_CONJUNCTS unless the caller overrides it.
    const VExprContextSPtrs* conjuncts = &EMPTY_CONJUNCTS;
    // Points at EMPTY_SLOT_PREDICATES unless the caller overrides it.
    phmap::flat_hash_map<int, std::vector<std::shared_ptr<ColumnPredicate>>>*
            slot_id_to_predicates = &EMPTY_SLOT_PREDICATES;
    // The three pointers below have no safe default (nullptr).
    // NOTE(review): presumably the consumer must null-check them or the caller must set
    // them before use — confirm against _do_init_reader.
    const std::unordered_map<std::string, int>* colname_to_slot_id = nullptr;
    const VExprContextSPtrs* not_single_slot_filter_conjuncts = nullptr;
    const std::unordered_map<int, VExprContextSPtrs>* slot_id_to_filter_conjuncts = nullptr;
    // Defaults to true.
    // NOTE(review): presumably mirrors ParquetReader::set_filter_groups() — confirm.
    bool filter_groups = true;
};

class ParquetReader : public TableFormatReader {
    ENABLE_FACTORY_CREATOR(ParquetReader);

public:
    /// Raw counters and timers accumulated by ParquetReader; every field starts at zero.
    /// Callers get mutable access to the instance through reader_statistics().
    /// NOTE(review): the *_time fields are presumably nanoseconds — confirm against the
    /// timers that feed them.
    struct ReaderStatistics {
        int32_t filtered_row_groups = 0;
        int32_t filtered_row_groups_by_min_max = 0;
        int32_t filtered_row_groups_by_bloom_filter = 0;
        int32_t read_row_groups = 0;
        int64_t filtered_group_rows = 0;
        int64_t filtered_page_rows = 0;
        int64_t lazy_read_filtered_rows = 0;
        int64_t read_rows = 0;
        int64_t filtered_bytes = 0;
        int64_t column_read_time = 0;
        int64_t parse_meta_time = 0;
        int64_t parse_footer_time = 0;
        int64_t file_footer_read_calls = 0;
        int64_t file_footer_hit_cache = 0;
        int64_t file_reader_create_time = 0;
        int64_t open_file_num = 0;
        int64_t row_group_filter_time = 0;
        int64_t page_index_filter_time = 0;
        int64_t read_page_index_time = 0;
        int64_t parse_page_index_time = 0;
        int64_t predicate_filter_time = 0;
        int64_t dict_filter_rewrite_time = 0;
        int64_t bloom_filter_read_time = 0;
    };

    ParquetReader(RuntimeProfile* profile, const TFileScanRangeParams& params,
                  const TFileRangeDesc& range, size_t batch_size, const cctz::time_zone* ctz,
                  io::IOContext* io_ctx, RuntimeState* state, FileMetaCache* meta_cache = nullptr,
                  bool enable_lazy_mat = true);

    ParquetReader(RuntimeProfile* profile, const TFileScanRangeParams& params,
                  const TFileRangeDesc& range, size_t batch_size, const cctz::time_zone* ctz,
                  std::shared_ptr<io::IOContext> io_ctx_holder, RuntimeState* state,
                  FileMetaCache* meta_cache = nullptr, bool enable_lazy_mat = true);

    ParquetReader(const TFileScanRangeParams& params, const TFileRangeDesc& range,
                  io::IOContext* io_ctx, RuntimeState* state, FileMetaCache* meta_cache = nullptr,
                  bool enable_lazy_mat = true);

    ParquetReader(const TFileScanRangeParams& params, const TFileRangeDesc& range,
                  std::shared_ptr<io::IOContext> io_ctx_holder, RuntimeState* state,
                  FileMetaCache* meta_cache = nullptr, bool enable_lazy_mat = true);

    ~ParquetReader() override;
#ifdef BE_TEST
    // for unit test
    void set_file_reader(io::FileReaderSPtr file_reader);
#endif

    // Override to build table_info_node from Parquet file metadata using by_parquet_name.
    // Subclasses (HiveParquetReader, etc.) call GenericReader::on_before_init_reader directly,
    // so this override only applies to plain ParquetReader (TVF, load).
    Status on_before_init_reader(ReaderInitContext* ctx) override;

    void set_batch_size(size_t batch_size) override;

    Status close() override;

    // Set the delete rows in the current parquet file.
    // Only the pointer is stored (no copy is made), so the vector must stay alive for as
    // long as the reader may consult it. Passing nullptr clears the list.
    void set_delete_rows(const std::vector<int64_t>* delete_rows) { _delete_rows = delete_rows; }

    // Returns _file_reader->size().
    // _file_reader is dereferenced without a null check (the member is initialized to
    // nullptr), so this must only be called once a file reader has been set.
    int64_t size() const { return _file_reader->size(); }

    Status _get_columns_impl(std::unordered_map<std::string, DataTypePtr>* name_to_type) override;

    Status init_schema_reader() override;

    Status get_parsed_schema(std::vector<std::string>* col_names,
                             std::vector<DataTypePtr>* col_types) override;

    // Mutable access to this reader's statistics; callers may update the counters in place.
    ReaderStatistics& reader_statistics() { return _reader_statistics; }

    // Returns the stored thrift FileMetaData pointer (_t_metadata). The member is
    // initialized to nullptr, so the result is null until metadata has been assigned.
    const tparquet::FileMetaData* get_meta_data() const { return _t_metadata; }

    Status get_file_metadata_schema(const FieldDescriptor** ptr);

    /// Install the callable stored in _create_topn_row_id_column_iterator, which yields a
    /// shared RowIdColumnIteratorV2 when invoked.
    /// The callable is taken by value and moved into the member, so passing an rvalue
    /// avoids copying the std::function (and whatever state it captured).
    void set_create_row_id_column_iterator_func(
            std::function<std::shared_ptr<segment_v2::RowIdColumnIteratorV2>()> create_func) {
        _create_topn_row_id_column_iterator = std::move(create_func);
    }

    /// Forwards to the current RowGroupReader's current_batch_row_positions().
    /// Used by IcebergReaderMixin to build $row_id column.
    /// Precondition: _current_group_reader is non-null; it is dereferenced without a check.
    const std::vector<segment_v2::rowid_t>& current_batch_row_positions() const {
        auto& group_reader = *_current_group_reader;
        return group_reader.current_batch_row_positions();
    }

    /// Fill the column named `col_name` in `block` by asking `_row_id_column_iterator` to
    /// read the row positions of the current batch (see current_batch_row_positions()).
    ///
    /// @param _row_id_column_iterator iterator whose read_by_rowids() fills the column;
    ///        must not be null.
    /// @param col_name name of the destination column inside `block`.
    /// @param block destination block; must not be null.
    /// @param rows currently unused.
    /// @return InternalError when an argument is null, when there is no current row group
    ///         reader, or when the column is missing from `block`; otherwise the status
    ///         of read_by_rowids().
    Status fill_topn_row_id(
            std::shared_ptr<segment_v2::RowIdColumnIteratorV2> _row_id_column_iterator,
            std::string col_name, Block* block, size_t rows) {
        if (_row_id_column_iterator == nullptr || block == nullptr) {
            return Status::InternalError("Null iterator or block when filling column {}",
                                         col_name);
        }
        // current_batch_row_positions() dereferences _current_group_reader unconditionally,
        // so report a missing group reader as an error instead of crashing.
        if (_current_group_reader == nullptr) {
            return Status::InternalError("No current row group reader when filling column {}",
                                         col_name);
        }
        int col_pos = block->get_position_by_name(col_name);
        DCHECK(col_pos >= 0);
        if (col_pos < 0) {
            return Status::InternalError("Column {} not found in block", col_name);
        }
        auto col = block->get_by_position(col_pos).column->assume_mutable();
        const auto& row_ids = this->current_batch_row_positions();
        RETURN_IF_ERROR(
                _row_id_column_iterator->read_by_rowids(row_ids.data(), row_ids.size(), col));

        return Status::OK();
    }

    // ParquetReader always answers true here.
    // NOTE(review): presumably tells the caller to account for rows read by this reader —
    // confirm against the base-class declaration.
    bool count_read_rows() override { return true; }

    void set_condition_cache_context(std::shared_ptr<ConditionCacheContext> ctx) override;

    // ParquetReader always reports support for count pushdown.
    // NOTE(review): presumably paired with get_total_rows() — confirm against the base class.
    bool supports_count_pushdown() const override { return true; }

    int64_t get_total_rows() const override;

    /// True only when a delete-row list has been installed and that list is non-empty.
    bool has_delete_operations() const override {
        if (_delete_rows == nullptr) {
            return false;
        }
        return !_delete_rows->empty();
    }

    /// Enable (true) or disable (false) row-group range filtering by storing `v` in
    /// _filter_groups. Disabling is needed when reading delete files whose
    /// TFileRangeDesc has size=-1.
    void set_filter_groups(bool v) { _filter_groups = v; }

protected:
    // ---- Unified init_reader(ReaderInitContext*) overrides ----
    Status _open_file_reader(ReaderInitContext* ctx) override;
    Status _do_init_reader(ReaderInitContext* ctx) override;

    void _collect_profile_before_close() override;

    // Core block reading implementation
    Status _do_get_next_block(Block* block, size_t* read_rows, bool* eof) override;

    // Parquet fills partition/missing columns per-batch internally via RowGroupReader,
    // so suppress TableFormatReader's default on_after_read_block fill.
    // Intentionally a no-op: both arguments are ignored and OK is always returned.
    Status on_after_read_block(Block* /*block*/, size_t* /*read_rows*/) override {
        return Status::OK();
    }

    // Protected accessors so CRTP mixin subclasses can reach private members.
    // Each one returns the stored member as-is; the pointer-valued members are
    // initialized to nullptr, so callers may observe null.
    io::IOContext* get_io_ctx() const { return _io_ctx; }
    // Returns a reference to the pointer member itself, so the caller can rebind which
    // map the reader points at, not merely modify the map's contents.
    std::unordered_map<std::string, uint32_t>*& col_name_to_block_idx_ref() {
        return _col_name_to_block_idx;
    }
    RuntimeProfile* get_profile() const { return _profile; }
    RuntimeState* get_state() const { return _state; }
    // The scan params/range are held by reference in the reader; these return that same
    // reference.
    const TFileScanRangeParams& get_scan_params() const { return _scan_params; }
    const TFileRangeDesc& get_scan_range() const { return _scan_range; }
    const TupleDescriptor* get_tuple_descriptor() const { return _tuple_descriptor; }
    const RowDescriptor* get_row_descriptor() const { return _row_descriptor; }
    const FileMetaData* get_file_metadata() const { return _file_metadata; }
    const FieldDescriptor& parquet_file_schema() const;
    void prepare_parquet_file_schema_with_ids(const FieldDescriptor* field_desc);

private:
    static ColumnIdResult _create_column_ids_by_name(const FieldDescriptor* field_desc,
                                                     const TupleDescriptor* tuple_descriptor);
    std::string _selected_leaf_column_paths() const;

    // Handles to the RuntimeProfile counters used by the Parquet reader.
    // Every handle starts as nullptr.
    // NOTE(review): presumably they are registered in _init_profile() and written in
    // _collect_profile() — confirm in the .cpp before dereferencing any of them elsewhere.
    struct ParquetProfile {
        RuntimeProfile::Counter* filtered_row_groups = nullptr;
        RuntimeProfile::Counter* filtered_row_groups_by_min_max = nullptr;
        RuntimeProfile::Counter* filtered_row_groups_by_bloom_filter = nullptr;
        RuntimeProfile::Counter* to_read_row_groups = nullptr;
        RuntimeProfile::Counter* total_row_groups = nullptr;
        RuntimeProfile::Counter* filtered_group_rows = nullptr;
        RuntimeProfile::Counter* filtered_page_rows = nullptr;
        RuntimeProfile::Counter* lazy_read_filtered_rows = nullptr;
        RuntimeProfile::Counter* filtered_bytes = nullptr;
        RuntimeProfile::Counter* raw_rows_read = nullptr;
        RuntimeProfile::Counter* column_read_time = nullptr;
        RuntimeProfile::Counter* parse_meta_time = nullptr;
        RuntimeProfile::Counter* parse_footer_time = nullptr;
        RuntimeProfile::Counter* file_reader_create_time = nullptr;
        RuntimeProfile::Counter* open_file_num = nullptr;
        RuntimeProfile::Counter* row_group_filter_time = nullptr;
        RuntimeProfile::Counter* page_index_read_calls = nullptr;
        RuntimeProfile::Counter* page_index_filter_time = nullptr;
        RuntimeProfile::Counter* read_page_index_time = nullptr;
        RuntimeProfile::Counter* parse_page_index_time = nullptr;
        RuntimeProfile::Counter* file_footer_read_calls = nullptr;
        RuntimeProfile::Counter* file_footer_hit_cache = nullptr;
        RuntimeProfile::Counter* decompress_time = nullptr;
        RuntimeProfile::Counter* decompress_cnt = nullptr;
        RuntimeProfile::Counter* page_read_counter = nullptr;
        RuntimeProfile::Counter* page_cache_write_counter = nullptr;
        RuntimeProfile::Counter* page_cache_compressed_write_counter = nullptr;
        RuntimeProfile::Counter* page_cache_decompressed_write_counter = nullptr;
        RuntimeProfile::Counter* page_cache_hit_counter = nullptr;
        RuntimeProfile::Counter* page_cache_missing_counter = nullptr;
        RuntimeProfile::Counter* page_cache_compressed_hit_counter = nullptr;
        RuntimeProfile::Counter* page_cache_decompressed_hit_counter = nullptr;
        RuntimeProfile::Counter* decode_header_time = nullptr;
        RuntimeProfile::Counter* read_page_header_time = nullptr;
        RuntimeProfile::Counter* decode_value_time = nullptr;
        RuntimeProfile::Counter* decode_dict_time = nullptr;
        RuntimeProfile::Counter* decode_level_time = nullptr;
        RuntimeProfile::Counter* decode_null_map_time = nullptr;
        RuntimeProfile::Counter* skip_page_header_num = nullptr;
        RuntimeProfile::Counter* parse_page_header_num = nullptr;
        RuntimeProfile::Counter* predicate_filter_time = nullptr;
        RuntimeProfile::Counter* dict_filter_rewrite_time = nullptr;
        RuntimeProfile::Counter* convert_time = nullptr;
        RuntimeProfile::Counter* bloom_filter_read_time = nullptr;
        RuntimeProfile::Counter* variant_direct_typed_value_read_rows = nullptr;
        RuntimeProfile::Counter* variant_rowwise_read_rows = nullptr;
    };

    // ---- set_fill_columns sub-functions ----
    void _collect_predicate_columns_from_conjuncts(
            std::unordered_map<std::string, std::pair<uint32_t, int>>& predicate_columns);
    void _classify_columns_for_lazy_read(
            const std::unordered_map<std::string, std::pair<uint32_t, int>>& predicate_columns,
            const std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>&
                    partition_columns,
            const std::unordered_map<std::string, VExprContextSPtr>& missing_columns);

    Status _open_file();
    void _init_profile();
    void _close_internal();
    Status _next_row_group_reader();
    RowGroupReader::PositionDeleteContext _get_position_delete_ctx(
            const tparquet::RowGroup& row_group,
            const RowGroupReader::RowGroupIndex& row_group_index);
    void _init_system_properties();
    void _init_file_description();

    // At the beginning of reading next row group, index should be loaded and used to filter data efficiently.
    Status _process_page_index_filter(
            const tparquet::RowGroup& row_group,
            const RowGroupReader::RowGroupIndex& row_group_index,
            const std::vector<std::unique_ptr<MutilColumnBlockPredicate>>& push_down_pred,
            RowRanges* candidate_row_ranges);

    // check this range contain this row group.
    bool _is_misaligned_range_group(const tparquet::RowGroup& row_group) const;

    // Row Group min-max Filter
    Status _process_column_stat_filter(
            const tparquet::RowGroup& row_group,
            const std::vector<std::unique_ptr<MutilColumnBlockPredicate>>& push_down_pred,
            bool* filter_group, bool* filtered_by_min_max, bool* filtered_by_bloom_filter);

    /*
     * 1. row group min-max filter
     * 2. row group bloom filter
     * 3. page index min-max filter
     *
     * return Status && row_ranges (lines to be read)
     */
    Status _process_min_max_bloom_filter(
            const RowGroupReader::RowGroupIndex& row_group_index,
            const tparquet::RowGroup& row_group,
            const std::vector<std::unique_ptr<MutilColumnBlockPredicate>>& push_down_pred,
            RowRanges* row_ranges);

    int64_t _get_column_start_offset(
            const tparquet::ColumnMetaData& column_init_column_readers) const;
    // Build the metadata-cache key for `path`: the literal prefix "meta_" followed by the path.
    std::string _meta_cache_key(const std::string& path) {
        std::string cache_key("meta_");
        cache_key += path;
        return cache_key;
    }
    std::vector<io::PrefetchRange> _generate_random_access_ranges(
            const RowGroupReader::RowGroupIndex& group, size_t* avg_io_size);
    void _collect_profile();

    // No-op override: returns OK without touching any reader state.
    Status _set_read_one_line_impl() override { return Status::OK(); }

    bool _exists_in_file(const std::string& expr_name) const;
    bool _type_matches(const int cid) const;
    void _init_read_columns(const std::vector<std::string>& column_names);

    io::FileSystemProperties _system_properties;
    io::FileDescription _file_description;

    // The following fields are for the parquet metadata cache.
    // If _meta_cache is not null, _file_metadata is obtained from _meta_cache
    // and is owned by _meta_cache_handle.
    // If _meta_cache is null, _file_metadata is managed by _file_metadata_ptr,
    // which is released when this reader is destroyed.
    // ATTN: these fields must be declared before _file_reader, to make sure they are released
    // after _file_reader. Otherwise, there may be a heap-use-after-free bug.
    ObjLRUCache::CacheHandle _meta_cache_handle;
    std::unique_ptr<FileMetaData> _file_metadata_ptr;
    std::optional<FieldDescriptor> _file_schema_with_ids;
    const tparquet::FileMetaData* _t_metadata = nullptr;

    // _tracing_file_reader wraps _file_reader.
    // _file_reader is original file reader.
    // _tracing_file_reader is tracing file reader with io context.
    // If io_ctx is null, _tracing_file_reader will be the same as file_reader.
    io::FileReaderSPtr _file_reader = nullptr;
    io::FileReaderSPtr _tracing_file_reader = nullptr;
    std::unique_ptr<RowGroupReader> _current_group_reader;

    RowGroupReader::RowGroupIndex _current_row_group_index {-1, 0, 0};
    // read to the end of current reader
    bool _row_group_eof = true;
    size_t _total_groups = 0; // num of groups(stripes) of a parquet(orc) file

    std::shared_ptr<ConditionCacheContext> _condition_cache_ctx;

    // Through this node, you can find the file column based on the table column.
    std::shared_ptr<TableSchemaChangeHelper::Node> _table_info_node_ptr =
            TableSchemaChangeHelper::ConstNode::get_instance();

    // Columns that need to be read, in the order they appear in the file.
    std::vector<std::string> _read_table_columns;
    std::vector<std::string> _read_file_columns;
    // The set of file columns to be read; only columns within this set will be filtered using the min-max predicate.
    std::set<std::string> _read_table_columns_set;
    // Deleted rows will be marked by Iceberg/Paimon. So we should filter deleted rows when reading it.
    const std::vector<int64_t>* _delete_rows = nullptr;
    int64_t _delete_rows_index = 0;

    // parquet file reader object
    RuntimeProfile* _profile = nullptr;
    const TFileScanRangeParams& _scan_params;
    const TFileRangeDesc& _scan_range;
    size_t _batch_size;
    // Bytes-per-row estimate from the previous batch, used to pre-shrink _batch_size
    // before reading so that oversized blocks are prevented from the current call onward.
    // Zero means no prior data (first batch).
    size_t _load_bytes_per_row = 0;
    int64_t _range_start_offset;
    int64_t _range_size;
    const cctz::time_zone* _ctz = nullptr;

    std::unordered_map<int, tparquet::OffsetIndex> _col_offsets;

    ReaderStatistics _reader_statistics;
    ParquetColumnReader::ColumnStatistics _column_statistics;
    ParquetProfile _parquet_profile;
    bool _closed = false;
    io::IOContext* _io_ctx = nullptr;
    std::shared_ptr<io::IOContext> _io_ctx_holder;
    RuntimeState* _state = nullptr;
    const TupleDescriptor* _tuple_descriptor = nullptr;
    const RowDescriptor* _row_descriptor = nullptr;
    const FileMetaData* _file_metadata = nullptr;
    // Pointer to external column name to block index mapping (from FileScanner)
    std::unordered_map<std::string, uint32_t>* _col_name_to_block_idx = nullptr;
    bool _enable_lazy_mat = true;
    bool _enable_filter_by_min_max = true;
    bool _enable_filter_by_bloom_filter = true;
    const std::unordered_map<std::string, int>* _colname_to_slot_id = nullptr;
    const VExprContextSPtrs* _not_single_slot_filter_conjuncts = nullptr;
    const std::unordered_map<int, VExprContextSPtrs>* _slot_id_to_filter_conjuncts = nullptr;
    std::unordered_map<tparquet::Type::type, bool> _ignored_stats;
    // Returns the current value of _batch_size.
    size_t get_batch_size() const override { return _batch_size; }

protected:
    // Used for column lazy read. Protected so Iceberg/Paimon subclasses can
    // register synthesized columns in on_before_init_reader.
    RowGroupReader::LazyReadContext _lazy_read_ctx;
    bool _filter_groups = true;

    std::function<std::shared_ptr<segment_v2::RowIdColumnIteratorV2>()>
            _create_topn_row_id_column_iterator;

private:
    std::set<uint64_t> _column_ids;
    std::set<uint64_t> _filter_column_ids;

    std::vector<std::unique_ptr<MutilColumnBlockPredicate>> _push_down_predicates;
    Arena _arena;
};

} // namespace doris

Coverage Report

Created: 2026-05-17 15:02

Line	Count	Source
1		// Licensed to the Apache Software Foundation (ASF) under one
2		// or more contributor license agreements. See the NOTICE file
3		// distributed with this work for additional information
4		// regarding copyright ownership. The ASF licenses this file
5		// to you under the Apache License, Version 2.0 (the
6		// "License"); you may not use this file except in compliance
7		// with the License. You may obtain a copy of the License at
8		//
9		// http://www.apache.org/licenses/LICENSE-2.0
10		//
11		// Unless required by applicable law or agreed to in writing,
12		// software distributed under the License is distributed on an
13		// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14		// KIND, either express or implied. See the License for the
15		// specific language governing permissions and limitations
16		// under the License.
17
18		#pragma once
19
20		#include <gen_cpp/parquet_types.h>
21
22		#include <cstddef>
23		#include <cstdint>
24		#include <list>
25		#include <memory>
26		#include <optional>
27		#include <set>
28		#include <string>
29		#include <tuple>
30		#include <unordered_map>
31		#include <unordered_set>
32		#include <vector>
33
34		#include "common/status.h"
35		#include "format/parquet/parquet_common.h"
36		#include "format/parquet/parquet_predicate.h"
37		#include "format/parquet/vparquet_column_reader.h"
38		#include "format/parquet/vparquet_group_reader.h"
39		#include "format/table/table_format_reader.h"
40		#include "format/table/table_schema_change_helper.h"
41		#include "io/file_factory.h"
42		#include "io/fs/file_meta_cache.h"
43		#include "io/fs/file_reader.h"
44		#include "io/fs/file_reader_writer_fwd.h"
45		#include "runtime/runtime_profile.h"
46		#include "storage/olap_scan_common.h"
47		#include "util/obj_lru_cache.h"
48
49		namespace cctz {
50		class time_zone;
51		} // namespace cctz
52		namespace doris {
53		class RowDescriptor;
54		class RuntimeState;
55		class SlotDescriptor;
56		class TFileRangeDesc;
57		class TFileScanRangeParams;
58		class TupleDescriptor;
59
60		namespace io {
61		class FileSystem;
62		struct IOContext;
63		} // namespace io
64		class Block;
65		class FileMetaData;
66		class PageIndex;
67		class ShardedKVCache;
68		class VExprContext;
69		struct RowLineageColumns;
70		} // namespace doris
71
72		namespace doris {
73
74		/// Parquet-specific initialization context.
75		/// Extends ReaderInitContext with predicate pushdown fields.
76		struct ParquetInitContext final : public ReaderInitContext {
77		// Safe defaults for standalone readers (delete file readers, push handler)
78		// that don't have conjuncts/predicates. Dereferenced by _do_init_reader.
79		static inline const VExprContextSPtrs EMPTY_CONJUNCTS {};
80		static inline phmap::flat_hash_map<int, std::vector<std::shared_ptr<ColumnPredicate>>>
81		EMPTY_SLOT_PREDICATES {};
82
83		const VExprContextSPtrs* conjuncts = &EMPTY_CONJUNCTS;
84		phmap::flat_hash_map<int, std::vector<std::shared_ptr<ColumnPredicate>>>*
85		slot_id_to_predicates = &EMPTY_SLOT_PREDICATES;
86		const std::unordered_map<std::string, int>* colname_to_slot_id = nullptr;
87		const VExprContextSPtrs* not_single_slot_filter_conjuncts = nullptr;
88		const std::unordered_map<int, VExprContextSPtrs>* slot_id_to_filter_conjuncts = nullptr;
89		bool filter_groups = true;
90		};
91
92		class ParquetReader : public TableFormatReader {
93		ENABLE_FACTORY_CREATOR(ParquetReader);
94
95		public:
96		struct ReaderStatistics {
97		int32_t filtered_row_groups = 0;
98		int32_t filtered_row_groups_by_min_max = 0;
99		int32_t filtered_row_groups_by_bloom_filter = 0;
100		int32_t read_row_groups = 0;
101		int64_t filtered_group_rows = 0;
102		int64_t filtered_page_rows = 0;
103		int64_t lazy_read_filtered_rows = 0;
104		int64_t read_rows = 0;
105		int64_t filtered_bytes = 0;
106		int64_t column_read_time = 0;
107		int64_t parse_meta_time = 0;
108		int64_t parse_footer_time = 0;
109		int64_t file_footer_read_calls = 0;
110		int64_t file_footer_hit_cache = 0;
111		int64_t file_reader_create_time = 0;
112		int64_t open_file_num = 0;
113		int64_t row_group_filter_time = 0;
114		int64_t page_index_filter_time = 0;
115		int64_t read_page_index_time = 0;
116		int64_t parse_page_index_time = 0;
117		int64_t predicate_filter_time = 0;
118		int64_t dict_filter_rewrite_time = 0;
119		int64_t bloom_filter_read_time = 0;
120		};
121
122		ParquetReader(RuntimeProfile* profile, const TFileScanRangeParams& params,
123		const TFileRangeDesc& range, size_t batch_size, const cctz::time_zone* ctz,
124		io::IOContext* io_ctx, RuntimeState* state, FileMetaCache* meta_cache = nullptr,
125		bool enable_lazy_mat = true);
126
127		ParquetReader(RuntimeProfile* profile, const TFileScanRangeParams& params,
128		const TFileRangeDesc& range, size_t batch_size, const cctz::time_zone* ctz,
129		std::shared_ptr<io::IOContext> io_ctx_holder, RuntimeState* state,
130		FileMetaCache* meta_cache = nullptr, bool enable_lazy_mat = true);
131
132		ParquetReader(const TFileScanRangeParams& params, const TFileRangeDesc& range,
133		io::IOContext* io_ctx, RuntimeState* state, FileMetaCache* meta_cache = nullptr,
134		bool enable_lazy_mat = true);
135
136		ParquetReader(const TFileScanRangeParams& params, const TFileRangeDesc& range,
137		std::shared_ptr<io::IOContext> io_ctx_holder, RuntimeState* state,
138		FileMetaCache* meta_cache = nullptr, bool enable_lazy_mat = true);
139
140		~ParquetReader() override;
141		#ifdef BE_TEST
142		// for unit test
143		void set_file_reader(io::FileReaderSPtr file_reader);
144		#endif
145
146		// Override to build table_info_node from Parquet file metadata using by_parquet_name.
147		// Subclasses (HiveParquetReader, etc.) call GenericReader::on_before_init_reader directly,
148		// so this override only applies to plain ParquetReader (TVF, load).
149		Status on_before_init_reader(ReaderInitContext* ctx) override;
150
151		void set_batch_size(size_t batch_size) override;
152
153		Status close() override;
154
155		// set the delete rows in current parquet file
156	2	void set_delete_rows(const std::vector<int64_t>* delete_rows) { _delete_rows = delete_rows; }
157
158	0	int64_t size() const { return _file_reader->size(); }
159
160		Status _get_columns_impl(std::unordered_map<std::string, DataTypePtr>* name_to_type) override;
161
162		Status init_schema_reader() override;
163
164		Status get_parsed_schema(std::vector<std::string>* col_names,
165		std::vector<DataTypePtr>* col_types) override;
166
167	0	ReaderStatistics& reader_statistics() { return _reader_statistics; }
168
169	2	const tparquet::FileMetaData* get_meta_data() const { return _t_metadata; }
170
171		Status get_file_metadata_schema(const FieldDescriptor** ptr);
172
173		void set_create_row_id_column_iterator_func(
174	14	std::function<std::shared_ptr<segment_v2::RowIdColumnIteratorV2>()> create_func) {
175	14	_create_topn_row_id_column_iterator = create_func;
176	14	}
177
178		/// Access current batch row positions (delegates to RowGroupReader).
179		/// Used by IcebergReaderMixin to build $row_id column.
180	5	const std::vector<segment_v2::rowid_t>& current_batch_row_positions() const {
181	5	return _current_group_reader->current_batch_row_positions();
182	5	}
183
184		Status fill_topn_row_id(
185		std::shared_ptr<segment_v2::RowIdColumnIteratorV2> _row_id_column_iterator,
186	5	std::string col_name, Block* block, size_t rows) {
187	5	int col_pos = block->get_position_by_name(col_name);
188	5	DCHECK(col_pos >= 0);
189	5	if (col_pos < 0) {
190	0	return Status::InternalError("Column {} not found in block", col_name);
191	0	}
192	5	auto col = block->get_by_position(col_pos).column->assume_mutable();
193	5	const auto& row_ids = this->current_batch_row_positions();
194	5	RETURN_IF_ERROR(
195	5	_row_id_column_iterator->read_by_rowids(row_ids.data(), row_ids.size(), col));
196
197	5	return Status::OK();
198	5	}
199
200	14	bool count_read_rows() override { return true; }
201
202		void set_condition_cache_context(std::shared_ptr<ConditionCacheContext> ctx) override;
203
204	0	bool supports_count_pushdown() const override { return true; }
205
206		int64_t get_total_rows() const override;
207
208	4	bool has_delete_operations() const override {
209	4	return _delete_rows != nullptr && !_delete_rows->empty();
210	4	}
211
212		/// Disable row-group range filtering (needed when reading delete files
213		/// whose TFileRangeDesc has size=-1).
214	0	void set_filter_groups(bool v) { _filter_groups = v; }
215
216		protected:
217		// ---- Unified init_reader(ReaderInitContext*) overrides ----
218		Status _open_file_reader(ReaderInitContext* ctx) override;
219		Status _do_init_reader(ReaderInitContext* ctx) override;
220
221		void _collect_profile_before_close() override;
222
223		// Core block reading implementation
224		Status _do_get_next_block(Block* block, size_t* read_rows, bool* eof) override;
225
226		// Parquet fills partition/missing columns per-batch internally via RowGroupReader,
227		// so suppress TableFormatReader's default on_after_read_block fill.
228	97	Status on_after_read_block(Block* /*block*/, size_t* /*read_rows*/) override {
229	97	return Status::OK();
230	97	}
231
232		// Protected accessors so CRTP mixin subclasses can reach private members
233	0	io::IOContext* get_io_ctx() const { return _io_ctx; }
234	0	std::unordered_map<std::string, uint32_t>*& col_name_to_block_idx_ref() {
235	0	return _col_name_to_block_idx;
236	0	}
237	42	RuntimeProfile* get_profile() const { return _profile; }
238	28	RuntimeState* get_state() const { return _state; }
239	1	const TFileScanRangeParams& get_scan_params() const { return _scan_params; }
240	1	const TFileRangeDesc& get_scan_range() const { return _scan_range; }
241	0	const TupleDescriptor* get_tuple_descriptor() const { return _tuple_descriptor; }
242	0	const RowDescriptor* get_row_descriptor() const { return _row_descriptor; }
243	0	const FileMetaData* get_file_metadata() const { return _file_metadata; }
244		const FieldDescriptor& parquet_file_schema() const;
245		void prepare_parquet_file_schema_with_ids(const FieldDescriptor* field_desc);
246
247		private:
248		static ColumnIdResult _create_column_ids_by_name(const FieldDescriptor* field_desc,
249		const TupleDescriptor* tuple_descriptor);
250		std::string _selected_leaf_column_paths() const;
251
252		struct ParquetProfile {
253		RuntimeProfile::Counter* filtered_row_groups = nullptr;
254		RuntimeProfile::Counter* filtered_row_groups_by_min_max = nullptr;
255		RuntimeProfile::Counter* filtered_row_groups_by_bloom_filter = nullptr;
256		RuntimeProfile::Counter* to_read_row_groups = nullptr;
257		RuntimeProfile::Counter* total_row_groups = nullptr;
258		RuntimeProfile::Counter* filtered_group_rows = nullptr;
259		RuntimeProfile::Counter* filtered_page_rows = nullptr;
260		RuntimeProfile::Counter* lazy_read_filtered_rows = nullptr;
261		RuntimeProfile::Counter* filtered_bytes = nullptr;
262		RuntimeProfile::Counter* raw_rows_read = nullptr;
263		RuntimeProfile::Counter* column_read_time = nullptr;
264		RuntimeProfile::Counter* parse_meta_time = nullptr;
265		RuntimeProfile::Counter* parse_footer_time = nullptr;
266		RuntimeProfile::Counter* file_reader_create_time = nullptr;
267		RuntimeProfile::Counter* open_file_num = nullptr;
268		RuntimeProfile::Counter* row_group_filter_time = nullptr;
269		RuntimeProfile::Counter* page_index_read_calls = nullptr;
270		RuntimeProfile::Counter* page_index_filter_time = nullptr;
271		RuntimeProfile::Counter* read_page_index_time = nullptr;
272		RuntimeProfile::Counter* parse_page_index_time = nullptr;
273		RuntimeProfile::Counter* file_footer_read_calls = nullptr;
274		RuntimeProfile::Counter* file_footer_hit_cache = nullptr;
275		RuntimeProfile::Counter* decompress_time = nullptr;
276		RuntimeProfile::Counter* decompress_cnt = nullptr;
277		RuntimeProfile::Counter* page_read_counter = nullptr;
278		RuntimeProfile::Counter* page_cache_write_counter = nullptr;
279		RuntimeProfile::Counter* page_cache_compressed_write_counter = nullptr;
280		RuntimeProfile::Counter* page_cache_decompressed_write_counter = nullptr;
281		RuntimeProfile::Counter* page_cache_hit_counter = nullptr;
282		RuntimeProfile::Counter* page_cache_missing_counter = nullptr;
283		RuntimeProfile::Counter* page_cache_compressed_hit_counter = nullptr;
284		RuntimeProfile::Counter* page_cache_decompressed_hit_counter = nullptr;
285		RuntimeProfile::Counter* decode_header_time = nullptr;
286		RuntimeProfile::Counter* read_page_header_time = nullptr;
287		RuntimeProfile::Counter* decode_value_time = nullptr;
288		RuntimeProfile::Counter* decode_dict_time = nullptr;
289		RuntimeProfile::Counter* decode_level_time = nullptr;
290		RuntimeProfile::Counter* decode_null_map_time = nullptr;
291		RuntimeProfile::Counter* skip_page_header_num = nullptr;
292		RuntimeProfile::Counter* parse_page_header_num = nullptr;
293		RuntimeProfile::Counter* predicate_filter_time = nullptr;
294		RuntimeProfile::Counter* dict_filter_rewrite_time = nullptr;
295		RuntimeProfile::Counter* convert_time = nullptr;
296		RuntimeProfile::Counter* bloom_filter_read_time = nullptr;
297		RuntimeProfile::Counter* variant_direct_typed_value_read_rows = nullptr;
298		RuntimeProfile::Counter* variant_rowwise_read_rows = nullptr;
299		};
300
301		// ---- set_fill_columns sub-functions ----
// Writes into the caller-provided map (column name -> pair<uint32_t, int>) via a non-const reference.
// NOTE(review): presumably collects the columns referenced by filter conjuncts; the meaning of
// the two pair fields is not visible here - confirm in the .cpp.
302		void _collect_predicate_columns_from_conjuncts(
303		std::unordered_map<std::string, std::pair<uint32_t, int>>& predicate_columns);
// Takes the predicate, partition and missing column maps by const reference and returns void.
// NOTE(review): presumably records the classification in _lazy_read_ctx - confirm in the .cpp.
304		void _classify_columns_for_lazy_read(
305		const std::unordered_map<std::string, std::pair<uint32_t, int>>& predicate_columns,
306		const std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>&
307		partition_columns,
308		const std::unordered_map<std::string, VExprContextSPtr>& missing_columns);
309
// Internal lifecycle helpers; all are defined out of line.
// Reports failure through the returned Status.
// NOTE(review): presumably creates _file_reader / _tracing_file_reader - confirm.
310		Status _open_file();
// NOTE(review): presumably binds the counters held in _parquet_profile - confirm.
311		void _init_profile();
// NOTE(review): presumably the shared close path guarded by _closed - confirm.
312		void _close_internal();
// NOTE(review): presumably advances _current_row_group_index and replaces _current_group_reader - confirm.
313		Status _next_row_group_reader();
// Builds a RowGroupReader::PositionDeleteContext from a row group and its index.
// NOTE(review): presumably derived from _delete_rows / _delete_rows_index - confirm.
314		RowGroupReader::PositionDeleteContext _get_position_delete_ctx(
315		const tparquet::RowGroup& row_group,
316		const RowGroupReader::RowGroupIndex& row_group_index);
// NOTE(review): presumably fill _system_properties and _file_description respectively - confirm.
317		void _init_system_properties();
318		void _init_file_description();
319
320		// At the beginning of reading next row group, index should be loaded and used to filter data efficiently.
// Inputs: the row group, its RowGroupIndex and the push-down predicates (all by const reference).
// Output: written through candidate_row_ranges; failure is reported via the returned Status.
// NOTE(review): whether candidate_row_ranges may be null is not visible here - confirm.
321		Status _process_page_index_filter(
322		const tparquet::RowGroup& row_group,
323		const RowGroupReader::RowGroupIndex& row_group_index,
324		const std::vector<std::unique_ptr<MutilColumnBlockPredicate>>& push_down_pred,
325		RowRanges* candidate_row_ranges);
326
327		// Checks the given row group against this reader's scan range.
// NOTE(review): the name suggests it returns true when the row group does NOT line up with
// the range (so it should be skipped) - confirm the polarity in the .cpp.
328		bool _is_misaligned_range_group(const tparquet::RowGroup& row_group) const;
329
330		// Row Group min-max Filter
// Results are written through the three bool out-pointers: filter_group, filtered_by_min_max
// and filtered_by_bloom_filter; failure is reported via the returned Status.
// NOTE(review): presumably filter_group is true when the whole row group can be skipped and the
// other two flags say which statistic caused it - confirm in the .cpp.
331		Status _process_column_stat_filter(
332		const tparquet::RowGroup& row_group,
333		const std::vector<std::unique_ptr<MutilColumnBlockPredicate>>& push_down_pred,
334		bool* filter_group, bool* filtered_by_min_max, bool* filtered_by_bloom_filter);
335
336		/*
337		* 1. row group min-max filter
338		* 2. row group bloom filter
339		* 3. page index min-max filter
340		*
341		* return Status && row_ranges (lines to be read)
342		*/
// The rows that remain to be read are written through the row_ranges out-pointer.
// Note the parameter order: row_group_index comes before row_group here, whereas
// _process_page_index_filter takes row_group first.
343		Status _process_min_max_bloom_filter(
344		const RowGroupReader::RowGroupIndex& row_group_index,
345		const tparquet::RowGroup& row_group,
346		const std::vector<std::unique_ptr<MutilColumnBlockPredicate>>& push_down_pred,
347		RowRanges* row_ranges);
348
// Returns an int64_t offset computed from one column chunk's ColumnMetaData.
// NOTE(review): the parameter name `column_init_column_readers` does not describe a
// ColumnMetaData and looks like a rename accident - confirm and align with the definition.
349		int64_t _get_column_start_offset(
350		const tparquet::ColumnMetaData& column_init_column_readers) const;
// Builds a meta-cache key: the literal prefix "meta_" followed by `path`.
// Declared static because the key depends only on `path`, never on reader state;
// unqualified calls from other member functions (const or not) keep working.
static std::string _meta_cache_key(const std::string& path) {
    return "meta_" + path;
}
// Returns a vector of io::PrefetchRange for the given row group index and writes a size
// through the avg_io_size out-pointer.
// NOTE(review): presumably one range per column chunk to read, with avg_io_size set to their
// average length - confirm in the .cpp.
352		std::vector<io::PrefetchRange> _generate_random_access_ranges(
353		const RowGroupReader::RowGroupIndex& group, size_t* avg_io_size);
// NOTE(review): presumably publishes _reader_statistics / _column_statistics into the
// _parquet_profile counters - confirm.
354		void _collect_profile();
355
356	19	Status _set_read_one_line_impl() override { return Status::OK(); }
357
// NOTE(review): presumably true when the named column is present in the parquet file - confirm.
358		bool _exists_in_file(const std::string& expr_name) const;
// NOTE(review): `cid` is presumably a column id; which two types are compared is not visible here.
// The top-level `const` on a by-value parameter has no effect in a declaration.
359		bool _type_matches(const int cid) const;
// NOTE(review): presumably fills _read_table_columns / _read_file_columns from column_names - confirm.
360		void _init_read_columns(const std::vector<std::string>& column_names);
361
// File-system properties and file description held by the reader.
// NOTE(review): presumably filled by _init_system_properties() / _init_file_description() - confirm.
362		io::FileSystemProperties _system_properties;
363		io::FileDescription _file_description;
364
365		// The following fields are for the parquet metadata cache.
366		// If _meta_cache is not null, _file_metadata is obtained from _meta_cache,
367		// and it is owned by _meta_cache_handle.
368		// If _meta_cache is null, _file_metadata is managed by _file_metadata_ptr,
369		// which is released on destruction.
370		// ATTN: these fields must be declared before _file_reader, to make sure they are released
371		// after _file_reader (members are destroyed in reverse declaration order). Otherwise, there may be a heap-use-after-free bug.
// NOTE(review): no member named _meta_cache is declared in this part of the class - confirm
// the comment above still matches the code.
372		ObjLRUCache::CacheHandle _meta_cache_handle;
373		std::unique_ptr<FileMetaData> _file_metadata_ptr;
// Empty until set. NOTE(review): presumably filled by prepare_parquet_file_schema_with_ids() - confirm.
374		std::optional<FieldDescriptor> _file_schema_with_ids;
// Raw pointer to the thrift-level metadata. NOTE(review): presumably points into the FileMetaData - confirm.
375		const tparquet::FileMetaData* _t_metadata = nullptr;
376
377		// _tracing_file_reader wraps _file_reader.
378		// _file_reader is the original file reader.
379		// _tracing_file_reader is the tracing file reader with io context.
380		// If io_ctx is null, _tracing_file_reader will be the same as _file_reader.
381		io::FileReaderSPtr _file_reader = nullptr;
382		io::FileReaderSPtr _tracing_file_reader = nullptr;
// Owning pointer to the reader for the current row group.
// NOTE(review): presumably replaced by _next_row_group_reader() - confirm.
383		std::unique_ptr<RowGroupReader> _current_group_reader;
384
// Starts as {-1, 0, 0}. NOTE(review): field meanings live in RowGroupReader::RowGroupIndex;
// -1 presumably means "no row group selected yet" - confirm.
385		RowGroupReader::RowGroupIndex _current_row_group_index {-1, 0, 0};
386		// Set when the current group reader has been read to its end; starts as true.
387		bool _row_group_eof = true;
388		size_t _total_groups = 0; // num of groups(stripes) of a parquet(orc) file
389
// Shared ownership of a ConditionCacheContext; default-constructed, i.e. empty.
// NOTE(review): who sets it and when is not visible here.
390		std::shared_ptr<ConditionCacheContext> _condition_cache_ctx;
391
392		// Through this node, you can find the file column based on the table column.
// Defaults to the shared TableSchemaChangeHelper::ConstNode instance.
393		std::shared_ptr<TableSchemaChangeHelper::Node> _table_info_node_ptr =
394		TableSchemaChangeHelper::ConstNode::get_instance();
395
396		// Columns that need to be read, in file order.
397		std::vector<std::string> _read_table_columns;
398		std::vector<std::string> _read_file_columns;
399		// The set of file columns to be read; only columns within this set will be filtered using the min-max predicate.
// NOTE(review): the comment above says "file columns" but the member is named
// _read_table_columns_set - confirm which naming is correct.
400		std::set<std::string> _read_table_columns_set;
401		// Deleted rows will be marked by Iceberg/Paimon. So we should filter deleted rows when reading it.
// Raw pointer to a vector of int64_t row ids, plus a cursor into it.
// NOTE(review): presumably owned by the caller - confirm.
402		const std::vector<int64_t>* _delete_rows = nullptr;
403		int64_t _delete_rows_index = 0;
404
405		// parquet file reader object
// NOTE(review): the comment above does not describe _profile (a RuntimeProfile pointer) and looks stale.
406		RuntimeProfile* _profile = nullptr;
// Reference members: nothing is copied, so the referenced objects must outlive this reader.
407		const TFileScanRangeParams& _scan_params;
408		const TFileRangeDesc& _scan_range;
// No in-class initializer. NOTE(review): presumably set by the constructor - confirm.
409		size_t _batch_size;
410		// Bytes-per-row estimate from the previous batch, used to pre-shrink _batch_size
411		// before reading so that oversized blocks are prevented from the current call onward.
412		// Zero means no prior data (first batch).
413		size_t _load_bytes_per_row = 0;
// No in-class initializers. NOTE(review): presumably the byte range of the file assigned to
// this reader, set by the constructor - confirm.
414		int64_t _range_start_offset;
415		int64_t _range_size;
// Raw pointer to a cctz time zone. NOTE(review): ownership is not visible here.
416		const cctz::time_zone* _ctz = nullptr;
417
// Maps an int key to a parquet OffsetIndex.
// NOTE(review): the key is presumably a column index or id - confirm.
418		std::unordered_map<int, tparquet::OffsetIndex> _col_offsets;
419
// Statistics objects plus the counter handles they are reported through.
// NOTE(review): presumably combined in _collect_profile() - confirm.
420		ReaderStatistics _reader_statistics;
421		ParquetColumnReader::ColumnStatistics _column_statistics;
422		ParquetProfile _parquet_profile;
423		bool _closed = false;
// Raw IOContext pointer next to a shared_ptr holder.
// NOTE(review): presumably _io_ctx_holder keeps alive the context _io_ctx points to when the reader owns it - confirm.
424		io::IOContext* _io_ctx = nullptr;
425		std::shared_ptr<io::IOContext> _io_ctx_holder;
// Returned as-is by get_state(), get_tuple_descriptor(), get_row_descriptor() and get_file_metadata().
426		RuntimeState* _state = nullptr;
427		const TupleDescriptor* _tuple_descriptor = nullptr;
428		const RowDescriptor* _row_descriptor = nullptr;
429		const FileMetaData* _file_metadata = nullptr;
430		// Pointer to external column name to block index mapping (from FileScanner)
// Exposed by reference through col_name_to_block_idx_ref().
431		std::unordered_map<std::string, uint32_t>* _col_name_to_block_idx = nullptr;
// Feature switches; all default to true.
432		bool _enable_lazy_mat = true;
433		bool _enable_filter_by_min_max = true;
434		bool _enable_filter_by_bloom_filter = true;
// Raw pointers to filter inputs. NOTE(review): presumably borrowed from the caller - confirm lifetime.
435		const std::unordered_map<std::string, int>* _colname_to_slot_id = nullptr;
436		const VExprContextSPtrs* _not_single_slot_filter_conjuncts = nullptr;
437		const std::unordered_map<int, VExprContextSPtrs>* _slot_id_to_filter_conjuncts = nullptr;
// Keyed by parquet physical type.
// NOTE(review): presumably true marks types whose statistics must not be used - confirm.
438		std::unordered_map<tparquet::Type::type, bool> _ignored_stats;
439	0	size_t get_batch_size() const override { return _batch_size; }
440
441		protected:
442		// Used for column lazy read. Protected so Iceberg/Paimon subclasses can
443		// register synthesized columns in on_before_init_reader.
444		RowGroupReader::LazyReadContext _lazy_read_ctx;
// Defaults to true. NOTE(review): presumably lets subclasses switch row-group filtering off - confirm.
445		bool _filter_groups = true;
446
// Callable returning a shared_ptr to a segment_v2::RowIdColumnIteratorV2; default-constructed, i.e. empty.
// NOTE(review): std::function needs <functional>, which the visible include list does not name
// directly - presumably it arrives transitively; confirm.
447		std::function<std::shared_ptr<segment_v2::RowIdColumnIteratorV2>()>
448		_create_topn_row_id_column_iterator;
449
450		private:
// Ordered sets of uint64_t column ids.
// NOTE(review): presumably "all columns to read" and "columns used by filters" respectively - confirm.
451		std::set<uint64_t> _column_ids;
452		std::set<uint64_t> _filter_column_ids;
453
// Owned push-down predicates; same element type as the push_down_pred parameters of the
// _process_*_filter helpers.
454		std::vector<std::unique_ptr<MutilColumnBlockPredicate>> _push_down_predicates;
// NOTE(review): presumably the arena backing allocations made while building those predicates - confirm.
455		Arena _arena;
456		};
457
458		} // namespace doris