Coverage Report

Created: 2026-04-07 18:20

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/format/table/parquet_metadata_reader.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <gen_cpp/PlanNodes_types.h>
21
22
#include <map>
23
#include <memory>
24
#include <string>
25
#include <vector>
26
27
#include "common/factory_creator.h"
28
#include "common/status.h"
29
#include "format/generic_reader.h"
30
#include "runtime/descriptors.h"
31
32
// Forward declarations. In the declarations visible in this header,
// RuntimeProfile and RuntimeState appear only as pointer types, and
// io::FileReader is not referenced at all.
namespace doris {
33
class RuntimeProfile;
34
class RuntimeState;
35
namespace io {
36
class FileReader;
37
} // namespace io
38
} // namespace doris
39
40
namespace doris {
41
class Block;
42
43
// Lightweight reader that surfaces Parquet footer metadata as a table-valued scan.
44
// It reads only file footers (no data pages) and emits rows whose layout depends
45
// on `mode`: SCHEMA, METADATA (row-group/column statistics), FILE_METADATA,
// KEY_VALUE_METADATA or BLOOM_PROBE.
46
class ParquetMetadataReader : public GenericReader {
47
    ENABLE_FACTORY_CREATOR(ParquetMetadataReader);
48
49
public:
50
    // Nested helper type, only forward-declared here; its definition lives outside
    // this header. The reader owns one instance through `_mode_handler`.
    class ModeHandler;
51
52
    // All four arguments are taken by value (the vector and the scan range are
    // copied or moved into the parameters). Presumably they are stored in the
    // matching members below -- confirm against the definition in the .cpp.
    ParquetMetadataReader(std::vector<SlotDescriptor*> slots, RuntimeState* state,
53
                          RuntimeProfile* profile, TMetaScanRange scan_range);
54
    // Declared here and defined out of line. NOTE(review): `_mode_handler` is a
    // std::unique_ptr to a type that is incomplete in this header, so the
    // destructor has to be defined where ModeHandler is complete.
    ~ParquetMetadataReader() override;
55
56
    // Initialization entry point; `_do_init_reader()` below forwards to it.
    Status init_reader();
57
    // Overrides the base-class hook. Results are reported through `block`,
    // `read_rows` and `eof`; their exact semantics are defined by the
    // implementation, which is not visible in this header.
    Status _do_get_next_block(Block* block, size_t* read_rows, bool* eof) override;
58
    Status close() override;
59
60
protected:
61
0
    // Base-class initialization hook: the context argument is ignored and the
    // call is forwarded unchanged to init_reader().
    Status _do_init_reader(ReaderInitContext* /*ctx*/) override { return init_reader(); }
62
63
private:
64
    // Presumably fills the configuration members below (`_paths`, `_file_type`,
    // `_properties`, `_mode`, ...) from the scan range -- TODO confirm in the .cpp.
    Status _init_from_scan_range(const TMetaScanRange& scan_range);
65
    // Row-building helpers that write into the caller-supplied mutable columns;
    // `_append_file_rows` additionally takes the path of a single file.
    Status _build_rows(std::vector<MutableColumnPtr>& columns);
66
    Status _append_file_rows(const std::string& path, std::vector<MutableColumnPtr>& columns);
67
68
    // Output modes known to the reader. `_mode_type` holds the active one.
    enum class Mode { SCHEMA, METADATA, FILE_METADATA, KEY_VALUE_METADATA, BLOOM_PROBE };
69
70
    // Non-owning raw pointer; defaults to nullptr.
    RuntimeState* _state = nullptr;
71
    std::vector<SlotDescriptor*> _slots;
72
    TMetaScanRange _scan_range;
73
    std::vector<std::string> _paths;
74
    // File system type and properties for remote Parquet access.
75
    TFileType::type _file_type = TFileType::FILE_LOCAL;
76
    std::map<std::string, std::string> _properties;
77
    // Mode kept as a string next to its enum form `_mode_type`, which defaults
    // to Mode::METADATA. Presumably `_mode` is the raw requested value and
    // `_mode_type` its parsed form -- verify in the .cpp.
    std::string _mode;
78
    Mode _mode_type = Mode::METADATA;
79
    // NOTE(review): by their names these look like inputs for Mode::BLOOM_PROBE
    // (column to probe and literal to test) -- confirm against the implementation.
    std::string _bloom_column;
80
    std::string _bloom_literal;
81
    bool _eof = false;
82
    // Sole owner of the ModeHandler instance (may be null until one is created).
    std::unique_ptr<ModeHandler> _mode_handler;
83
};
84
85
} // namespace doris