Coverage Report

Created: 2026-04-15 19:34

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/format/table/hive_reader.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
#include <memory>
20
#include <vector>
21
22
#include "format/orc/vorc_reader.h"
23
#include "format/parquet/vparquet_reader.h"
24
#include "format/table/table_format_reader.h"
25
namespace doris {
26
27
// Common base for the Hive table readers declared in this header. It wraps a
// parquet/orc file-format reader (handed over as a GenericReader) and is used
// to read the parquet/orc data files of a Hive table.
28
class HiveReader : public TableFormatReader, public TableSchemaChangeHelper {
29
public:
30
    // Takes ownership of `file_format_reader` and forwards it, together with the
    // scan context, to TableFormatReader; `is_file_slot` is only stored.
    // Note the base is called with (state, profile) while this constructor
    // receives (profile, state).
    // NOTE(review): the ';' after the empty body is redundant.
    HiveReader(std::unique_ptr<GenericReader> file_format_reader, RuntimeProfile* profile,
31
               RuntimeState* state, const TFileScanRangeParams& params, const TFileRangeDesc& range,
32
               io::IOContext* io_ctx, const std::set<TSlotId>* is_file_slot,
33
               FileMetaCache* meta_cache)
34
51
            : TableFormatReader(std::move(file_format_reader), state, profile, params, range,
35
51
                                io_ctx, meta_cache),
36
51
              _is_file_slot(is_file_slot) {};
37
38
51
    ~HiveReader() override = default;
39
40
    // Declared final here; the definition is not part of this header.
    // Presumably fills `block` and reports the row count / end-of-data through
    // `read_rows` and `eof` -- confirm against the implementation file.
    Status get_next_block_inner(Block* block, size_t* read_rows, bool* eof) final;
41
42
15
    // Unconditionally returns Status::OK(); this reader does no work here.
    // NOTE(review): the ';' after the body is redundant.
    Status init_row_filters() final { return Status::OK(); };
43
44
protected:
45
    // Background for this member: https://github.com/apache/doris/pull/23369
    // Pointer received through the constructor; this class never deletes it
    // (the destructor is defaulted). Presumably the ids of the slots that are
    // read from the data file -- TODO confirm against the PR above.
    const std::set<TSlotId>* _is_file_slot = nullptr;
47
};
48
49
// HiveReader specialization for ORC data files. Marked final, so it cannot be
// derived from.
class HiveOrcReader final : public HiveReader {
50
public:
51
    // Macro defined outside this header; presumably generates the static
    // factory helpers for this class -- confirm against the macro definition.
    ENABLE_FACTORY_CREATOR(HiveOrcReader);
52
    // Forwards every argument unchanged, in the same order, to HiveReader.
    HiveOrcReader(std::unique_ptr<GenericReader> file_format_reader, RuntimeProfile* profile,
53
                  RuntimeState* state, const TFileScanRangeParams& params,
54
                  const TFileRangeDesc& range, io::IOContext* io_ctx,
55
                  const std::set<TSlotId>* is_file_slot, FileMetaCache* meta_cache)
56
24
            : HiveReader(std::move(file_format_reader), profile, state, params, range, io_ctx,
57
24
                         is_file_slot, meta_cache) {};
58
24
    ~HiveOrcReader() final = default;
59
60
    // ORC-specific initialization; only declared here, the definition is not
    // in this header. It is not marked virtual/override, and its parameter list
    // differs from HiveParquetReader::init_reader (no column-predicate map and
    // no column-name-to-slot-id map).
    // `col_name_to_block_idx` is the only non-const pointer parameter;
    // whether it is written to cannot be told from the declaration.
    Status init_reader(
61
            const std::vector<std::string>& read_table_col_names,
62
            std::unordered_map<std::string, uint32_t>* col_name_to_block_idx,
63
            const VExprContextSPtrs& conjuncts, const TupleDescriptor* tuple_descriptor,
64
            const RowDescriptor* row_descriptor,
65
            const VExprContextSPtrs* not_single_slot_filter_conjuncts,
66
            const std::unordered_map<int, VExprContextSPtrs>* slot_id_to_filter_conjuncts);
67
68
private:
69
    // Static helper returning a ColumnIdResult computed from the ORC file type
    // and the tuple descriptor; definition not visible in this header.
    static ColumnIdResult _create_column_ids(const orc::Type* orc_type,
70
                                             const TupleDescriptor* tuple_descriptor);
71
72
    // Variant of _create_column_ids; judging by the name it presumably matches
    // columns by top-level position instead of by name -- TODO confirm.
    static ColumnIdResult _create_column_ids_by_top_level_col_index(
73
            const orc::Type* orc_type, const TupleDescriptor* tuple_descriptor);
74
};
75
76
// HiveReader specialization for Parquet data files. Marked final, so it cannot
// be derived from.
class HiveParquetReader final : public HiveReader {
77
public:
78
    // Macro defined outside this header; presumably generates the static
    // factory helpers for this class -- confirm against the macro definition.
    ENABLE_FACTORY_CREATOR(HiveParquetReader);
79
    // Forwards every argument unchanged, in the same order, to HiveReader.
    HiveParquetReader(std::unique_ptr<GenericReader> file_format_reader, RuntimeProfile* profile,
80
                      RuntimeState* state, const TFileScanRangeParams& params,
81
                      const TFileRangeDesc& range, io::IOContext* io_ctx,
82
                      const std::set<TSlotId>* is_file_slot, FileMetaCache* meta_cache)
83
27
            : HiveReader(std::move(file_format_reader), profile, state, params, range, io_ctx,
84
27
                         is_file_slot, meta_cache) {};
85
27
    ~HiveParquetReader() final = default;
86
87
    // Parquet-specific initialization; only declared here, the definition is
    // not in this header. It is not marked virtual/override. Compared with
    // HiveOrcReader::init_reader it additionally takes `slot_id_to_predicates`
    // (passed by non-const reference) and `colname_to_slot_id`.
    // Whether `col_name_to_block_idx` or `slot_id_to_predicates` are modified
    // cannot be told from the declaration.
    Status init_reader(
88
            const std::vector<std::string>& read_table_col_names,
89
            std::unordered_map<std::string, uint32_t>* col_name_to_block_idx,
90
            const VExprContextSPtrs& conjuncts,
91
            phmap::flat_hash_map<int, std::vector<std::shared_ptr<ColumnPredicate>>>&
92
                    slot_id_to_predicates,
93
            const TupleDescriptor* tuple_descriptor, const RowDescriptor* row_descriptor,
94
            const std::unordered_map<std::string, int>* colname_to_slot_id,
95
            const VExprContextSPtrs* not_single_slot_filter_conjuncts,
96
            const std::unordered_map<int, VExprContextSPtrs>* slot_id_to_filter_conjuncts);
97
98
private:
99
    // Static helper returning a ColumnIdResult computed from the Parquet field
    // descriptor and the tuple descriptor; definition not visible in this header.
    static ColumnIdResult _create_column_ids(const FieldDescriptor* field_desc,
100
                                             const TupleDescriptor* tuple_descriptor);
101
102
    // Variant of _create_column_ids; judging by the name it presumably matches
    // columns by top-level position instead of by name -- TODO confirm.
    static ColumnIdResult _create_column_ids_by_top_level_col_index(
103
            const FieldDescriptor* field_desc, const TupleDescriptor* tuple_descriptor);
104
};
105
} // namespace doris