Coverage Report

Created: 2026-03-12 17:15

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/format/table/hive_reader.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
#include <memory>
20
#include <vector>
21
22
#include "format/orc/vorc_reader.h"
23
#include "format/parquet/vparquet_reader.h"
24
#include "format/table/table_format_reader.h"
25
namespace doris {
26
#include "common/compile_check_begin.h"
27
28
// Common base of the Hive table readers declared in this header (HiveOrcReader and
// HiveParquetReader). It holds a parquet/orc file reader, handed to the TableFormatReader
// base, and is used to read the parquet/orc tables of Hive.
29
class HiveReader : public TableFormatReader, public TableSchemaChangeHelper {
30
public:
31
    // Forwards the file reader and scan context to TableFormatReader and records
    // `is_file_slot`.
    //
    // `file_format_reader` is moved into the base class. Note that the base constructor is
    // called with (state, profile), i.e. the opposite order of this constructor's
    // (profile, state) parameters.
    // `is_file_slot` is stored as a raw pointer without copying the set; presumably the
    // caller keeps the set alive for the lifetime of this reader -- TODO confirm.
    // NOTE(review): the ';' after the constructor body is redundant.
    HiveReader(std::unique_ptr<GenericReader> file_format_reader, RuntimeProfile* profile,
32
               RuntimeState* state, const TFileScanRangeParams& params, const TFileRangeDesc& range,
33
               io::IOContext* io_ctx, const std::set<TSlotId>* is_file_slot,
34
               FileMetaCache* meta_cache)
35
36.0k
            : TableFormatReader(std::move(file_format_reader), state, profile, params, range,
36
36.0k
                                io_ctx, meta_cache),
37
36.0k
              _is_file_slot(is_file_slot) {};
38
39
36.1k
    ~HiveReader() override = default;
40
41
    // Declared here and defined out of line. Marked final, so the ORC/Parquet subclasses
    // cannot override it. Presumably fills `block` and reports progress through
    // `read_rows` / `eof` -- confirm against the definition.
    Status get_next_block_inner(Block* block, size_t* read_rows, bool* eof) final;
42
43
12.3k
    // No-op for Hive readers: always returns Status::OK() without doing any work.
    Status init_row_filters() final { return Status::OK(); };
44
45
protected:
46
    // Pointer recorded by the constructor; nullptr unless the constructor sets it.
    // Background: https://github.com/apache/doris/pull/23369
    // NOTE(review): presumably the ids of the slots that are read from the data file
    // itself -- confirm against the PR above.
    const std::set<TSlotId>* _is_file_slot = nullptr;
48
};
49
50
// Hive reader specialization for ORC. It adds an ORC-specific init_reader() and two static
// helpers that take an orc::Type; everything else is inherited from HiveReader.
class HiveOrcReader final : public HiveReader {
51
public:
52
    // Macro defined outside this header; presumably generates the factory helpers used to
    // create instances of this class -- confirm at the macro definition.
    ENABLE_FACTORY_CREATOR(HiveOrcReader);
53
    // Passes every argument through to HiveReader unchanged (`file_format_reader` is moved).
    HiveOrcReader(std::unique_ptr<GenericReader> file_format_reader, RuntimeProfile* profile,
54
                  RuntimeState* state, const TFileScanRangeParams& params,
55
                  const TFileRangeDesc& range, io::IOContext* io_ctx,
56
                  const std::set<TSlotId>* is_file_slot, FileMetaCache* meta_cache)
57
23.6k
            : HiveReader(std::move(file_format_reader), profile, state, params, range, io_ctx,
58
23.6k
                         is_file_slot, meta_cache) {};
59
    ~HiveOrcReader() final = default;
60
61
    // Declared here and defined out of line; not marked virtual/override in this header.
    // The parameter list differs from HiveParquetReader::init_reader (no column-predicate
    // map and no column-name-to-slot-id map).
    // NOTE(review): judging by the parameter names, this receives the table columns to
    // read, a column-name -> block-index map, and the filter conjuncts; the exact contract
    // lives in the definition -- confirm there.
    Status init_reader(
62
            const std::vector<std::string>& read_table_col_names,
63
            std::unordered_map<std::string, uint32_t>* col_name_to_block_idx,
64
            const VExprContextSPtrs& conjuncts, const TupleDescriptor* tuple_descriptor,
65
            const RowDescriptor* row_descriptor,
66
            const VExprContextSPtrs* not_single_slot_filter_conjuncts,
67
            const std::unordered_map<int, VExprContextSPtrs>* slot_id_to_filter_conjuncts);
68
69
private:
70
    // Static helper defined out of line: produces a ColumnIdResult from the ORC file type
    // and the tuple descriptor. How columns are matched is not visible here -- see the
    // definition.
    static ColumnIdResult _create_column_ids(const orc::Type* orc_type,
71
                                             const TupleDescriptor* tuple_descriptor);
72
73
    // Same inputs and result type as _create_column_ids; presumably matches columns by
    // their top-level position rather than by name (inferred from the name only) -- TODO
    // confirm.
    static ColumnIdResult _create_column_ids_by_top_level_col_index(
74
            const orc::Type* orc_type, const TupleDescriptor* tuple_descriptor);
75
};
76
77
// Hive reader specialization for Parquet. It adds a Parquet-specific init_reader() and two
// static helpers that take a FieldDescriptor; everything else is inherited from HiveReader.
class HiveParquetReader final : public HiveReader {
78
public:
79
    // Macro defined outside this header; presumably generates the factory helpers used to
    // create instances of this class -- confirm at the macro definition.
    ENABLE_FACTORY_CREATOR(HiveParquetReader);
80
    // Passes every argument through to HiveReader unchanged (`file_format_reader` is moved).
    HiveParquetReader(std::unique_ptr<GenericReader> file_format_reader, RuntimeProfile* profile,
81
                      RuntimeState* state, const TFileScanRangeParams& params,
82
                      const TFileRangeDesc& range, io::IOContext* io_ctx,
83
                      const std::set<TSlotId>* is_file_slot, FileMetaCache* meta_cache)
84
12.3k
            : HiveReader(std::move(file_format_reader), profile, state, params, range, io_ctx,
85
12.3k
                         is_file_slot, meta_cache) {};
86
    ~HiveParquetReader() final = default;
87
88
    // Declared here and defined out of line; not marked virtual/override in this header.
    // Compared with HiveOrcReader::init_reader it additionally takes
    // `slot_id_to_predicates` (a non-const reference, so the callee is able to modify the
    // map) and `colname_to_slot_id`.
    // NOTE(review): judging by the parameter names, this receives the table columns to
    // read, a column-name -> block-index map, and the filter conjuncts/predicates; the
    // exact contract lives in the definition -- confirm there.
    Status init_reader(
89
            const std::vector<std::string>& read_table_col_names,
90
            std::unordered_map<std::string, uint32_t>* col_name_to_block_idx,
91
            const VExprContextSPtrs& conjuncts,
92
            phmap::flat_hash_map<int, std::vector<std::shared_ptr<ColumnPredicate>>>&
93
                    slot_id_to_predicates,
94
            const TupleDescriptor* tuple_descriptor, const RowDescriptor* row_descriptor,
95
            const std::unordered_map<std::string, int>* colname_to_slot_id,
96
            const VExprContextSPtrs* not_single_slot_filter_conjuncts,
97
            const std::unordered_map<int, VExprContextSPtrs>* slot_id_to_filter_conjuncts);
98
99
private:
100
    // Static helper defined out of line: produces a ColumnIdResult from the Parquet field
    // descriptor and the tuple descriptor. How columns are matched is not visible here --
    // see the definition.
    static ColumnIdResult _create_column_ids(const FieldDescriptor* field_desc,
101
                                             const TupleDescriptor* tuple_descriptor);
102
103
    // Same inputs and result type as _create_column_ids; presumably matches columns by
    // their top-level position rather than by name (inferred from the name only) -- TODO
    // confirm.
    static ColumnIdResult _create_column_ids_by_top_level_col_index(
104
            const FieldDescriptor* field_desc, const TupleDescriptor* tuple_descriptor);
105
};
106
#include "common/compile_check_end.h"
107
} // namespace doris