Coverage Report

Created: 2026-06-04 22:16

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/format/table/hudi_reader.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "format/table/hudi_reader.h"
19
20
#include <vector>
21
22
#include "common/status.h"
23
24
namespace doris {
25
26
// ============================================================================
27
// HudiParquetReader: on_before_init_reader
28
// ============================================================================
29
0
// Hook run before the underlying Parquet reader is initialized.
//
// Work performed, in order:
//   1. Stores `ctx->column_descs` and `ctx->col_name_to_block_idx` in this reader.
//   2. Calls _extract_partition_values() for `*ctx->range`, filling
//      `_fill_partition_values` / `_fill_partition_value_is_null`.
//   3. Fetches the Parquet file metadata schema and builds the table info node
//      by field id, then publishes it via `ctx->table_info_node`.
//   4. Appends the name of every REGULAR or GENERATED column descriptor to
//      `ctx->column_names`.
//
// Returns the first non-OK Status reported by a helper, an InternalError when
// the schema lookup succeeds but yields no schema, and Status::OK() otherwise.
Status HudiParquetReader::on_before_init_reader(ReaderInitContext* ctx) {
    _column_descs = ctx->column_descs;
    _fill_col_name_to_block_idx = ctx->col_name_to_block_idx;
    RETURN_IF_ERROR(_extract_partition_values(*ctx->range, ctx->tuple_descriptor,
                                              _fill_partition_values,
                                              &_fill_partition_value_is_null));

    // Get parquet file metadata schema (file already opened by init_reader)
    const FieldDescriptor* field_desc = nullptr;
    RETURN_IF_ERROR(get_file_metadata_schema(&field_desc));
    DCHECK(field_desc != nullptr);
    // DCHECK is compiled out in release builds, and the pointer is dereferenced
    // below, so report a missing schema as an error instead of dereferencing null.
    if (field_desc == nullptr) {
        return Status::InternalError("Hudi parquet reader: file metadata schema is null");
    }

    // Build table_info_node using field_id matching (shared with Paimon/Iceberg)
    RETURN_IF_ERROR(gen_table_info_node_by_field_id(
            get_scan_params(), get_scan_range().table_format_params.hudi_params.schema_id,
            get_tuple_descriptor(), *field_desc));
    // NOTE(review): table_info_node_ptr is presumably populated by the call above — confirm.
    ctx->table_info_node = table_info_node_ptr;

    // Extract column names from descriptors
    for (const auto& desc : *ctx->column_descs) {
        if (desc.category == ColumnCategory::REGULAR ||
            desc.category == ColumnCategory::GENERATED) {
            ctx->column_names.push_back(desc.name);
        }
    }
    return Status::OK();
}
55
56
// ============================================================================
57
// HudiOrcReader: on_before_init_reader
58
// ============================================================================
59
0
// Hook run before the underlying ORC reader is initialized.
//
// Stores the column descriptors and `col_name_to_block_idx` from `ctx`,
// extracts partition values for `*ctx->range`, builds the table info node by
// field id from the ORC file type, publishes it through `ctx->table_info_node`,
// and appends the names of REGULAR / GENERATED columns to `ctx->column_names`.
//
// Returns the first non-OK Status reported by a helper, Status::OK() otherwise.
Status HudiOrcReader::on_before_init_reader(ReaderInitContext* ctx) {
    _column_descs = ctx->column_descs;
    _fill_col_name_to_block_idx = ctx->col_name_to_block_idx;
    RETURN_IF_ERROR(_extract_partition_values(*ctx->range, ctx->tuple_descriptor,
                                              _fill_partition_values,
                                              &_fill_partition_value_is_null));

    // ORC file type (file already opened by init_reader)
    const orc::Type* file_type = nullptr;
    RETURN_IF_ERROR(get_file_type(&file_type));

    // Table info node is built by matching on field_id
    RETURN_IF_ERROR(gen_table_info_node_by_field_id(
            get_scan_params(), get_scan_range().table_format_params.hudi_params.schema_id,
            get_tuple_descriptor(), file_type));
    ctx->table_info_node = table_info_node_ptr;

    // Collect the names of REGULAR and GENERATED columns, skipping every other category
    auto& collected_names = ctx->column_names;
    for (const auto& col : *ctx->column_descs) {
        const bool is_regular_or_generated = col.category == ColumnCategory::REGULAR ||
                                             col.category == ColumnCategory::GENERATED;
        if (!is_regular_or_generated) {
            continue;
        }
        collected_names.push_back(col.name);
    }
    return Status::OK();
}
84
85
} // namespace doris