Coverage Report

Created: 2026-07-02 14:47

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/format_v2/table/hudi_reader.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <memory>
21
#include <vector>
22
23
#include "format_v2/table_reader.h"
24
25
namespace doris::format::hudi {
26
27
// Hudi-specific table reader. Derives from format::TableReader and overrides split
// preparation, the column mapping mode, and file-schema annotation; the overrides are
// only declared here and are defined outside this header.
class HudiReader final : public format::TableReader {
28
public:
29
    // NOTE(review): macro is defined outside this header; presumably it generates the
    // project's factory helpers for HudiReader -- confirm against its definition.
    ENABLE_FACTORY_CREATOR(HudiReader);
30
6
    ~HudiReader() final = default;
31
32
    // Per-split preparation hook overriding format::TableReader::prepare_split.
    // Returns a Status; the logic itself is not visible in this header.
    Status prepare_split(const format::SplitReadOptions& options) override;
33
34
    // The TEST_* members below are compiled only when BE_TEST is defined. They let unit
    // tests reach `_scan_params` and the protected hooks without widening the normal API.
#ifdef BE_TEST
35
6
    // Stores `params` in `_scan_params`. The pointer is stored as-is (no copy is made).
    // NOTE(review): `_scan_params` is not declared in this class; presumably inherited
    // from format::TableReader -- confirm.
    void TEST_set_scan_params(TFileScanRangeParams* params) { _scan_params = params; }
36
8
    // Forwards to the protected mapping_mode().
    format::TableColumnMappingMode TEST_mapping_mode() const { return mapping_mode(); }
37
4
    // Forwards to the protected annotate_file_schema() and returns its Status unchanged.
    Status TEST_annotate_file_schema(std::vector<format::ColumnDefinition>* file_schema) {
38
4
        return annotate_file_schema(file_schema);
39
4
    }
40
#endif
41
42
protected:
43
    // Overrides the base-class hook that reports the table column mapping mode.
    format::TableColumnMappingMode mapping_mode() const override;
44
    // Overrides the base-class hook that receives the file schema through a mutable pointer.
    // NOTE(review): presumably annotates the entries of `file_schema` in place -- confirm
    // against the definition.
    Status annotate_file_schema(std::vector<format::ColumnDefinition>* file_schema) override;
45
46
private:
47
    // Initialized to -1.
    // NOTE(review): presumably the schema id of the current split, with -1 meaning
    // "not set" -- confirm in the .cpp.
    // NOTE(review): int64_t is used without a visible <cstdint> include; presumably it
    // arrives transitively via format_v2/table_reader.h -- confirm.
    int64_t _split_schema_id = -1;
48
};
49
50
// Hudi MOR scans can contain both JNI splits that need log-file merge semantics and native
51
// data-file splits without delta logs in the same SplitSource. FileScannerV2 owns one table reader
52
// for the scanner lifetime, so this reader keeps native and JNI child readers internally and
53
// dispatches each split to the matching child reader.
54
class HudiHybridReader final : public format::TableReader {
55
public:
56
0
    ~HudiHybridReader() override = default;
57
58
    // format::TableReader interface overrides; all are defined outside this header.
    // `init` takes its options by rvalue reference.
    Status init(format::TableReadOptions&& options) override;
59
    Status prepare_split(const format::SplitReadOptions& options) override;
60
    // Takes an output block and an end-of-stream flag, both through pointers.
    Status get_block(Block* block, bool* eos) override;
61
    Status close() override;
62
63
private:
    // NOTE(review): the helpers below are only declared here; the descriptions are
    // inferred from their names and signatures -- confirm against the .cpp.
64
    // Presumably selects or creates the child reader matching the split in `options`
    // and records it in `_current_split_reader`.
    Status _ensure_current_split_reader(const format::SplitReadOptions& options);
65
    // Presumably initializes `reader` (a non-owning pointer) for the given file format.
    Status _init_child_reader(format::TableReader* reader, format::FileFormat file_format);
66
    // Const member that writes its result through `conjuncts`; presumably produces
    // cloned copies of this reader's conjunct expression contexts.
    Status _clone_conjuncts(VExprContextSPtrs* conjuncts) const;
67
    // Static helper: derives a TFileFormatType from the scan params and the range.
    static TFileFormatType::type _range_format_type(const TFileScanRangeParams& params,
68
                                                    const TFileRangeDesc& range);
69
    // Static helper: presumably true when the split must go to the JNI reader.
    static bool _is_jni_split(const TFileScanRangeParams& params, const TFileRangeDesc& range);
70
    // Static helper: writes a format::FileFormat through `file_format` and returns a
    // Status; presumably fails for formats with no native mapping.
    static Status _to_file_format(const TFileScanRangeParams& params, const TFileRangeDesc& range,
71
                                  format::FileFormat* file_format);
72
73
    // Owned child readers, one per split kind.
    std::unique_ptr<format::TableReader> _native_reader; // handle native parquet/orc splits
74
    std::unique_ptr<format::TableReader> _jni_reader;    // handle MOR JNI splits
75
    // Non-owning raw pointer, nullptr until set.
    // NOTE(review): presumably points at one of the two child readers above -- confirm.
    format::TableReader* _current_split_reader = nullptr;
76
};
77
78
} // namespace doris::format::hudi