Coverage Report

Created: 2026-07-02 13:50

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/format_v2/table/paimon_reader.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "format_v2/table/paimon_reader.h"
19
20
#include <glog/logging.h>
21
22
#include <cstring>
23
#include <string>
24
#include <utility>
25
26
#include "exprs/vexpr_context.h"
27
#include "format/table/deletion_vector_reader.h"
28
#include "format_v2/column_mapper.h"
29
#include "format_v2/jni/paimon_jni_reader.h"
30
#include "format_v2/table/schema_history_util.h"
31
#include "gen_cpp/PlanNodes_types.h"
32
33
namespace doris::format::paimon {
34
35
6
// Remembers the schema id attached to this split, then lets the base
// TableReader finish preparing the split.
//
// `_split_schema_id` is set to -1 whenever the split's Paimon params do not
// carry a schema id, so a value from an earlier split is never reused.
Status PaimonReader::prepare_split(const format::SplitReadOptions& options) {
    const auto& split_params = options.current_range.table_format_params.paimon_params;
    _split_schema_id = split_params.__isset.schema_id ? split_params.schema_id : -1;
    return format::TableReader::prepare_split(options);
}
43
44
8
// Chooses how table columns are matched to file columns for the current split:
// BY_FIELD_ID when can_map_by_history_schema() accepts the scan params together
// with the split's schema id, BY_NAME otherwise.
format::TableColumnMappingMode PaimonReader::mapping_mode() const {
    if (format::can_map_by_history_schema(_scan_params, _split_schema_id)) {
        return format::TableColumnMappingMode::BY_FIELD_ID;
    }
    return format::TableColumnMappingMode::BY_NAME;
}
49
50
3
// Annotates `file_schema` from the schema history, but only when the current
// split is mapped by field id. In BY_NAME mode the schema is left untouched
// and OK is returned.
//
// `file_schema` must not be null.
Status PaimonReader::annotate_file_schema(std::vector<format::ColumnDefinition>* file_schema) {
    DORIS_CHECK(file_schema != nullptr);
    const bool map_by_field_id = mapping_mode() == format::TableColumnMappingMode::BY_FIELD_ID;
    if (map_by_field_id) {
        return format::annotate_file_schema_from_history(_scan_params, _split_schema_id,
                                                         file_schema);
    }
    return Status::OK();
}
57
58
// Translates the optional Paimon deletion file of a split into `desc`.
//
// `*has_delete_file` is set to false and `desc` is left untouched when the
// split has no deletion file. Otherwise `desc` is filled in and
// `*has_delete_file` is set to true. Always returns OK.
//
// `desc` and `has_delete_file` must not be null.
Status PaimonReader::_parse_deletion_vector_file(const TTableFormatFileDesc& t_desc,
                                                 DeleteFileDesc* desc, bool* has_delete_file) {
    DORIS_CHECK(desc != nullptr);
    DORIS_CHECK(has_delete_file != nullptr);

    *has_delete_file = false;
    const auto& paimon_params = t_desc.paimon_params;
    if (!paimon_params.__isset.deletion_file) {
        return Status::OK();
    }
    const auto& dv_file = paimon_params.deletion_file;

    // Key layout: "paimon_dv:" + <path bytes> + <raw in-memory bytes of the offset>.
    const std::string prefix = "paimon_dv:";
    const size_t prefix_len = prefix.size();
    const size_t path_len = dv_file.path.size();
    desc->key.resize(prefix_len + path_len + sizeof(dv_file.offset));
    char* const key_bytes = desc->key.data();
    memcpy(key_bytes, prefix.data(), prefix_len);
    memcpy(key_bytes + prefix_len, dv_file.path.data(), path_len);
    memcpy(key_bytes + prefix_len + path_len, &dv_file.offset, sizeof(dv_file.offset));

    desc->format = DeleteFileDesc::Format::PAIMON;
    desc->path = dv_file.path;
    desc->start_offset = dv_file.offset;
    // NOTE(review): the extra 4 bytes presumably cover a fixed-size field stored next to
    // the vector payload - confirm against the Paimon deletion-vector file layout.
    desc->size = dv_file.length + 4;
    // NOTE(review): -1 looks like an "unknown file size" marker - confirm with the consumer.
    desc->file_size = -1;

    *has_delete_file = true;
    return Status::OK();
}
85
86
1
// Initializes the hybrid reader itself by handing `options` to the base
// TableReader. No child reader is created in this function.
Status PaimonHybridReader::init(format::TableReadOptions&& options) {
    Status init_status = format::TableReader::init(std::move(options));
    return init_status;
}
89
90
2
// Selects the child reader that matches this split (creating it on first use)
// and forwards the split to it. Returns the first error encountered.
Status PaimonHybridReader::prepare_split(const format::SplitReadOptions& options) {
    RETURN_IF_ERROR(_ensure_current_split_reader(options));
    DORIS_CHECK(_current_split_reader != nullptr);
    auto* const active_reader = _current_split_reader;
    return active_reader->prepare_split(options);
}
95
96
0
// Forwards the read to the child reader selected for the current split.
// A child reader must already have been selected; this is checked with
// DORIS_CHECK.
Status PaimonHybridReader::get_block(Block* block, bool* eos) {
    DORIS_CHECK(_current_split_reader != nullptr);
    auto* const active_reader = _current_split_reader;
    return active_reader->get_block(block, eos);
}
100
101
1
// Closes whichever child readers exist (native first, then JNI) and clears the
// current-reader pointer. Both readers are always closed; when both fail, the
// native reader's error is the one returned.
Status PaimonHybridReader::close() {
    Status first_error = Status::OK();
    if (_native_reader != nullptr) {
        first_error = _native_reader->close();
    }
    if (_jni_reader != nullptr) {
        Status jni_status = _jni_reader->close();
        // Keep the earlier failure, if any; only adopt the JNI error when nothing failed yet.
        if (first_error.ok() && !jni_status.ok()) {
            first_error = std::move(jni_status);
        }
    }
    _current_split_reader = nullptr;
    return first_error;
}
115
116
2
// Points `_current_split_reader` at the child reader that should serve the
// given split, creating and initializing that child on first use.
//
// A split is served by the JNI reader when _is_jni_split() says so, and by the
// native reader otherwise. In the native case the split's file format must map
// to a format accepted by _to_file_format().
//
// On any error `_current_split_reader` is left null instead of still pointing
// at the reader chosen for the previous split, so a later get_block() cannot
// silently read from the wrong reader.
Status PaimonHybridReader::_ensure_current_split_reader(const format::SplitReadOptions& options) {
    // Forget the previous selection up front; it is re-published only on success.
    _current_split_reader = nullptr;

    if (_is_jni_split(options.current_range)) {
        DCHECK(options.current_split_format == format::FileFormat::JNI);
        if (_jni_reader == nullptr) {
            _jni_reader = std::make_unique<format::paimon::PaimonJniReader>();
            // NOTE(review): if init fails the reader stays cached (so close() still sees it)
            // and would be reused by a later split - confirm callers abort the scan on error.
            RETURN_IF_ERROR(_init_child_reader(_jni_reader.get(), format::FileFormat::JNI));
        }
        _current_split_reader = _jni_reader.get();
        return Status::OK();
    }

    format::FileFormat file_format;
    RETURN_IF_ERROR(_to_file_format(options.current_range, &file_format));
    DCHECK(options.current_split_format == file_format);
    DCHECK(file_format == format::FileFormat::PARQUET ||
           file_format == format::FileFormat::ORC);
    if (_native_reader == nullptr) {
        _native_reader = format::paimon::PaimonReader::create_unique();
        // The native reader is initialized with the format of the first native split it
        // sees. NOTE(review): same caching caveat as the JNI branch applies on failure.
        RETURN_IF_ERROR(_init_child_reader(_native_reader.get(), file_format));
    }
    _current_split_reader = _native_reader.get();
    return Status::OK();
}
138
139
// Initializes a child reader with this reader's own settings, using
// `file_format` as the child's format.
//
// The conjuncts handed to the child are produced by _clone_conjuncts(); every
// other option is copied from this reader's members. `reader` must not be null.
Status PaimonHybridReader::_init_child_reader(format::TableReader* reader,
                                              format::FileFormat file_format) {
    DORIS_CHECK(reader != nullptr);

    VExprContextSPtrs child_conjuncts;
    RETURN_IF_ERROR(_clone_conjuncts(&child_conjuncts));

    format::TableReadOptions child_options {
            .projected_columns = _projected_columns,
            .column_predicates = _table_column_predicates,
            .conjuncts = std::move(child_conjuncts),
            .format = file_format,
            .scan_params = _scan_params,
            .io_ctx = _io_ctx,
            .runtime_state = _runtime_state,
            .scanner_profile = _scanner_profile,
            .push_down_agg_type = _push_down_agg_type,
            .condition_cache_digest = _condition_cache_digest,
    };
    return reader->init(std::move(child_options));
}
157
158
2
// Fills `*conjuncts` with one new VExprContext per entry of `_conjuncts`, each
// built around a clone of the original expression tree's root.
//
// Any previous content of `*conjuncts` is discarded. Stops and returns the
// error as soon as one tree fails to clone. `conjuncts` must not be null.
Status PaimonHybridReader::_clone_conjuncts(VExprContextSPtrs* conjuncts) const {
    DORIS_CHECK(conjuncts != nullptr);
    VExprContextSPtrs& cloned = *conjuncts;
    cloned.clear();
    cloned.reserve(_conjuncts.size());
    for (const auto& source_ctx : _conjuncts) {
        VExprSPtr cloned_root;
        RETURN_IF_ERROR(format::clone_table_expr_tree(source_ctx->root(), &cloned_root));
        cloned.emplace_back(VExprContext::create_shared(std::move(cloned_root)));
    }
    return Status::OK();
}
169
170
5
bool PaimonHybridReader::_is_jni_split(const TFileRangeDesc& range) {
171
5
    return range.__isset.table_format_params && range.table_format_params.__isset.paimon_params &&
172
5
           range.table_format_params.paimon_params.__isset.reader_type &&
173
5
           range.table_format_params.paimon_params.reader_type == TPaimonReaderType::PAIMON_JNI;
174
5
}
175
176
// Maps the Thrift file format of a range to the native reader's FileFormat.
//
// A range without an explicit format type is treated as Parquet. Only Parquet
// and ORC are accepted; any other format returns NotSupported and leaves
// `*file_format` unwritten. `file_format` must not be null.
Status PaimonHybridReader::_to_file_format(const TFileRangeDesc& range,
                                           format::FileFormat* file_format) {
    DORIS_CHECK(file_format != nullptr);

    auto format_type = TFileFormatType::FORMAT_PARQUET;
    if (range.__isset.format_type) {
        format_type = range.format_type;
    }

    if (format_type == TFileFormatType::FORMAT_PARQUET) {
        *file_format = format::FileFormat::PARQUET;
        return Status::OK();
    }
    if (format_type == TFileFormatType::FORMAT_ORC) {
        *file_format = format::FileFormat::ORC;
        return Status::OK();
    }
    return Status::NotSupported("Unsupported native Paimon file format {}",
                                to_string(format_type));
}
193
194
} // namespace doris::format::paimon