Coverage Report

Created: 2026-03-16 04:30

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/format/avro/avro_jni_reader.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "format/avro/avro_jni_reader.h"
19
20
#include <map>
21
#include <ostream>
22
23
#include "core/data_type/data_type_array.h"
24
#include "core/data_type/data_type_factory.hpp"
25
#include "core/data_type/data_type_map.h"
26
#include "core/data_type/data_type_struct.h"
27
#include "runtime/descriptors.h"
28
29
namespace doris {
30
#include "common/compile_check_begin.h"
31
32
// Constructor taking a RuntimeState: `file_slot_descs`, `state` and `profile` are
// forwarded to the JniReader base, while `params` and `range` initialize the
// `_params` and `_range` members.
AvroJNIReader::AvroJNIReader(RuntimeState* state, RuntimeProfile* profile,
33
                             const TFileScanRangeParams& params,
34
                             const std::vector<SlotDescriptor*>& file_slot_descs,
35
                             const TFileRangeDesc& range)
36
0
        : JniReader(file_slot_descs, state, profile), _params(params), _range(range) {}
37
38
// Constructor without a RuntimeState: the JniReader base receives `nullptr` in the
// state position, together with `file_slot_descs` and `profile`; `params` and
// `range` initialize the `_params` and `_range` members.
AvroJNIReader::AvroJNIReader(RuntimeProfile* profile, const TFileScanRangeParams& params,
39
                             const TFileRangeDesc& range,
40
                             const std::vector<SlotDescriptor*>& file_slot_descs)
41
0
        : JniReader(file_slot_descs, nullptr, profile), _params(params), _range(range) {}
42
43
0
// Defaulted destructor: no explicit cleanup is written here.
AvroJNIReader::~AvroJNIReader() = default;
44
45
0
// Pulls the next batch from the JNI connector into `block`; the connector reports
// the row count through `read_rows` and end-of-data through `eof`.
// Once `*eof` is set the connector is closed. Any error from the fetch or from
// the close is returned to the caller; otherwise Status::OK().
Status AvroJNIReader::get_next_block(Block* block, size_t* read_rows, bool* eof) {
    RETURN_IF_ERROR(_jni_connector->get_next_block(block, read_rows, eof));
    if (!*eof) {
        // More data may follow: keep the connector open.
        return Status::OK();
    }
    // End of data reached: release the connector and surface a failed close.
    RETURN_IF_ERROR(_jni_connector->close());
    return Status::OK();
}
52
53
// Adds one (column name -> data type) entry to `name_to_type` for every file slot
// descriptor held by this reader. `missing_cols` is not modified here.
// Always returns Status::OK().
Status AvroJNIReader::get_columns(std::unordered_map<std::string, DataTypePtr>* name_to_type,
                                  std::unordered_set<std::string>* missing_cols) {
    for (auto it = _file_slot_descs.begin(); it != _file_slot_descs.end(); ++it) {
        const auto& slot = *it;
        // emplace() keeps an already present entry for the same column name.
        name_to_type->emplace(slot->col_name(), slot->type());
    }
    return Status::OK();
}
60
61
0
// Creates, initializes and opens the JNI connector used for reading rows.
//
// The scanner parameters are assembled as follows:
//   - "required_fields": column names of the file slots, joined with ','
//   - "columns_types":   the matching JNI type strings, joined with '#'
//   - "file_type", "is_get_table_schema" ("false") and "hive.serde"
//   - for FILE_S3 only: every entry of `_params.properties`
//   - the split description taken from `_range` (offset, size, file size, uri)
// Insertion into the map never overwrites an existing key, so earlier entries
// take precedence over later ones carrying the same key.
//
// Returns the first error from JniConnector::init() or the result of open().
Status AvroJNIReader::init_reader() {
    std::vector<std::string> column_names;
    std::ostringstream field_list;
    std::ostringstream type_list;
    for (const auto& slot : _file_slot_descs) {
        // Every entry after the first one is preceded by its separator.
        if (!column_names.empty()) {
            field_list << ",";
            type_list << "#";
        }
        std::string name = slot->col_name();
        std::string jni_type = JniConnector::get_jni_type_with_different_string(slot->type());
        field_list << name;
        type_list << jni_type;
        column_names.emplace_back(name);
    }

    const TFileType::type file_type = get_file_type();

    std::map<String, String> scanner_params;
    scanner_params.emplace("required_fields", field_list.str());
    scanner_params.emplace("columns_types", type_list.str());
    scanner_params.emplace("file_type", std::to_string(file_type));
    scanner_params.emplace("is_get_table_schema", "false");
    scanner_params.emplace("hive.serde", "org.apache.hadoop.hive.serde2.avro.AvroSerDe");

    if (file_type == TFileType::FILE_S3) {
        scanner_params.insert(_params.properties.begin(), _params.properties.end());
    }

    // Split description; skipped for keys that are already present.
    scanner_params.emplace("split_start_offset", std::to_string(_range.start_offset));
    scanner_params.emplace("split_size", std::to_string(_range.size));
    scanner_params.emplace("split_file_size", std::to_string(_range.file_size));
    scanner_params.emplace("uri", _range.path);

    _jni_connector = std::make_unique<JniConnector>("org/apache/doris/avro/AvroJNIScanner",
                                                    scanner_params, column_names);
    RETURN_IF_ERROR(_jni_connector->init());
    return _jni_connector->open(_state, _profile);
}
100
101
0
// Picks the file type for this scan: the value carried by the range when its
// `file_type` field is set, otherwise the one from the scan range params.
TFileType::type AvroJNIReader::get_file_type() const {
    // for compatibility: a file type set on the range takes precedence
    return _range.__isset.file_type ? _range.file_type : _params.file_type;
}
111
112
// open the jni connector for parsing schema
113
0
Status AvroJNIReader::init_schema_reader() {
114
0
    std::map<String, String> required_param = {{"uri", _range.path},
115
0
                                               {"file_type", std::to_string(get_file_type())},
116
0
                                               {"is_get_table_schema", "true"}};
117
118
0
    required_param.insert(_params.properties.begin(), _params.properties.end());
119
0
    _jni_connector =
120
0
            std::make_unique<JniConnector>("org/apache/doris/avro/AvroJNIScanner", required_param);
121
0
    return _jni_connector->open(nullptr, _profile);
122
0
}
123
124
// Fetches the table schema string from the JNI connector, parses it as JSON and
// appends one entry per column to `col_names` / `col_types`.
//
// The schema is expected to be a JSON array of objects, each carrying a string
// member "name" plus the members consumed by convert_to_doris_type().
//
// Returns:
//   - the error from JniConnector::get_table_schema() if fetching fails;
//   - InternalError if the text is not valid JSON, is not an array, or contains
//     an element without a string "name" (previously such input was silently
//     accepted or hit unchecked rapidjson accessors);
//   - otherwise the result of closing the connector.
// The connector is also closed (best effort) on the validation error paths.
// convert_to_doris_type() may throw doris::Exception for unsupported types.
Status AvroJNIReader::get_parsed_schema(std::vector<std::string>* col_names,
                                        std::vector<DataTypePtr>* col_types) {
    std::string table_schema_str;
    RETURN_IF_ERROR(_jni_connector->get_table_schema(table_schema_str));

    // Closes the connector before reporting a schema problem; the close status is
    // intentionally dropped so the more useful schema error reaches the caller.
    auto fail = [this](Status error) {
        static_cast<void>(_jni_connector->close());
        return error;
    };

    rapidjson::Document document;
    document.Parse(table_schema_str.c_str());
    if (document.HasParseError()) {
        return fail(Status::InternalError(
                "Failed to parse avro table schema (rapidjson error {} at offset {}): {}",
                static_cast<int>(document.GetParseError()), document.GetErrorOffset(),
                table_schema_str));
    }
    if (!document.IsArray()) {
        return fail(Status::InternalError("Avro table schema is not a JSON array: {}",
                                          table_schema_str));
    }

    col_names->reserve(col_names->size() + document.Size());
    col_types->reserve(col_types->size() + document.Size());
    for (const auto& column_schema : document.GetArray()) {
        if (!column_schema.IsObject()) {
            return fail(Status::InternalError("Avro column schema is not a JSON object: {}",
                                              table_schema_str));
        }
        const auto name_it = column_schema.FindMember("name");
        if (name_it == column_schema.MemberEnd() || !name_it->value.IsString()) {
            return fail(Status::InternalError("Avro column schema has no string \"name\": {}",
                                              table_schema_str));
        }
        // Convert first so that a throwing conversion cannot leave a name without
        // its matching type in the output vectors.
        DataTypePtr column_type = convert_to_doris_type(column_schema);
        col_names->emplace_back(name_it->value.GetString());
        col_types->push_back(column_type);
    }
    return _jni_connector->close();
}
140
141
0
// Converts one column entry of the parsed schema JSON into a nullable Doris type.
//
// `column_schema["type"]` is read as an integer and interpreted as a
// TPrimitiveType value:
//   - INT, STRING, BIGINT, BOOLEAN, DOUBLE, FLOAT, BINARY map directly to the
//     corresponding nullable data type;
//   - ARRAY uses "childColumns"[0] as the element type;
//   - MAP uses a nullable STRING key and "childColumns"[1] as the value type;
//   - STRUCT uses every entry of "childColumns" (with its "name") as a field.
// Nested types are converted recursively and wrapped with make_nullable().
//
// Throws doris::Exception (InternalError) for any other type id, or when a
// complex type lacks the "childColumns" entries it needs.
DataTypePtr AvroJNIReader::convert_to_doris_type(const rapidjson::Value& column_schema) {
    // Returns "childColumns" after verifying that it is an array holding at least
    // `min_children` entries, so the indexed accesses below stay in bounds.
    const auto child_columns_of =
            [&column_schema](rapidjson::SizeType min_children) -> const rapidjson::Value& {
        const auto it = column_schema.FindMember("childColumns");
        if (it == column_schema.MemberEnd() || !it->value.IsArray() ||
            it->value.Size() < min_children) {
            throw Exception(Status::InternalError(
                    "Avro complex type requires at least {} entries in childColumns",
                    min_children));
        }
        return it->value;
    };

    auto schema_type = static_cast< ::doris::TPrimitiveType::type>(column_schema["type"].GetInt());
    switch (schema_type) {
    case TPrimitiveType::INT:
    case TPrimitiveType::STRING:
    case TPrimitiveType::BIGINT:
    case TPrimitiveType::BOOLEAN:
    case TPrimitiveType::DOUBLE:
    case TPrimitiveType::FLOAT:
    case TPrimitiveType::BINARY:
        return DataTypeFactory::instance().create_data_type(thrift_to_type(schema_type), true);
    case TPrimitiveType::ARRAY: {
        const rapidjson::Value& children = child_columns_of(1);
        return make_nullable(std::make_shared<DataTypeArray>(
                make_nullable(convert_to_doris_type(children[0]))));
    }
    case TPrimitiveType::MAP: {
        // The key type is fixed to nullable STRING; only the value type is taken
        // from the schema (second child).
        const rapidjson::Value& children = child_columns_of(2);
        return make_nullable(std::make_shared<DataTypeMap>(
                DataTypeFactory::instance().create_data_type(PrimitiveType::TYPE_STRING, true),
                make_nullable(convert_to_doris_type(children[1]))));
    }
    case TPrimitiveType::STRUCT: {
        DataTypes field_types;
        std::vector<std::string> field_names;
        const rapidjson::Value& children = child_columns_of(0);
        field_types.reserve(children.Size());
        field_names.reserve(children.Size());
        for (const auto& child : children.GetArray()) {
            field_types.push_back(make_nullable(convert_to_doris_type(child)));
            field_names.emplace_back(child["name"].GetString());
        }
        return make_nullable(std::make_shared<DataTypeStruct>(field_types, field_names));
    }
    default:
        // The message used to say "Orc"; this reader handles Avro.
        throw Exception(Status::InternalError("Avro type {} is not supported!",
                                              static_cast<int>(schema_type)));
    }
}
179
180
#include "common/compile_check_end.h"
181
} // namespace doris