Coverage Report

Created: 2026-03-14 06:50

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/format/table/iceberg/schema_parser.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "format/table/iceberg/schema_parser.h"
19
20
#include <rapidjson/document.h>
21
#include <rapidjson/error/en.h>
22
23
#include <cstdint>
24
#include <optional>
25
#include <unordered_set>
26
27
#include "format/table/iceberg/schema.h"
28
#include "format/table/iceberg/types.h"
29
30
namespace doris::iceberg {
31
#include "common/compile_check_begin.h"
32
33
// Out-of-class definitions of SchemaParser's static string constants.
// They hold the JSON member names (e.g. "fields", "element-id") and the
// nested-type tags ("struct", "list", "map") that the parsing functions in
// this file look up or compare against.
// NOTE(review): SCHEMA_ID and IDENTIFIER_FIELD_IDS are not read by any function
// visible in this file — presumably used elsewhere; confirm before removing.
const char* SchemaParser::SCHEMA_ID = "schema-id";
34
const char* SchemaParser::IDENTIFIER_FIELD_IDS = "identifier-field-ids";
35
const char* SchemaParser::TYPE = "type";
36
const char* SchemaParser::STRUCT = "struct";
37
const char* SchemaParser::LIST = "list";
38
const char* SchemaParser::MAP = "map";
39
const char* SchemaParser::FIELDS = "fields";
40
const char* SchemaParser::ELEMENT = "element";
41
const char* SchemaParser::KEY = "key";
42
const char* SchemaParser::VALUE = "value";
43
const char* SchemaParser::DOC = "doc";
44
const char* SchemaParser::NAME = "name";
45
const char* SchemaParser::ID = "id";
46
const char* SchemaParser::ELEMENT_ID = "element-id";
47
const char* SchemaParser::KEY_ID = "key-id";
48
const char* SchemaParser::VALUE_ID = "value-id";
49
const char* SchemaParser::REQUIRED = "required";
50
const char* SchemaParser::ELEMENT_REQUIRED = "element-required";
51
const char* SchemaParser::VALUE_REQUIRED = "value-required";
52
53
47
/// Converts one JSON type node into an Iceberg Type.
///
/// A JSON string is handed to Types::from_primitive_string(). A JSON object is
/// dispatched on its "type" member to the struct, list or map parser.
///
/// @param value JSON node describing the type.
/// @return the parsed type.
/// @throws doris::Exception (INTERNAL_ERROR) when the node is neither a string nor
///         an object whose "type" member is the string "struct", "list" or "map".
std::unique_ptr<Type> SchemaParser::_type_from_json(const rapidjson::Value& value) {
    if (value.IsString()) {
        return Types::from_primitive_string(value.GetString());
    }

    if (value.IsObject()) {
        // Single lookup, and verify the member really is a string: calling
        // GetString() on any other JSON kind trips a rapidjson assertion rather
        // than producing the parse error below.
        const auto type_it = value.FindMember(TYPE);
        if (type_it != value.MemberEnd() && type_it->value.IsString()) {
            const std::string type = type_it->value.GetString();
            if (type == STRUCT) {
                return _struct_from_json(value);
            }
            if (type == LIST) {
                return _list_from_json(value);
            }
            if (type == MAP) {
                return _map_from_json(value);
            }
        }
    }

    throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR, "Cannot parse type from json.");
}
70
71
6
/// Parses a JSON struct node into a StructType.
///
/// The node must carry a "fields" array. Every element must be an object with an
/// integer "id", a string "name", a "type" node and a boolean "required". An
/// optional "doc" string is picked up when present; a JSON null "doc" is treated
/// the same as an absent one.
///
/// @param value JSON object describing the struct.
/// @return the parsed struct type, with fields in array order.
/// @throws doris::Exception (INTERNAL_ERROR) when "fields" is missing or not an
///         array, or when a field entry is malformed.
std::unique_ptr<StructType> SchemaParser::_struct_from_json(const rapidjson::Value& value) {
    const auto fields_it = value.FindMember(FIELDS);
    if (fields_it == value.MemberEnd() || !fields_it->value.IsArray()) {
        throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
                               "Cannot parse struct fields from non-array.");
    }

    const rapidjson::Value& field_array = fields_it->value;
    std::vector<NestedField> fields;
    fields.reserve(field_array.Size());

    for (uint32_t i = 0; i < field_array.Size(); ++i) {
        const rapidjson::Value& field = field_array[i];
        if (!field.IsObject()) {
            throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
                                   "Cannot parse struct field from non-object.");
        }

        // Every mandatory member is checked for presence and JSON kind before it
        // is read: rapidjson's operator[] / Get*() assert (or misbehave in release
        // builds) on a missing member or a kind mismatch.
        const auto id_it = field.FindMember(ID);
        if (id_it == field.MemberEnd() || !id_it->value.IsInt()) {
            throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
                                   "Cannot parse struct field: missing or non-integer id.");
        }
        const int id = id_it->value.GetInt();

        const auto name_it = field.FindMember(NAME);
        if (name_it == field.MemberEnd() || !name_it->value.IsString()) {
            throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
                                   "Cannot parse struct field: missing or non-string name.");
        }
        std::string name = name_it->value.GetString();

        const auto type_it = field.FindMember(TYPE);
        if (type_it == field.MemberEnd()) {
            throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
                                   "Cannot parse struct field: missing type.");
        }
        std::unique_ptr<Type> type = _type_from_json(type_it->value);

        std::optional<std::string> doc = std::nullopt;
        const auto doc_it = field.FindMember(DOC);
        if (doc_it != field.MemberEnd() && !doc_it->value.IsNull()) {
            if (!doc_it->value.IsString()) {
                throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
                                       "Cannot parse struct field: non-string doc.");
            }
            doc = doc_it->value.GetString();
        }

        const auto required_it = field.FindMember(REQUIRED);
        if (required_it == field.MemberEnd() || !required_it->value.IsBool()) {
            throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
                                   "Cannot parse struct field: missing or non-boolean required.");
        }
        const bool is_required = required_it->value.GetBool();

        // The first argument is the negation of "required", as in the original call.
        fields.emplace_back(!is_required, id, std::move(name), std::move(type), std::move(doc));
    }

    return std::make_unique<StructType>(std::move(fields));
}
104
105
8
/// Parses a JSON list node into a ListType.
///
/// The node must carry an integer "element-id", an "element" type node and a
/// boolean "element-required".
///
/// @param value JSON object describing the list.
/// @return ListType::of_required(...) when "element-required" is true, otherwise
///         ListType::of_optional(...).
/// @throws doris::Exception (INTERNAL_ERROR) when a mandatory member is missing or
///         has the wrong JSON kind, or when the element type cannot be parsed.
std::unique_ptr<ListType> SchemaParser::_list_from_json(const rapidjson::Value& value) {
    // Validate before reading: rapidjson asserts on a missing member or on a
    // Get*() call against the wrong JSON kind instead of reporting an error.
    const auto id_it = value.FindMember(ELEMENT_ID);
    if (id_it == value.MemberEnd() || !id_it->value.IsInt()) {
        throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
                               "Cannot parse list type: missing or non-integer element-id.");
    }
    const int element_id = id_it->value.GetInt();

    const auto element_it = value.FindMember(ELEMENT);
    if (element_it == value.MemberEnd()) {
        throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
                               "Cannot parse list type: missing element.");
    }
    std::unique_ptr<Type> element_type = _type_from_json(element_it->value);

    const auto required_it = value.FindMember(ELEMENT_REQUIRED);
    if (required_it == value.MemberEnd() || !required_it->value.IsBool()) {
        throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
                               "Cannot parse list type: missing or non-boolean element-required.");
    }

    if (required_it->value.GetBool()) {
        return ListType::of_required(element_id, std::move(element_type));
    }
    return ListType::of_optional(element_id, std::move(element_type));
}
116
117
9
/// Parses a JSON map node into a MapType.
///
/// The node must carry integer "key-id" and "value-id" members, "key" and
/// "value" type nodes, and a boolean "value-required".
///
/// @param value JSON object describing the map.
/// @return MapType::of_required(...) when "value-required" is true, otherwise
///         MapType::of_optional(...).
/// @throws doris::Exception (INTERNAL_ERROR) when a mandatory member is missing or
///         has the wrong JSON kind, or when the key/value type cannot be parsed.
std::unique_ptr<MapType> SchemaParser::_map_from_json(const rapidjson::Value& value) {
    // Validate before reading: rapidjson asserts on a missing member or on a
    // Get*() call against the wrong JSON kind instead of reporting an error.
    const auto key_id_it = value.FindMember(KEY_ID);
    if (key_id_it == value.MemberEnd() || !key_id_it->value.IsInt()) {
        throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
                               "Cannot parse map type: missing or non-integer key-id.");
    }
    const int key_id = key_id_it->value.GetInt();

    const auto key_it = value.FindMember(KEY);
    if (key_it == value.MemberEnd()) {
        throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
                               "Cannot parse map type: missing key.");
    }
    std::unique_ptr<Type> key_type = _type_from_json(key_it->value);

    const auto value_id_it = value.FindMember(VALUE_ID);
    if (value_id_it == value.MemberEnd() || !value_id_it->value.IsInt()) {
        throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
                               "Cannot parse map type: missing or non-integer value-id.");
    }
    const int value_id = value_id_it->value.GetInt();

    const auto value_it = value.FindMember(VALUE);
    if (value_it == value.MemberEnd()) {
        throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
                               "Cannot parse map type: missing value.");
    }
    std::unique_ptr<Type> value_type = _type_from_json(value_it->value);

    const auto required_it = value.FindMember(VALUE_REQUIRED);
    if (required_it == value.MemberEnd() || !required_it->value.IsBool()) {
        throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
                               "Cannot parse map type: missing or non-boolean value-required.");
    }

    if (required_it->value.GetBool()) {
        return MapType::of_required(key_id, value_id, std::move(key_type), std::move(value_type));
    }
    return MapType::of_optional(key_id, value_id, std::move(key_type), std::move(value_type));
}
132
133
5
std::unique_ptr<Schema> SchemaParser::from_json(const std::string& json) {
134
5
    rapidjson::Document doc;
135
5
    doc.Parse(json.c_str());
136
137
5
    if (doc.HasParseError()) {
138
0
        throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR, "Failed to parse JSON: {}.",
139
0
                               std::string(GetParseError_En(doc.GetParseError())));
140
0
    }
141
5
    std::unique_ptr<Type> type = _type_from_json(doc);
142
5
    return std::make_unique<Schema>(type->as_nested_type()->as_struct_type()->move_fields());
143
5
}
144
145
std::unordered_set<int> SchemaParser::_get_integer_set(const char* key,
146
0
                                                       const rapidjson::Value& value) {
147
0
    std::unordered_set<int> integer_set;
148
149
0
    if (value.HasMember(key) && value[key].IsArray()) {
150
0
        const rapidjson::Value& arr = value[key];
151
0
        for (uint32_t i = 0; i < arr.Size(); i++) {
152
0
            if (arr[i].IsInt()) {
153
0
                integer_set.insert(arr[i].GetInt());
154
0
            } else {
155
0
                throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
156
0
                                       "Unexpected non-integer element in the array.");
157
0
            }
158
0
        }
159
0
    }
160
0
    return integer_set;
161
0
}
162
163
#include "common/compile_check_end.h"
164
} // namespace doris::iceberg