Coverage Report

Created: 2026-07-03 13:28

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/format_v2/schema_projection.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "format_v2/schema_projection.h"
19
20
#include <algorithm>
21
#include <memory>
22
#include <utility>
23
24
#include "core/assert_cast.h"
25
#include "core/data_type/data_type_array.h"
26
#include "core/data_type/data_type_map.h"
27
#include "core/data_type/data_type_nullable.h"
28
#include "core/data_type/data_type_struct.h"
29
30
namespace doris::format {
31
namespace {
32
33
// Rebuild the complex DataType for one already-pruned semantic ColumnDefinition node.
34
//
35
// The caller has already matched the projection against ColumnDefinition::children and preserved
36
// the file-local child order. This helper only mirrors those projected semantic children into the
37
// node type. It intentionally does not understand physical format wrappers. In particular, a MAP
38
// node is expected to have semantic children [key, value], even if the underlying format stores a
39
// wrapper such as Parquet key_value/entry.
40
Status rebuild_semantic_projected_type(const DataTypePtr& original_type,
41
                                       const std::vector<ColumnDefinition>& projected_children,
42
120
                                       DataTypePtr* projected_type) {
43
120
    DORIS_CHECK(original_type != nullptr);
44
120
    DORIS_CHECK(projected_type != nullptr);
45
46
120
    DataTypePtr nested_projected_type;
47
120
    const auto primitive_type = remove_nullable(original_type)->get_primitive_type();
48
120
    switch (primitive_type) {
49
85
    case TYPE_STRUCT: {
50
85
        DataTypes child_types;
51
85
        Strings child_names;
52
85
        child_types.reserve(projected_children.size());
53
85
        child_names.reserve(projected_children.size());
54
127
        for (const auto& child : projected_children) {
55
127
            child_types.push_back(child.type);
56
127
            child_names.push_back(child.name);
57
127
        }
58
85
        nested_projected_type = std::make_shared<DataTypeStruct>(child_types, child_names);
59
85
        break;
60
0
    }
61
12
    case TYPE_ARRAY:
62
12
        DORIS_CHECK(projected_children.size() == 1);
63
12
        nested_projected_type = std::make_shared<DataTypeArray>(projected_children[0].type);
64
12
        break;
65
23
    case TYPE_MAP: {
66
23
        DORIS_CHECK(remove_nullable(original_type)->get_primitive_type() == TYPE_MAP);
67
23
        const auto* original_map_type =
68
23
                assert_cast<const DataTypeMap*>(remove_nullable(original_type).get());
69
23
        DataTypePtr key_type = original_map_type->get_key_type();
70
23
        DataTypePtr value_type;
71
30
        for (const auto& child : projected_children) {
72
            // Partial MAP projection only prunes the value subtree. The key stream must remain
73
            // complete because it defines entry existence and offsets when materializing ColumnMap;
74
            // the projected DataTypeMap also preserves the original key type instead of rebuilding
75
            // it from children. If a caller includes key in the semantic child list, ignore it
76
            // here; the presence of a value child still decides the projected value shape.
77
30
            if (child.file_local_id() == 0 || child.name == "key") {
78
8
                continue;
79
8
            }
80
22
            if (child.file_local_id() == 1 || child.name == "value") {
81
22
                value_type = child.type;
82
22
            }
83
22
        }
84
23
        if (value_type == nullptr) {
85
1
            return Status::NotSupported("MAP projection for type {} contains no value child",
86
1
                                        original_type->get_name());
87
1
        }
88
22
        nested_projected_type = std::make_shared<DataTypeMap>(key_type, value_type);
89
22
        break;
90
23
    }
91
0
    default:
92
0
        return Status::InvalidArgument("Cannot project children from non-complex type {}",
93
0
                                       original_type->get_name());
94
120
    }
95
96
119
    *projected_type = original_type->is_nullable() ? make_nullable(nested_projected_type)
97
119
                                                   : nested_projected_type;
98
119
    return Status::OK();
99
120
}
100
101
} // namespace
102
103
Status project_column_definition(const ColumnDefinition& field, const LocalColumnIndex& projection,
104
3.62k
                                 ColumnDefinition* projected_field) {
105
3.62k
    if (projected_field == nullptr) {
106
0
        return Status::InvalidArgument("projected_field is null");
107
0
    }
108
3.62k
    *projected_field = field;
109
3.62k
    if (projection.project_all_children || projection.children.empty()) {
110
3.49k
        return Status::OK();
111
3.49k
    }
112
113
123
    projected_field->children.clear();
114
172
    for (const auto& child_projection : projection.children) {
115
172
        if (child_projection.local_id() == -1) {
116
1
            return Status::InvalidArgument("Empty projection path for field {}", field.name);
117
1
        }
118
171
        const auto child_it =
119
273
                std::ranges::find_if(field.children, [&](const ColumnDefinition& child) {
120
273
                    return child.file_local_id() == child_projection.local_id();
121
273
                });
122
171
        if (child_it == field.children.end()) {
123
2
            return Status::InvalidArgument("Invalid projection child id {} for field {}",
124
2
                                           child_projection.local_id(), field.name);
125
2
        }
126
171
    }
127
240
    for (const auto& child : field.children) {
128
240
        const auto child_projection_it =
129
299
                std::ranges::find_if(projection.children, [&](const LocalColumnIndex& child_proj) {
130
299
                    return child_proj.local_id() == child.file_local_id();
131
299
                });
132
240
        if (child_projection_it == projection.children.end()) {
133
71
            continue;
134
71
        }
135
169
        ColumnDefinition projected_child;
136
169
        RETURN_IF_ERROR(project_column_definition(child, *child_projection_it, &projected_child));
137
169
        projected_field->children.push_back(std::move(projected_child));
138
169
    }
139
120
    if (projected_field->children.empty()) {
140
0
        return Status::NotSupported("Projection for field {} contains no children", field.name);
141
0
    }
142
143
120
    return rebuild_semantic_projected_type(field.type, projected_field->children,
144
120
                                           &projected_field->type);
145
120
}
146
147
} // namespace doris::format