Coverage Report

Created: 2026-06-26 15:20

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/core/column/column.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Columns/IColumn.cpp
19
// and modified by Doris
20
21
#include "core/column/column.h"
22
23
#include "core/column/column_const.h"
24
#include "core/column/column_nullable.h"
25
#include "core/data_type/data_type.h"
26
#include "exec/sort/sort_block.h"
27
#include "util/simd/bits.h"
28
29
namespace doris {
30
31
3.17k
std::string IColumn::dump_structure() const {
32
3.17k
    std::stringstream res;
33
3.17k
    res << get_name() << "(size = " << size();
34
35
3.17k
    ColumnCallback callback = [&](const IColumn& subcolumn) {
36
2.81k
        res << ", " << subcolumn.dump_structure();
37
2.81k
    };
38
39
3.17k
    for_each_subcolumn(callback);
40
41
3.17k
    res << ")";
42
3.17k
    return res.str();
43
3.17k
}
44
45
410k
int IColumn::count_const_column() const {
46
410k
    int count = is_column_const(*this) ? 1 : 0;
47
410k
    ColumnCallback callback = [&](const IColumn& subcolumn) {
48
198k
        count += subcolumn.count_const_column();
49
198k
    };
50
410k
    for_each_subcolumn(callback);
51
410k
    return count;
52
410k
}
53
54
211k
bool IColumn::const_nested_check() const {
55
211k
    auto const_cnt = count_const_column();
56
211k
    if (const_cnt == 0) {
57
211k
        return true;
58
211k
    }
59
    // A const column is not allowed to be nested; it may only appear as the outermost (top-level) column.
60
14
    return const_cnt == 1 && is_column_const(*this);
61
211k
}
62
63
312k
bool IColumn::column_boolean_check() const {
64
312k
    if (const auto* col_nullable = check_and_get_column<ColumnNullable>(*this)) {
65
        // for column nullable, we need to skip null values check
66
98.1k
        const auto& nested_col = col_nullable->get_nested_column();
67
98.1k
        const auto& null_map = col_nullable->get_null_map_data();
68
98.1k
        Filter not_null_filter;
69
98.1k
        not_null_filter.reserve(nested_col.size());
70
98.1k
        size_t result_size_hint = 0;
71
1.37M
        for (size_t i = 0; i < null_map.size(); ++i) {
72
1.27M
            not_null_filter.push_back(null_map[i] == 0);
73
1.27M
            if (null_map[i] == 0) {
74
1.20M
                ++result_size_hint;
75
1.20M
            }
76
1.27M
        }
77
98.1k
        auto nested_col_skip_null = nested_col.filter(not_null_filter, result_size_hint);
78
98.1k
        return nested_col_skip_null->column_boolean_check();
79
98.1k
    }
80
81
214k
    auto check_boolean_is_zero_or_one = [&](const IColumn& subcolumn) {
82
214k
        if (const auto* column_boolean = check_and_get_column<ColumnBool>(subcolumn)) {
83
83.9k
            for (size_t i = 0; i < column_boolean->size(); ++i) {
84
75.9k
                auto val = column_boolean->get_element(i);
85
75.9k
                if (val != 0 && val != 1) {
86
2
                    LOG_WARNING("column boolean check failed at index {} with value {}", i, val)
87
2
                            .tag("column structure", subcolumn.dump_structure());
88
2
                    return false;
89
2
                }
90
75.9k
            }
91
7.94k
        }
92
214k
        return true;
93
214k
    };
94
95
214k
    bool is_valid = check_boolean_is_zero_or_one(*this);
96
214k
    ColumnCallback callback = [&](const IColumn& subcolumn) {
97
2.19k
        if (!subcolumn.column_boolean_check()) {
98
0
            is_valid = false;
99
0
        }
100
2.19k
    };
101
214k
    for_each_subcolumn(callback);
102
214k
    return is_valid;
103
312k
}
104
105
410k
bool IColumn::null_map_check() const {
106
410k
    auto check_null_map_is_zero_or_one = [&](const IColumn& subcolumn) {
107
410k
        if (is_column_nullable(subcolumn)) {
108
98.1k
            const auto& nullable_col = assert_cast<const ColumnNullable&>(subcolumn);
109
98.1k
            const auto& null_map = nullable_col.get_null_map_data();
110
1.37M
            for (size_t i = 0; i < null_map.size(); ++i) {
111
1.27M
                if (null_map[i] != 0 && null_map[i] != 1) {
112
3
                    LOG_WARNING("null map check failed at index {} with value {}", i, null_map[i])
113
3
                            .tag("column structure", subcolumn.dump_structure());
114
3
                    return false;
115
3
                }
116
1.27M
            }
117
98.1k
        }
118
410k
        return true;
119
410k
    };
120
121
410k
    bool is_valid = check_null_map_is_zero_or_one(*this);
122
410k
    ColumnCallback callback = [&](const IColumn& subcolumn) {
123
198k
        if (!subcolumn.null_map_check()) {
124
2
            is_valid = false;
125
2
        }
126
198k
    };
127
410k
    for_each_subcolumn(callback);
128
410k
    return is_valid;
129
410k
}
130
131
211k
Status IColumn::column_self_check() const {
132
211k
#ifndef NDEBUG
133
    // check const nested
134
211k
    if (!const_nested_check()) {
135
1
        return Status::InternalError("const nested check failed for column: {} , {}", get_name(),
136
1
                                     dump_structure());
137
1
    }
138
    // check null map
139
211k
    if (!null_map_check()) {
140
1
        return Status::InternalError("null map check failed for column: {}", get_name());
141
1
    }
142
    // check boolean column
143
211k
    if (!column_boolean_check()) {
144
0
        return Status::InternalError("boolean column check failed for column: {}", get_name());
145
0
    }
146
211k
#endif
147
211k
    return Status::OK();
148
211k
}
149
150
0
void IColumn::insert_from(const IColumn& src, size_t n) {
151
0
    insert(src[n]);
152
0
}
153
154
void IColumn::sort_column(const ColumnSorter* sorter, EqualFlags& flags,
155
0
                          IColumn::Permutation& perms, EqualRange& range, bool last_column) const {
156
0
    sorter->sort_column(static_cast<const IColumn&>(*this), flags, perms, range, last_column);
157
0
}
158
159
void IColumn::compare_internal(size_t rhs_row_id, const IColumn& rhs, int nan_direction_hint,
160
                               int direction, std::vector<uint8_t>& cmp_res,
161
3
                               uint8_t* __restrict filter) const {
162
3
    auto sz = this->size();
163
3
    DCHECK(cmp_res.size() == sz);
164
3
    size_t begin = simd::find_zero(cmp_res, 0);
165
6
    while (begin < sz) {
166
3
        size_t end = simd::find_one(cmp_res, begin + 1);
167
6
        for (size_t row_id = begin; row_id < end; row_id++) {
168
3
            int res = this->compare_at(row_id, rhs_row_id, rhs, nan_direction_hint);
169
3
            if (res * direction < 0) {
170
1
                filter[row_id] = 1;
171
1
                cmp_res[row_id] = 1;
172
2
            } else if (res * direction > 0) {
173
1
                cmp_res[row_id] = 1;
174
1
            }
175
3
        }
176
3
        begin = simd::find_zero(cmp_res, end + 1);
177
3
    }
178
3
}
179
180
void IColumn::serialize_with_nullable(StringRef* keys, size_t num_rows, const bool has_null,
181
0
                                      const uint8_t* __restrict null_map) const {
182
0
    if (has_null) {
183
0
        for (size_t i = 0; i < num_rows; ++i) {
184
0
            char* dest = const_cast<char*>(keys[i].data + keys[i].size);
185
0
            if (null_map[i]) {
186
                // is null
187
0
                *dest = true;
188
0
                keys[i].size += sizeof(UInt8);
189
0
                continue;
190
0
            }
191
            // not null
192
0
            *dest = false;
193
0
            keys[i].size += sizeof(UInt8) + serialize_impl(dest + sizeof(UInt8), i);
194
0
        }
195
0
    } else {
196
0
        for (size_t i = 0; i < num_rows; ++i) {
197
0
            char* dest = const_cast<char*>(keys[i].data + keys[i].size);
198
0
            *dest = false;
199
0
            keys[i].size += sizeof(UInt8) + serialize_impl(dest + sizeof(UInt8), i);
200
0
        }
201
0
    }
202
0
}
203
204
void IColumn::deserialize_with_nullable(StringRef* keys, const size_t num_rows,
205
0
                                        PaddedPODArray<UInt8>& null_map) {
206
0
    for (size_t i = 0; i != num_rows; ++i) {
207
0
        UInt8 is_null = *reinterpret_cast<const UInt8*>(keys[i].data);
208
0
        null_map.push_back(is_null);
209
0
        keys[i].data += sizeof(UInt8);
210
0
        keys[i].size -= sizeof(UInt8);
211
0
        if (is_null) {
212
0
            insert_default();
213
0
            continue;
214
0
        }
215
0
        auto sz = deserialize_impl(keys[i].data);
216
0
        keys[i].data += sz;
217
0
        keys[i].size -= sz;
218
0
    }
219
0
}
220
221
487k
bool is_column_nullable(const IColumn& column) {
222
487k
    return is_column<ColumnNullable>(column);
223
487k
}
224
225
73.4M
bool is_column_const(const IColumn& column) {
226
73.4M
    return is_column<ColumnConst>(column);
227
73.4M
}
228
229
797k
void IColumn::check_const_only_in_top_level() const {
230
1.60M
    ColumnCallback throw_if_const = [&](const IColumn& column) {
231
1.60M
        if (is_column_const(column)) {
232
49
            throw doris::Exception(ErrorCode::INTERNAL_ERROR,
233
49
                                   "const column is not allowed to be nested, but got {}",
234
49
                                   column.get_name());
235
49
        }
236
1.60M
    };
237
797k
    for_each_subcolumn(throw_if_const);
238
797k
}
239
240
} // namespace doris