Coverage Report

Created: 2026-01-16 14:06

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/be/src/vec/columns/column.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Columns/IColumn.cpp
19
// and modified by Doris
20
21
#include "vec/columns/column.h"
22
23
#include "util/simd/bits.h"
24
#include "vec/columns/column_const.h"
25
#include "vec/columns/column_nullable.h"
26
#include "vec/core/sort_block.h"
27
#include "vec/data_types/data_type.h"
28
29
namespace doris::vectorized {
30
31
6.32k
std::string IColumn::dump_structure() const {
32
6.32k
    std::stringstream res;
33
6.32k
    res << get_name() << "(size = " << size();
34
35
6.32k
    ColumnCallback callback = [&](ColumnPtr& subcolumn) {
36
5.62k
        res << ", " << subcolumn->dump_structure();
37
5.62k
    };
38
39
    // simply read using for_each_subcolumn without modification; const_cast can be used.
40
6.32k
    const_cast<IColumn*>(this)->for_each_subcolumn(callback);
41
42
6.32k
    res << ")";
43
6.32k
    return res.str();
44
6.32k
}
45
46
77.0k
int IColumn::count_const_column() const {
47
77.0k
    int count = is_column_const(*this) ? 1 : 0;
48
77.0k
    ColumnCallback callback = [&](ColumnPtr& subcolumn) {
49
15.8k
        count += subcolumn->count_const_column();
50
15.8k
    };
51
    // simply read using for_each_subcolumn without modification; const_cast can be used.
52
77.0k
    const_cast<IColumn*>(this)->for_each_subcolumn(callback);
53
77.0k
    return count;
54
77.0k
}
55
56
61.1k
bool IColumn::const_nested_check() const {
57
61.1k
    auto const_cnt = count_const_column();
58
61.1k
    if (const_cnt == 0) {
59
61.1k
        return true;
60
61.1k
    }
61
    // A const column is not allowed to be nested; it may only appear as the outermost (top-level) column.
62
8
    return const_cnt == 1 && is_column_const(*this);
63
61.1k
}
64
65
69.9k
bool IColumn::column_boolean_check() const {
66
69.9k
    if (const auto* col_nullable = check_and_get_column<ColumnNullable>(*this)) {
67
        // for column nullable, we need to skip null values check
68
7.12k
        const auto& nested_col = col_nullable->get_nested_column();
69
7.12k
        const auto& null_map = col_nullable->get_null_map_data();
70
7.12k
        Filter not_null_filter;
71
7.12k
        not_null_filter.reserve(nested_col.size());
72
7.12k
        size_t result_size_hint = 0;
73
2.27M
        for (size_t i = 0; i < null_map.size(); ++i) {
74
2.26M
            not_null_filter.push_back(null_map[i] == 0);
75
2.26M
            if (null_map[i] == 0) {
76
2.21M
                ++result_size_hint;
77
2.21M
            }
78
2.26M
        }
79
7.12k
        auto nested_col_skip_null = nested_col.filter(not_null_filter, result_size_hint);
80
7.12k
        return nested_col_skip_null->column_boolean_check();
81
7.12k
    }
82
83
62.7k
    auto check_boolean_is_zero_or_one = [&](const IColumn& subcolumn) {
84
62.7k
        if (const auto* column_boolean = check_and_get_column<ColumnBool>(subcolumn)) {
85
142
            for (size_t i = 0; i < column_boolean->size(); ++i) {
86
120
                auto val = column_boolean->get_element(i);
87
120
                if (val != 0 && val != 1) {
88
4
                    LOG_WARNING("column boolean check failed at index {} with value {}", i, val)
89
4
                            .tag("column structure", subcolumn.dump_structure());
90
4
                    return false;
91
4
                }
92
120
            }
93
26
        }
94
62.7k
        return true;
95
62.7k
    };
96
97
62.7k
    bool is_valid = check_boolean_is_zero_or_one(*this);
98
62.7k
    ColumnCallback callback = [&](ColumnPtr& subcolumn) {
99
1.60k
        if (!subcolumn->column_boolean_check()) {
100
0
            is_valid = false;
101
0
        }
102
1.60k
    };
103
    // simply read using for_each_subcolumn without modification; const_cast can be used.
104
62.7k
    const_cast<IColumn*>(this)->for_each_subcolumn(callback);
105
62.7k
    return is_valid;
106
69.9k
}
107
108
77.0k
bool IColumn::null_map_check() const {
109
77.0k
    auto check_null_map_is_zero_or_one = [&](const IColumn& subcolumn) {
110
77.0k
        if (is_column_nullable(subcolumn)) {
111
7.12k
            const auto& nullable_col = assert_cast<const ColumnNullable&>(subcolumn);
112
7.12k
            const auto& null_map = nullable_col.get_null_map_data();
113
2.27M
            for (size_t i = 0; i < null_map.size(); ++i) {
114
2.26M
                if (null_map[i] != 0 && null_map[i] != 1) {
115
6
                    LOG_WARNING("null map check failed at index {} with value {}", i, null_map[i])
116
6
                            .tag("column structure", subcolumn.dump_structure());
117
6
                    return false;
118
6
                }
119
2.26M
            }
120
7.12k
        }
121
77.0k
        return true;
122
77.0k
    };
123
124
77.0k
    bool is_valid = check_null_map_is_zero_or_one(*this);
125
77.0k
    ColumnCallback callback = [&](ColumnPtr& subcolumn) {
126
15.8k
        if (!subcolumn->null_map_check()) {
127
4
            is_valid = false;
128
4
        }
129
15.8k
    };
130
    // simply read using for_each_subcolumn without modification; const_cast can be used.
131
77.0k
    const_cast<IColumn*>(this)->for_each_subcolumn(callback);
132
77.0k
    return is_valid;
133
77.0k
}
134
135
61.1k
// Runs the structural sanity checks on this column and reports the first failure.
// The checks are compiled in only when NDEBUG is not defined; otherwise the
// function returns Status::OK() unconditionally.
// Order of checks: nested const columns, null-map values, boolean values.
Status IColumn::column_self_check() const {
#ifndef NDEBUG
    // 1) A const column may only be the outermost column.
    const bool const_layout_ok = const_nested_check();
    if (!const_layout_ok) {
        return Status::InternalError("const nested check failed for column: {} , {}", get_name(),
                                     dump_structure());
    }

    // 2) Null-map entries must be 0 or 1.
    const bool null_maps_ok = null_map_check();
    if (!null_maps_ok) {
        return Status::InternalError("null map check failed for column: {}", get_name());
    }

    // 3) Boolean column entries must be 0 or 1.
    const bool booleans_ok = column_boolean_check();
    if (!booleans_ok) {
        return Status::InternalError("boolean column check failed for column: {}", get_name());
    }
#endif
    return Status::OK();
}
153
154
0
void IColumn::insert_from(const IColumn& src, size_t n) {
155
0
    insert(src[n]);
156
0
}
157
158
void IColumn::sort_column(const ColumnSorter* sorter, EqualFlags& flags,
159
0
                          IColumn::Permutation& perms, EqualRange& range, bool last_column) const {
160
0
    sorter->sort_column(static_cast<const IColumn&>(*this), flags, perms, range, last_column);
161
0
}
162
163
void IColumn::compare_internal(size_t rhs_row_id, const IColumn& rhs, int nan_direction_hint,
164
                               int direction, std::vector<uint8_t>& cmp_res,
165
6
                               uint8_t* __restrict filter) const {
166
6
    auto sz = this->size();
167
6
    DCHECK(cmp_res.size() == sz);
168
6
    size_t begin = simd::find_zero(cmp_res, 0);
169
12
    while (begin < sz) {
170
6
        size_t end = simd::find_one(cmp_res, begin + 1);
171
12
        for (size_t row_id = begin; row_id < end; row_id++) {
172
6
            int res = this->compare_at(row_id, rhs_row_id, rhs, nan_direction_hint);
173
6
            if (res * direction < 0) {
174
2
                filter[row_id] = 1;
175
2
                cmp_res[row_id] = 1;
176
4
            } else if (res * direction > 0) {
177
2
                cmp_res[row_id] = 1;
178
2
            }
179
6
        }
180
6
        begin = simd::find_zero(cmp_res, end + 1);
181
6
    }
182
6
}
183
184
// Appends one serialized cell per row to the buffers referenced by `keys`.
// For row i the write position is keys[i].data + keys[i].size. A one-byte null
// flag is written first (true for null, false otherwise); for non-null rows the
// payload produced by serialize_impl() follows immediately. keys[i].size is
// advanced by the number of bytes written.
// `null_map` is only consulted when `has_null` is true.
void IColumn::serialize_with_nullable(StringRef* keys, size_t num_rows, const bool has_null,
                                      const uint8_t* __restrict null_map) const {
    if (!has_null) {
        // No nulls at all: every row is a cleared flag followed by its payload.
        for (size_t row = 0; row < num_rows; ++row) {
            char* flag_pos = const_cast<char*>(keys[row].data + keys[row].size);
            *flag_pos = false;
            keys[row].size += sizeof(UInt8) + serialize_impl(flag_pos + sizeof(UInt8), row);
        }
        return;
    }

    for (size_t row = 0; row < num_rows; ++row) {
        char* flag_pos = const_cast<char*>(keys[row].data + keys[row].size);
        if (!null_map[row]) {
            // Not null: cleared flag followed by the serialized value.
            *flag_pos = false;
            keys[row].size += sizeof(UInt8) + serialize_impl(flag_pos + sizeof(UInt8), row);
        } else {
            // Null: only the flag byte is emitted.
            *flag_pos = true;
            keys[row].size += sizeof(UInt8);
        }
    }
}
207
208
void IColumn::deserialize_with_nullable(StringRef* keys, const size_t num_rows,
209
0
                                        PaddedPODArray<UInt8>& null_map) {
210
0
    for (size_t i = 0; i != num_rows; ++i) {
211
0
        UInt8 is_null = *reinterpret_cast<const UInt8*>(keys[i].data);
212
0
        null_map.push_back(is_null);
213
0
        keys[i].data += sizeof(UInt8);
214
0
        keys[i].size -= sizeof(UInt8);
215
0
        if (is_null) {
216
0
            insert_default();
217
0
            continue;
218
0
        }
219
0
        auto sz = deserialize_impl(keys[i].data);
220
0
        keys[i].data += sz;
221
0
        keys[i].size -= sz;
222
0
    }
223
0
}
224
225
141k
bool is_column_nullable(const IColumn& column) {
226
141k
    return is_column<ColumnNullable>(column);
227
141k
}
228
229
144M
bool is_column_const(const IColumn& column) {
230
144M
    return is_column<ColumnConst>(column);
231
144M
}
232
233
} // namespace doris::vectorized