Coverage Report

Created: 2026-01-14 01:10

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/be/src/vec/columns/column.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Columns/IColumn.cpp
19
// and modified by Doris
20
21
#include "vec/columns/column.h"
22
23
#include "util/simd/bits.h"
24
#include "vec/columns/column_const.h"
25
#include "vec/columns/column_nullable.h"
26
#include "vec/core/sort_block.h"
27
#include "vec/data_types/data_type.h"
28
29
namespace doris::vectorized {
30
31
3.16k
std::string IColumn::dump_structure() const {
32
3.16k
    std::stringstream res;
33
3.16k
    res << get_name() << "(size = " << size();
34
35
3.16k
    ColumnCallback callback = [&](ColumnPtr& subcolumn) {
36
2.81k
        res << ", " << subcolumn->dump_structure();
37
2.81k
    };
38
39
    // simply read using for_each_subcolumn without modification; const_cast can be used.
40
3.16k
    const_cast<IColumn*>(this)->for_each_subcolumn(callback);
41
42
3.16k
    res << ")";
43
3.16k
    return res.str();
44
3.16k
}
45
46
38.5k
int IColumn::count_const_column() const {
47
38.5k
    int count = is_column_const(*this) ? 1 : 0;
48
38.5k
    ColumnCallback callback = [&](ColumnPtr& subcolumn) {
49
7.93k
        count += subcolumn->count_const_column();
50
7.93k
    };
51
    // simply read using for_each_subcolumn without modification; const_cast can be used.
52
38.5k
    const_cast<IColumn*>(this)->for_each_subcolumn(callback);
53
38.5k
    return count;
54
38.5k
}
55
56
30.5k
bool IColumn::const_nested_check() const {
57
30.5k
    auto const_cnt = count_const_column();
58
30.5k
    if (const_cnt == 0) {
59
30.5k
        return true;
60
30.5k
    }
61
    // A const column is not allowed to be nested; it may only appear as the outermost (top-level) column.
62
4
    return const_cnt == 1 && is_column_const(*this);
63
30.5k
}
64
65
34.9k
bool IColumn::column_boolean_check() const {
66
34.9k
    if (const auto* col_nullable = check_and_get_column<ColumnNullable>(*this)) {
67
        // for column nullable, we need to skip null values check
68
3.56k
        const auto& nested_col = col_nullable->get_nested_column();
69
3.56k
        const auto& null_map = col_nullable->get_null_map_data();
70
3.56k
        Filter not_null_filter;
71
3.56k
        not_null_filter.reserve(nested_col.size());
72
3.56k
        size_t result_size_hint = 0;
73
1.13M
        for (size_t i = 0; i < null_map.size(); ++i) {
74
1.13M
            not_null_filter.push_back(null_map[i] == 0);
75
1.13M
            if (null_map[i] == 0) {
76
1.10M
                ++result_size_hint;
77
1.10M
            }
78
1.13M
        }
79
3.56k
        auto nested_col_skip_null = nested_col.filter(not_null_filter, result_size_hint);
80
3.56k
        return nested_col_skip_null->column_boolean_check();
81
3.56k
    }
82
83
31.3k
    auto check_boolean_is_zero_or_one = [&](const IColumn& subcolumn) {
84
31.3k
        if (const auto* column_boolean = check_and_get_column<ColumnBool>(subcolumn)) {
85
71
            for (size_t i = 0; i < column_boolean->size(); ++i) {
86
60
                auto val = column_boolean->get_element(i);
87
60
                if (val != 0 && val != 1) {
88
2
                    LOG_WARNING("column boolean check failed at index {} with value {}", i, val)
89
2
                            .tag("column structure", subcolumn.dump_structure());
90
2
                    return false;
91
2
                }
92
60
            }
93
13
        }
94
31.3k
        return true;
95
31.3k
    };
96
97
31.3k
    bool is_valid = check_boolean_is_zero_or_one(*this);
98
31.3k
    ColumnCallback callback = [&](ColumnPtr& subcolumn) {
99
804
        if (!subcolumn->column_boolean_check()) {
100
0
            is_valid = false;
101
0
        }
102
804
    };
103
    // simply read using for_each_subcolumn without modification; const_cast can be used.
104
31.3k
    const_cast<IColumn*>(this)->for_each_subcolumn(callback);
105
31.3k
    return is_valid;
106
34.9k
}
107
108
38.5k
bool IColumn::null_map_check() const {
109
38.5k
    auto check_null_map_is_zero_or_one = [&](const IColumn& subcolumn) {
110
38.5k
        if (is_column_nullable(subcolumn)) {
111
3.56k
            const auto& nullable_col = assert_cast<const ColumnNullable&>(subcolumn);
112
3.56k
            const auto& null_map = nullable_col.get_null_map_data();
113
1.13M
            for (size_t i = 0; i < null_map.size(); ++i) {
114
1.13M
                if (null_map[i] != 0 && null_map[i] != 1) {
115
3
                    LOG_WARNING("null map check failed at index {} with value {}", i, null_map[i])
116
3
                            .tag("column structure", subcolumn.dump_structure());
117
3
                    return false;
118
3
                }
119
1.13M
            }
120
3.56k
        }
121
38.5k
        return true;
122
38.5k
    };
123
124
38.5k
    bool is_valid = check_null_map_is_zero_or_one(*this);
125
38.5k
    ColumnCallback callback = [&](ColumnPtr& subcolumn) {
126
7.93k
        if (!subcolumn->null_map_check()) {
127
2
            is_valid = false;
128
2
        }
129
7.93k
    };
130
    // simply read using for_each_subcolumn without modification; const_cast can be used.
131
38.5k
    const_cast<IColumn*>(this)->for_each_subcolumn(callback);
132
38.5k
    return is_valid;
133
38.5k
}
134
135
30.5k
// Runs the column consistency checks and reports the first failure as a Status.
//
// When NDEBUG is not defined, the checks run in this order and stop at the first one
// that fails: const nesting, null-map values, boolean values. Each failure yields
// Status::InternalError with a message naming the column. When NDEBUG is defined, no
// check is performed.
//
// Returns Status::OK() if nothing failed (or nothing was checked).
Status IColumn::column_self_check() const {
#ifndef NDEBUG
    // A const column must not be nested inside another column.
    const bool const_nesting_ok = const_nested_check();
    if (!const_nesting_ok) {
        return Status::InternalError("const nested check failed for column: {} , {}", get_name(),
                                     dump_structure());
    }

    // Null-map entries must be 0 or 1.
    const bool null_map_ok = null_map_check();
    if (!null_map_ok) {
        return Status::InternalError("null map check failed for column: {}", get_name());
    }

    // Boolean values must be 0 or 1.
    const bool boolean_ok = column_boolean_check();
    if (!boolean_ok) {
        return Status::InternalError("boolean column check failed for column: {}", get_name());
    }
#endif
    return Status::OK();
}
153
154
0
void IColumn::insert_from(const IColumn& src, size_t n) {
155
0
    insert(src[n]);
156
0
}
157
158
void IColumn::sort_column(const ColumnSorter* sorter, EqualFlags& flags,
159
0
                          IColumn::Permutation& perms, EqualRange& range, bool last_column) const {
160
0
    sorter->sort_column(static_cast<const IColumn&>(*this), flags, perms, range, last_column);
161
0
}
162
163
void IColumn::compare_internal(size_t rhs_row_id, const IColumn& rhs, int nan_direction_hint,
164
                               int direction, std::vector<uint8_t>& cmp_res,
165
3
                               uint8_t* __restrict filter) const {
166
3
    auto sz = this->size();
167
3
    DCHECK(cmp_res.size() == sz);
168
3
    size_t begin = simd::find_zero(cmp_res, 0);
169
6
    while (begin < sz) {
170
3
        size_t end = simd::find_one(cmp_res, begin + 1);
171
6
        for (size_t row_id = begin; row_id < end; row_id++) {
172
3
            int res = this->compare_at(row_id, rhs_row_id, rhs, nan_direction_hint);
173
3
            if (res * direction < 0) {
174
1
                filter[row_id] = 1;
175
1
                cmp_res[row_id] = 1;
176
2
            } else if (res * direction > 0) {
177
1
                cmp_res[row_id] = 1;
178
1
            }
179
3
        }
180
3
        begin = simd::find_zero(cmp_res, end + 1);
181
3
    }
182
3
}
183
184
// Appends one serialized entry per row to the corresponding key buffer.
//
// For row i, a one-byte null flag is written at keys[i].data + keys[i].size. If the
// row is null (has_null is true and null_map[i] is non-zero) the flag is true and
// nothing else is written; otherwise the flag is false and serialize_impl() writes
// the value right after it. keys[i].size grows by the number of bytes written.
// `null_map` is only read when `has_null` is true.
//
// NOTE(review): the buffers behind `keys` are written through a const_cast and are
// presumably pre-sized by the caller - confirm.
void IColumn::serialize_with_nullable(StringRef* keys, size_t num_rows, const bool has_null,
                                      const uint8_t* __restrict null_map) const {
    if (!has_null) {
        // No nulls at all: every row gets a false flag followed by its value.
        for (size_t row = 0; row < num_rows; ++row) {
            StringRef& key = keys[row];
            char* flag_pos = const_cast<char*>(key.data + key.size);
            *flag_pos = false;
            key.size += sizeof(UInt8) + serialize_impl(flag_pos + sizeof(UInt8), row);
        }
        return;
    }

    for (size_t row = 0; row < num_rows; ++row) {
        StringRef& key = keys[row];
        char* flag_pos = const_cast<char*>(key.data + key.size);
        if (null_map[row]) {
            // Null row: only the flag byte is emitted.
            *flag_pos = true;
            key.size += sizeof(UInt8);
        } else {
            // Non-null row: flag byte followed by the serialized value.
            *flag_pos = false;
            key.size += sizeof(UInt8) + serialize_impl(flag_pos + sizeof(UInt8), row);
        }
    }
}
207
208
void IColumn::deserialize_with_nullable(StringRef* keys, const size_t num_rows,
209
0
                                        PaddedPODArray<UInt8>& null_map) {
210
0
    for (size_t i = 0; i != num_rows; ++i) {
211
0
        UInt8 is_null = *reinterpret_cast<const UInt8*>(keys[i].data);
212
0
        null_map.push_back(is_null);
213
0
        keys[i].data += sizeof(UInt8);
214
0
        keys[i].size -= sizeof(UInt8);
215
0
        if (is_null) {
216
0
            insert_default();
217
0
            continue;
218
0
        }
219
0
        auto sz = deserialize_impl(keys[i].data);
220
0
        keys[i].data += sz;
221
0
        keys[i].size -= sz;
222
0
    }
223
0
}
224
225
70.9k
bool is_column_nullable(const IColumn& column) {
226
70.9k
    return is_column<ColumnNullable>(column);
227
70.9k
}
228
229
72.2M
bool is_column_const(const IColumn& column) {
230
72.2M
    return is_column<ColumnConst>(column);
231
72.2M
}
232
233
} // namespace doris::vectorized