Coverage Report

Created: 2025-12-30 17:16

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/be/src/vec/columns/column.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Columns/IColumn.cpp
19
// and modified by Doris
20
21
#include "vec/columns/column.h"
22
23
#include "util/simd/bits.h"
24
#include "vec/columns/column_const.h"
25
#include "vec/columns/column_nullable.h"
26
#include "vec/core/sort_block.h"
27
#include "vec/data_types/data_type.h"
28
29
namespace doris::vectorized {
30
31
3.15k
std::string IColumn::dump_structure() const {
32
3.15k
    std::stringstream res;
33
3.15k
    res << get_name() << "(size = " << size();
34
35
3.15k
    ColumnCallback callback = [&](ColumnPtr& subcolumn) {
36
2.81k
        res << ", " << subcolumn->dump_structure();
37
2.81k
    };
38
39
    // simply read using for_each_subcolumn without modification; const_cast can be used.
40
3.15k
    const_cast<IColumn*>(this)->for_each_subcolumn(callback);
41
42
3.15k
    res << ")";
43
3.15k
    return res.str();
44
3.15k
}
45
46
38.5k
int IColumn::count_const_column() const {
47
38.5k
    int count = is_column_const(*this) ? 1 : 0;
48
38.5k
    ColumnCallback callback = [&](ColumnPtr& subcolumn) {
49
7.93k
        count += subcolumn->count_const_column();
50
7.93k
    };
51
    // simply read using for_each_subcolumn without modification; const_cast can be used.
52
38.5k
    const_cast<IColumn*>(this)->for_each_subcolumn(callback);
53
38.5k
    return count;
54
38.5k
}
55
56
30.5k
bool IColumn::const_nested_check() const {
57
30.5k
    auto const_cnt = count_const_column();
58
30.5k
    if (const_cnt == 0) {
59
30.5k
        return true;
60
30.5k
    }
61
    // A const column is not allowed to be nested; it may only appear as the outermost (top-level) column.
62
4
    return const_cnt == 1 && is_column_const(*this);
63
30.5k
}
64
65
38.5k
bool IColumn::null_map_check() const {
66
38.5k
    auto check_null_map_is_zero_or_one = [&](const IColumn& subcolumn) {
67
38.5k
        if (is_column_nullable(subcolumn)) {
68
3.56k
            const auto& nullable_col = assert_cast<const ColumnNullable&>(subcolumn);
69
3.56k
            const auto& null_map = nullable_col.get_null_map_data();
70
1.13M
            for (size_t i = 0; i < null_map.size(); ++i) {
71
1.13M
                if (null_map[i] != 0 && null_map[i] != 1) {
72
3
                    LOG_WARNING("null map check failed at index {} with value {}", i, null_map[i])
73
3
                            .tag("column structure", subcolumn.dump_structure());
74
3
                    return false;
75
3
                }
76
1.13M
            }
77
3.56k
        }
78
38.5k
        return true;
79
38.5k
    };
80
81
38.5k
    bool is_valid = check_null_map_is_zero_or_one(*this);
82
38.5k
    ColumnCallback callback = [&](ColumnPtr& subcolumn) {
83
7.93k
        if (!subcolumn->null_map_check()) {
84
2
            is_valid = false;
85
2
        }
86
7.93k
    };
87
    // simply read using for_each_subcolumn without modification; const_cast can be used.
88
38.5k
    const_cast<IColumn*>(this)->for_each_subcolumn(callback);
89
38.5k
    return is_valid;
90
38.5k
}
91
92
30.5k
// Runs the column's internal consistency checks. When NDEBUG is not defined, the
// const-nesting check runs first and the null-map check runs only if it passed;
// the first failure is returned as an InternalError. When NDEBUG is defined, no
// check is performed and Status::OK() is returned directly.
Status IColumn::column_self_check() const {
#ifndef NDEBUG
    // A const column may only be the outermost column.
    const bool const_layout_ok = const_nested_check();
    if (!const_layout_ok) {
        return Status::InternalError("const nested check failed for column: {} , {}", get_name(),
                                     dump_structure());
    }

    // Null maps may only contain 0 or 1.
    const bool null_maps_ok = null_map_check();
    if (!null_maps_ok) {
        return Status::InternalError("null map check failed for column: {}", get_name());
    }
#endif
    return Status::OK();
}
106
107
0
void IColumn::insert_from(const IColumn& src, size_t n) {
108
0
    insert(src[n]);
109
0
}
110
111
void IColumn::sort_column(const ColumnSorter* sorter, EqualFlags& flags,
112
0
                          IColumn::Permutation& perms, EqualRange& range, bool last_column) const {
113
0
    sorter->sort_column(static_cast<const IColumn&>(*this), flags, perms, range, last_column);
114
0
}
115
116
void IColumn::compare_internal(size_t rhs_row_id, const IColumn& rhs, int nan_direction_hint,
117
                               int direction, std::vector<uint8_t>& cmp_res,
118
3
                               uint8_t* __restrict filter) const {
119
3
    auto sz = this->size();
120
3
    DCHECK(cmp_res.size() == sz);
121
3
    size_t begin = simd::find_zero(cmp_res, 0);
122
6
    while (begin < sz) {
123
3
        size_t end = simd::find_one(cmp_res, begin + 1);
124
6
        for (size_t row_id = begin; row_id < end; row_id++) {
125
3
            int res = this->compare_at(row_id, rhs_row_id, rhs, nan_direction_hint);
126
3
            if (res * direction < 0) {
127
1
                filter[row_id] = 1;
128
1
                cmp_res[row_id] = 1;
129
2
            } else if (res * direction > 0) {
130
1
                cmp_res[row_id] = 1;
131
1
            }
132
3
        }
133
3
        begin = simd::find_zero(cmp_res, end + 1);
134
3
    }
135
3
}
136
137
// Appends one serialized entry per row to the tail of the corresponding key buffer.
// Each entry starts with a one-byte null flag written at keys[i].data + keys[i].size:
//   - null row (has_null && null_map[i] != 0): flag 1, key grows by one byte
//   - otherwise: flag 0 followed by the bytes written by serialize_impl(); the key
//     grows by one byte plus serialize_impl()'s return value
// null_map is only read when has_null is true.
// NOTE(review): presumably each key buffer already has room for the appended
// bytes — nothing is checked here; confirm against callers.
void IColumn::serialize_with_nullable(StringRef* keys, size_t num_rows, const bool has_null,
                                      const uint8_t* __restrict null_map) const {
    // Writes the "not null" flag plus the row payload and extends the key size.
    auto append_value = [this, keys](size_t row) {
        char* tail = const_cast<char*>(keys[row].data + keys[row].size);
        *tail = false;
        keys[row].size += sizeof(UInt8) + serialize_impl(tail + sizeof(UInt8), row);
    };

    if (!has_null) {
        for (size_t row = 0; row < num_rows; ++row) {
            append_value(row);
        }
        return;
    }

    for (size_t row = 0; row < num_rows; ++row) {
        if (null_map[row]) {
            // Null row: only the flag byte is written.
            char* tail = const_cast<char*>(keys[row].data + keys[row].size);
            *tail = true;
            keys[row].size += sizeof(UInt8);
        } else {
            append_value(row);
        }
    }
}
160
161
void IColumn::deserialize_with_nullable(StringRef* keys, const size_t num_rows,
162
0
                                        PaddedPODArray<UInt8>& null_map) {
163
0
    for (size_t i = 0; i != num_rows; ++i) {
164
0
        UInt8 is_null = *reinterpret_cast<const UInt8*>(keys[i].data);
165
0
        null_map.push_back(is_null);
166
0
        keys[i].data += sizeof(UInt8);
167
0
        keys[i].size -= sizeof(UInt8);
168
0
        if (is_null) {
169
0
            insert_default();
170
0
            continue;
171
0
        }
172
0
        auto sz = deserialize_impl(keys[i].data);
173
0
        keys[i].data += sz;
174
0
        keys[i].size -= sz;
175
0
    }
176
0
}
177
178
70.9k
bool is_column_nullable(const IColumn& column) {
179
70.9k
    return is_column<ColumnNullable>(column);
180
70.9k
}
181
182
72.2M
bool is_column_const(const IColumn& column) {
183
72.2M
    return is_column<ColumnConst>(column);
184
72.2M
}
185
186
} // namespace doris::vectorized