Coverage Report

Created: 2024-11-21 10:56

/root/doris/be/src/olap/schema.h
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <stddef.h>
21
#include <stdint.h>
22
23
#include <algorithm>
24
#include <cstdint>
25
#include <memory>
26
#include <string>
27
#include <vector>
28
29
#include "common/consts.h"
30
#include "io/io_common.h"
31
#include "olap/field.h"
32
#include "olap/olap_common.h"
33
#include "olap/tablet_schema.h"
34
#include "olap/utils.h"
35
#include "runtime/thread_context.h"
36
#include "vec/aggregate_functions/aggregate_function.h"
37
#include "vec/columns/column.h"
38
39
namespace doris {
40
41
// The class is used to represent row's format in memory.  Each row contains
42
// multiple columns, some of which are key-columns (the rest are value-columns).
43
// NOTE: If both key-columns and value-columns exist, then the key-columns
44
// must be placed before value-columns.
45
//
46
// To compare two rows whose schemas differ but which come from the same origin,
47
// we store every column schema that may be accessed here. By default, columns are accessed by column id.
48
// Forward declaration so the SchemaSPtr alias can name Schema before the class is defined.
class Schema;
49
// Shared pointer to a const Schema.
using SchemaSPtr = std::shared_ptr<const Schema>;
50
class Schema {
51
public:
52
4.15k
    Schema(TabletSchemaSPtr tablet_schema) {
53
4.15k
        size_t num_columns = tablet_schema->num_columns();
54
        // ignore this column
55
4.15k
        if (tablet_schema->columns().back()->name() == BeConsts::ROW_STORE_COL) {
56
0
            --num_columns;
57
0
        }
58
4.15k
        std::vector<ColumnId> col_ids(num_columns);
59
4.15k
        _unique_ids.resize(num_columns);
60
4.15k
        std::vector<TabletColumnPtr> columns;
61
4.15k
        columns.reserve(num_columns);
62
63
4.15k
        size_t num_key_columns = 0;
64
12.8k
        for (uint32_t cid = 0; cid < num_columns; ++cid) {
65
8.73k
            col_ids[cid] = cid;
66
8.73k
            const TabletColumn& column = tablet_schema->column(cid);
67
8.73k
            _unique_ids[cid] = column.unique_id();
68
8.73k
            if (column.is_key()) {
69
2.50k
                ++num_key_columns;
70
2.50k
            }
71
8.73k
            if (column.name() == BeConsts::ROWID_COL) {
72
0
                _rowid_col_idx = cid;
73
0
            }
74
8.73k
            if (column.name() == VERSION_COL) {
75
0
                _version_col_idx = cid;
76
0
            }
77
8.73k
            columns.push_back(std::make_shared<TabletColumn>(column));
78
8.73k
        }
79
4.15k
        _delete_sign_idx = tablet_schema->delete_sign_idx();
80
4.15k
        if (tablet_schema->has_sequence_col()) {
81
5
            _has_sequence_col = true;
82
5
        }
83
4.15k
        _init(columns, col_ids, num_key_columns);
84
4.15k
    }
85
86
    // All the columns of one table may exist in the columns param, but col_ids is only a subset.
87
605
    Schema(const std::vector<TabletColumnPtr>& columns, const std::vector<ColumnId>& col_ids) {
88
605
        size_t num_key_columns = 0;
89
605
        _unique_ids.resize(columns.size());
90
2.34k
        for (size_t i = 0; i < columns.size(); ++i) {
91
1.74k
            if (columns[i]->is_key()) {
92
696
                ++num_key_columns;
93
696
            }
94
1.74k
            if (columns[i]->name() == DELETE_SIGN) {
95
357
                _delete_sign_idx = i;
96
357
            }
97
1.74k
            if (columns[i]->name() == BeConsts::ROWID_COL) {
98
0
                _rowid_col_idx = i;
99
0
            }
100
1.74k
            if (columns[i]->name() == VERSION_COL) {
101
0
                _version_col_idx = i;
102
0
            }
103
1.74k
            _unique_ids[i] = columns[i]->unique_id();
104
1.74k
        }
105
605
        _init(columns, col_ids, num_key_columns);
106
605
    }
107
108
    // Only for UT
109
5
    Schema(const std::vector<TabletColumnPtr>& columns, size_t num_key_columns) {
110
5
        std::vector<ColumnId> col_ids(columns.size());
111
5
        _unique_ids.resize(columns.size());
112
20
        for (uint32_t cid = 0; cid < columns.size(); ++cid) {
113
15
            col_ids[cid] = cid;
114
15
            _unique_ids[cid] = columns[cid]->unique_id();
115
15
        }
116
117
5
        _init(columns, col_ids, num_key_columns);
118
5
    }
119
120
0
    Schema(const std::vector<const Field*>& cols, size_t num_key_columns) {
121
0
        std::vector<ColumnId> col_ids(cols.size());
122
0
        _unique_ids.resize(cols.size());
123
0
        for (uint32_t cid = 0; cid < cols.size(); ++cid) {
124
0
            col_ids[cid] = cid;
125
0
            if (cols.at(cid)->name() == DELETE_SIGN) {
126
0
                _delete_sign_idx = cid;
127
0
            }
128
0
            if (cols.at(cid)->name() == VERSION_COL) {
129
0
                _version_col_idx = cid;
130
0
            }
131
0
            _unique_ids[cid] = cols[cid]->unique_id();
132
0
        }
133
134
0
        _init(cols, col_ids, num_key_columns);
135
0
    }
136
137
    Schema(const Schema&);
138
    Schema& operator=(const Schema& other);
139
140
    ~Schema();
141
142
    static vectorized::DataTypePtr get_data_type_ptr(const Field& field);
143
144
    static vectorized::IColumn::MutablePtr get_column_by_field(const Field& field);
145
146
    static vectorized::IColumn::MutablePtr get_predicate_column_ptr(const FieldType& type,
147
                                                                    bool is_nullable,
148
                                                                    const ReaderType reader_type);
149
150
82.4k
    const std::vector<Field*>& columns() const { return _cols; }
151
152
521k
    const Field* column(ColumnId cid) const { return _cols[cid]; }
153
154
0
    Field* mutable_column(ColumnId cid) const { return _cols[cid]; }
155
156
603
    size_t num_key_columns() const { return _num_key_columns; }
157
57
    size_t schema_size() const { return _schema_size; }
158
159
1.22M
    size_t column_offset(ColumnId cid) const { return _col_offsets[cid]; }
160
161
0
    size_t column_size(ColumnId cid) const { return _cols[cid]->size(); }
162
163
0
    bool is_null(const char* row, int index) const {
164
0
        return *reinterpret_cast<const bool*>(row + _col_offsets[index]);
165
0
    }
166
0
    void set_is_null(void* row, uint32_t cid, bool is_null) const {
167
0
        *reinterpret_cast<bool*>((char*)row + _col_offsets[cid]) = is_null;
168
0
    }
169
170
22.1k
    size_t num_columns() const { return _cols.size(); }
171
156k
    size_t num_column_ids() const { return _col_ids.size(); }
172
39.4k
    const std::vector<ColumnId>& column_ids() const { return _col_ids; }
173
0
    const std::vector<int32_t>& unique_ids() const { return _unique_ids; }
174
88.5k
    ColumnId column_id(size_t index) const { return _col_ids[index]; }
175
0
    int32_t unique_id(size_t index) const { return _unique_ids[index]; }
176
0
    int32_t delete_sign_idx() const { return _delete_sign_idx; }
177
0
    bool has_sequence_col() const { return _has_sequence_col; }
178
4.95k
    int32_t rowid_col_idx() const { return _rowid_col_idx; }
179
24.8k
    int32_t version_col_idx() const { return _version_col_idx; }
180
    // Don't use.
181
    // TODO: memory size of Schema cannot be accurately tracked.
182
    // In some places, temporarily use num_columns() as Schema size.
183
0
    int64_t mem_size() const { return _mem_size; }
184
185
private:
186
    void _init(const std::vector<TabletColumnPtr>& cols, const std::vector<ColumnId>& col_ids,
187
               size_t num_key_columns);
188
    void _init(const std::vector<const Field*>& cols, const std::vector<ColumnId>& col_ids,
189
               size_t num_key_columns);
190
191
    void _copy_from(const Schema& other);
192
193
    // NOTE: The ColumnId here represents the sequential index number (starting from 0) of
194
    // a column in current row, not the unique id-identifier of each column
195
    std::vector<ColumnId> _col_ids;
196
    std::vector<int32_t> _unique_ids;
197
    // NOTE: Both _cols[cid] and _col_offsets[cid] can only be accessed when the cid is
198
    // contained in _col_ids
199
    std::vector<Field*> _cols;
200
    // The value of each item indicates the starting offset of the corresponding column in
201
    // current row. e.g. _col_offsets[idx] is the offset of _cols[idx] (idx must in _col_ids)
202
    std::vector<size_t> _col_offsets;
203
204
    size_t _num_key_columns;
205
    size_t _schema_size;
206
    int32_t _delete_sign_idx = -1;
207
    bool _has_sequence_col = false;
208
    int32_t _rowid_col_idx = -1;
209
    int32_t _version_col_idx = -1;
210
    int64_t _mem_size = 0;
211
};
212
213
} // namespace doris