/root/doris/be/src/olap/schema.cpp
| Line | Count | Source | 
| 1 |  | // Licensed to the Apache Software Foundation (ASF) under one | 
| 2 |  | // or more contributor license agreements.  See the NOTICE file | 
| 3 |  | // distributed with this work for additional information | 
| 4 |  | // regarding copyright ownership.  The ASF licenses this file | 
| 5 |  | // to you under the Apache License, Version 2.0 (the | 
| 6 |  | // "License"); you may not use this file except in compliance | 
| 7 |  | // with the License.  You may obtain a copy of the License at | 
| 8 |  | // | 
| 9 |  | //   http://www.apache.org/licenses/LICENSE-2.0 | 
| 10 |  | // | 
| 11 |  | // Unless required by applicable law or agreed to in writing, | 
| 12 |  | // software distributed under the License is distributed on an | 
| 13 |  | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | 
| 14 |  | // KIND, either express or implied.  See the License for the | 
| 15 |  | // specific language governing permissions and limitations | 
| 16 |  | // under the License. | 
| 17 |  |  | 
| 18 |  | #include "olap/schema.h" | 
| 19 |  |  | 
| 20 |  | #include <glog/logging.h> | 
| 21 |  |  | 
| 22 |  | #include <boost/iterator/iterator_facade.hpp> | 
| 23 |  | #include <ostream> | 
| 24 |  | #include <unordered_set> | 
| 25 |  | #include <utility> | 
| 26 |  |  | 
| 27 |  | #include "common/config.h" | 
| 28 |  | #include "olap/olap_common.h" | 
| 29 |  | #include "runtime/define_primitive_type.h" | 
| 30 |  | #include "util/trace.h" | 
| 31 |  | #include "vec/columns/column_array.h" | 
| 32 |  | #include "vec/columns/column_dictionary.h" | 
| 33 |  | #include "vec/columns/column_map.h" | 
| 34 |  | #include "vec/columns/column_nullable.h" | 
| 35 |  | #include "vec/columns/column_struct.h" | 
| 36 |  | #include "vec/columns/predicate_column.h" | 
| 37 |  | #include "vec/core/types.h" | 
| 38 |  | #include "vec/data_types/data_type.h" | 
| 39 |  | #include "vec/data_types/data_type_factory.hpp" | 
| 40 |  |  | 
| 41 |  | namespace doris { | 
| 42 |  |  | 
| 43 | 5.03M | Schema::Schema(const Schema& other) { | 
| 44 | 5.03M |     _copy_from(other); | 
| 45 | 5.03M | } | 
| 46 |  |  | 
| 47 | 0 | Schema& Schema::operator=(const Schema& other) { | 
| 48 | 0 |     if (this != &other) { | 
| 49 | 0 |         _copy_from(other); | 
| 50 | 0 |     } | 
| 51 | 0 |     return *this; | 
| 52 | 0 | } | 
| 53 |  |  | 
| 54 | 5.08M | void Schema::_copy_from(const Schema& other) { | 
| 55 | 5.08M |     _col_ids = other._col_ids; | 
| 56 | 5.08M |     _col_offsets = other._col_offsets; | 
| 57 |  |  | 
| 58 | 5.08M |     _num_key_columns = other._num_key_columns; | 
| 59 | 5.08M |     _schema_size = other._schema_size; | 
| 60 |  |  | 
| 61 |  |     // Deep copy _cols | 
| 62 |  |     // TODO(lingbin): really need clone? | 
| 63 | 5.08M |     _cols.resize(other._cols.size(), nullptr); | 
| 64 | 8.47M |     for (auto cid : _col_ids) { | 
| 65 | 8.47M |         _cols[cid] = other._cols[cid]->clone(); | 
| 66 | 8.47M |     } | 
| 67 | 5.08M | } | 
| 68 |  |  | 
| 69 |  | void Schema::_init(const std::vector<TabletColumnPtr>& cols, const std::vector<ColumnId>& col_ids, | 
| 70 | 7.84M |                    size_t num_key_columns) { | 
| 71 | 7.84M |     _col_ids = col_ids; | 
| 72 | 7.84M |     _num_key_columns = num_key_columns; | 
| 73 |  |  | 
| 74 | 7.84M |     _cols.resize(cols.size(), nullptr); | 
| 75 | 7.84M |     _col_offsets.resize(_cols.size(), -1); | 
| 76 |  |  | 
| 77 | 7.84M |     size_t offset = 0; | 
| 78 | 7.84M |     std::unordered_set<uint32_t> col_id_set(col_ids.begin(), col_ids.end()); | 
| 79 | 116M |     for (int cid = 0; cid < cols.size(); ++cid) { | 
| 80 | 108M |         if (col_id_set.find(cid) == col_id_set.end()) { | 
| 81 | 61.0M |             continue; | 
| 82 | 61.0M |         } | 
| 83 | 47.1M |         _cols[cid] = FieldFactory::create(*cols[cid]); | 
| 84 |  |  | 
| 85 | 47.1M |         _col_offsets[cid] = offset; | 
| 86 |  |         // Plus 1 byte for null byte | 
| 87 | 47.1M |         offset += _cols[cid]->size() + 1; | 
| 88 | 47.1M |     } | 
| 89 |  |  | 
| 90 | 7.84M |     _schema_size = offset; | 
| 91 | 7.84M | } | 
| 92 |  |  | 
| 93 |  | void Schema::_init(const std::vector<const Field*>& cols, const std::vector<ColumnId>& col_ids, | 
| 94 | 1.65M |                    size_t num_key_columns) { | 
| 95 | 1.65M |     _col_ids = col_ids; | 
| 96 | 1.65M |     _num_key_columns = num_key_columns; | 
| 97 |  |  | 
| 98 | 1.65M |     _cols.resize(cols.size(), nullptr); | 
| 99 | 1.65M |     _col_offsets.resize(_cols.size(), -1); | 
| 100 |  |  | 
| 101 | 1.65M |     size_t offset = 0; | 
| 102 | 1.65M |     std::unordered_set<uint32_t> col_id_set(col_ids.begin(), col_ids.end()); | 
| 103 | 7.64M |     for (int cid = 0; cid < cols.size(); ++cid) { | 
| 104 | 5.99M |         if (col_id_set.find(cid) == col_id_set.end()) { | 
| 105 | 0 |             continue; | 
| 106 | 0 |         } | 
| 107 |  |         // TODO(lingbin): is it necessary to clone Field? each SegmentIterator will | 
| 108 |  |         // use this func, can we avoid clone? | 
| 109 | 5.99M |         _cols[cid] = cols[cid]->clone(); | 
| 110 |  |  | 
| 111 | 5.99M |         _col_offsets[cid] = offset; | 
| 112 |  |         // Plus 1 byte for null byte | 
| 113 | 5.99M |         offset += _cols[cid]->size() + 1; | 
| 114 | 5.99M |     } | 
| 115 |  |  | 
| 116 | 1.65M |     _schema_size = offset; | 
| 117 | 1.65M | } | 
| 118 |  |  | 
| 119 | 14.6M | Schema::~Schema() { | 
| 120 | 178M |     for (auto col : _cols) { | 
| 121 | 178M |         delete col; | 
| 122 | 178M |     } | 
| 123 | 14.6M | } | 
| 124 |  |  | 
| 125 | 60.8M | vectorized::DataTypePtr Schema::get_data_type_ptr(const Field& field) { | 
| 126 | 60.8M |     return vectorized::DataTypeFactory::instance().create_data_type(field); | 
| 127 | 60.8M | } | 
| 128 |  |  | 
| 129 | 6.15M | vectorized::IColumn::MutablePtr Schema::get_column_by_field(const Field& field) { | 
| 130 | 6.15M |     return get_data_type_ptr(field)->create_column(); | 
| 131 | 6.15M | } | 
| 132 |  |  | 
| 133 |  | vectorized::IColumn::MutablePtr Schema::get_predicate_column_ptr(const FieldType& type, | 
| 134 |  |                                                                  bool is_nullable, | 
| 135 | 1.59M |                                                                  const ReaderType reader_type) { | 
| 136 | 1.59M |     vectorized::IColumn::MutablePtr ptr = nullptr; | 
| 137 | 1.59M |     switch (type) { | 
| 138 | 242 |     case FieldType::OLAP_FIELD_TYPE_BOOL: | 
| 139 | 242 |         ptr = doris::vectorized::PredicateColumnType<TYPE_BOOLEAN>::create(); | 
| 140 | 242 |         break; | 
| 141 | 1.55M |     case FieldType::OLAP_FIELD_TYPE_TINYINT: | 
| 142 | 1.55M |         ptr = doris::vectorized::PredicateColumnType<TYPE_TINYINT>::create(); | 
| 143 | 1.55M |         break; | 
| 144 | 872 |     case FieldType::OLAP_FIELD_TYPE_SMALLINT: | 
| 145 | 872 |         ptr = doris::vectorized::PredicateColumnType<TYPE_SMALLINT>::create(); | 
| 146 | 872 |         break; | 
| 147 | 18.9k |     case FieldType::OLAP_FIELD_TYPE_INT: | 
| 148 | 18.9k |         ptr = doris::vectorized::PredicateColumnType<TYPE_INT>::create(); | 
| 149 | 18.9k |         break; | 
| 150 | 20 |     case FieldType::OLAP_FIELD_TYPE_FLOAT: | 
| 151 | 20 |         ptr = doris::vectorized::PredicateColumnType<TYPE_FLOAT>::create(); | 
| 152 | 20 |         break; | 
| 153 | 173 |     case FieldType::OLAP_FIELD_TYPE_DOUBLE: | 
| 154 | 173 |         ptr = doris::vectorized::PredicateColumnType<TYPE_DOUBLE>::create(); | 
| 155 | 173 |         break; | 
| 156 | 3.63k |     case FieldType::OLAP_FIELD_TYPE_BIGINT: | 
| 157 | 3.63k |         ptr = doris::vectorized::PredicateColumnType<TYPE_BIGINT>::create(); | 
| 158 | 3.63k |         break; | 
| 159 | 599 |     case FieldType::OLAP_FIELD_TYPE_LARGEINT: | 
| 160 | 599 |         ptr = doris::vectorized::PredicateColumnType<TYPE_LARGEINT>::create(); | 
| 161 | 599 |         break; | 
| 162 | 458 |     case FieldType::OLAP_FIELD_TYPE_DATE: | 
| 163 | 458 |         ptr = doris::vectorized::PredicateColumnType<TYPE_DATE>::create(); | 
| 164 | 458 |         break; | 
| 165 | 3.23k |     case FieldType::OLAP_FIELD_TYPE_DATEV2: | 
| 166 | 3.23k |         ptr = doris::vectorized::PredicateColumnType<TYPE_DATEV2>::create(); | 
| 167 | 3.23k |         break; | 
| 168 | 1.79k |     case FieldType::OLAP_FIELD_TYPE_DATETIMEV2: | 
| 169 | 1.79k |         ptr = doris::vectorized::PredicateColumnType<TYPE_DATETIMEV2>::create(); | 
| 170 | 1.79k |         break; | 
| 171 | 371 |     case FieldType::OLAP_FIELD_TYPE_DATETIME: | 
| 172 | 371 |         ptr = doris::vectorized::PredicateColumnType<TYPE_DATETIME>::create(); | 
| 173 | 371 |         break; | 
| 174 | 1.11k |     case FieldType::OLAP_FIELD_TYPE_CHAR: | 
| 175 | 1.11k |         if (config::enable_low_cardinality_optimize && reader_type == ReaderType::READER_QUERY) { | 
| 176 | 1.11k |             ptr = doris::vectorized::ColumnDictI32::create(type); | 
| 177 | 1.11k |         } else { | 
| 178 | 4 |             ptr = doris::vectorized::PredicateColumnType<TYPE_CHAR>::create(); | 
| 179 | 4 |         } | 
| 180 | 1.11k |         break; | 
| 181 | 0 |     case FieldType::OLAP_FIELD_TYPE_VARCHAR: | 
| 182 | 12.6k |     case FieldType::OLAP_FIELD_TYPE_STRING: | 
| 183 | 12.7k |     case FieldType::OLAP_FIELD_TYPE_JSONB: | 
| 184 | 12.7k |         if (config::enable_low_cardinality_optimize && reader_type == ReaderType::READER_QUERY) { | 
| 185 | 11.7k |             ptr = doris::vectorized::ColumnDictI32::create(type); | 
| 186 | 11.7k |         } else { | 
| 187 | 976 |             ptr = doris::vectorized::PredicateColumnType<TYPE_STRING>::create(); | 
| 188 | 976 |         } | 
| 189 | 12.7k |         break; | 
| 190 | 5 |     case FieldType::OLAP_FIELD_TYPE_DECIMAL: | 
| 191 | 5 |         ptr = doris::vectorized::PredicateColumnType<TYPE_DECIMALV2>::create(); | 
| 192 | 5 |         break; | 
| 193 | 145 |     case FieldType::OLAP_FIELD_TYPE_DECIMAL32: | 
| 194 | 145 |         ptr = doris::vectorized::PredicateColumnType<TYPE_DECIMAL32>::create(); | 
| 195 | 145 |         break; | 
| 196 | 233 |     case FieldType::OLAP_FIELD_TYPE_DECIMAL64: | 
| 197 | 233 |         ptr = doris::vectorized::PredicateColumnType<TYPE_DECIMAL64>::create(); | 
| 198 | 233 |         break; | 
| 199 | 807 |     case FieldType::OLAP_FIELD_TYPE_DECIMAL128I: | 
| 200 | 807 |         ptr = doris::vectorized::PredicateColumnType<TYPE_DECIMAL128I>::create(); | 
| 201 | 807 |         break; | 
| 202 | 197 |     case FieldType::OLAP_FIELD_TYPE_DECIMAL256: | 
| 203 | 197 |         ptr = doris::vectorized::PredicateColumnType<TYPE_DECIMAL256>::create(); | 
| 204 | 197 |         break; | 
| 205 | 91 |     case FieldType::OLAP_FIELD_TYPE_IPV4: | 
| 206 | 91 |         ptr = doris::vectorized::PredicateColumnType<TYPE_IPV4>::create(); | 
| 207 | 91 |         break; | 
| 208 | 93 |     case FieldType::OLAP_FIELD_TYPE_IPV6: | 
| 209 | 93 |         ptr = doris::vectorized::PredicateColumnType<TYPE_IPV6>::create(); | 
| 210 | 93 |         break; | 
| 211 | 0 |     default: | 
| 212 | 0 |         throw Exception( | 
| 213 | 0 |                 ErrorCode::SCHEMA_SCHEMA_FIELD_INVALID, | 
| 214 | 0 |                 fmt::format("Unexpected type when choosing predicate column, type={}", int(type))); | 
| 215 | 1.59M |     } | 
| 216 |  |  | 
| 217 | 1.60M |     if (is_nullable) { | 
| 218 | 22.2k |         return doris::vectorized::ColumnNullable::create(std::move(ptr), | 
| 219 | 22.2k |                                                          doris::vectorized::ColumnUInt8::create()); | 
| 220 | 22.2k |     } | 
| 221 | 1.57M |     return ptr; | 
| 222 | 1.60M | } | 
| 223 |  |  | 
| 224 |  | } // namespace doris |