/root/doris/be/src/olap/field.h
| Line | Count | Source (jump to first uncovered line) | 
| 1 |  | // Licensed to the Apache Software Foundation (ASF) under one | 
| 2 |  | // or more contributor license agreements.  See the NOTICE file | 
| 3 |  | // distributed with this work for additional information | 
| 4 |  | // regarding copyright ownership.  The ASF licenses this file | 
| 5 |  | // to you under the Apache License, Version 2.0 (the | 
| 6 |  | // "License"); you may not use this file except in compliance | 
| 7 |  | // with the License.  You may obtain a copy of the License at | 
| 8 |  | // | 
| 9 |  | //   http://www.apache.org/licenses/LICENSE-2.0 | 
| 10 |  | // | 
| 11 |  | // Unless required by applicable law or agreed to in writing, | 
| 12 |  | // software distributed under the License is distributed on an | 
| 13 |  | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | 
| 14 |  | // KIND, either express or implied.  See the License for the | 
| 15 |  | // specific language governing permissions and limitations | 
| 16 |  | // under the License. | 
| 17 |  |  | 
| 18 |  | #pragma once | 
| 19 |  |  | 
| 20 |  | #include <sstream> | 
| 21 |  | #include <string> | 
| 22 |  |  | 
| 23 |  | #include "olap/key_coder.h" | 
| 24 |  | #include "olap/olap_common.h" | 
| 25 |  | #include "olap/olap_define.h" | 
| 26 |  | #include "olap/row_cursor_cell.h" | 
| 27 |  | #include "olap/tablet_schema.h" | 
| 28 |  | #include "olap/types.h" | 
| 29 |  | #include "olap/utils.h" | 
| 30 |  | #include "runtime/collection_value.h" | 
| 31 |  | #include "runtime/map_value.h" | 
| 32 |  | #include "util/hash_util.hpp" | 
| 33 |  | #include "util/slice.h" | 
| 34 |  | #include "vec/common/arena.h" | 
| 35 |  | #include "vec/json/path_in_data.h" | 
| 36 |  |  | 
| 37 |  | namespace doris { | 
| 38 |  |  | 
| 39 |  | // A Field represents a column in its in-memory format. | 
| 40 |  | // Users can use this class to access or manipulate column data in memory. | 
| 41 |  | class Field { | 
| 42 |  | public: | 
| 43 |  |     Field(const TabletColumn& column) | 
| 44 |  |             : _type_info(get_type_info(&column)), | 
| 45 |  |               _desc(column), | 
| 46 |  |               _length(column.length()), | 
| 47 |  |               _key_coder(get_key_coder(column.type())), | 
| 48 |  |               _name(column.name()), | 
| 49 |  |               _index_size(column.index_length()), | 
| 50 |  |               _is_nullable(column.is_nullable()), | 
| 51 |  |               _unique_id(column.unique_id()), | 
| 52 |  |               _parent_unique_id(column.parent_unique_id()), | 
| 53 |  |               _is_extracted_column(column.is_extracted_column()), | 
| 54 | 30.5k |               _path(column.path_info_ptr()) {} | 
| 55 |  |  | 
| 56 | 30.5k |     virtual ~Field() = default; | 
| 57 |  |  | 
| 58 | 549k |     size_t size() const { return _type_info->size(); } | 
| 59 | 0 |     int32_t length() const { return _length; } | 
| 60 | 0 |     size_t field_size() const { return size() + 1; } | 
| 61 | 0 |     size_t index_size() const { return _index_size; } | 
| 62 | 18.5k |     int32_t unique_id() const { return _unique_id; } | 
| 63 | 556 |     int32_t parent_unique_id() const { return _parent_unique_id; } | 
| 64 | 19.1k |     bool is_extracted_column() const { return _is_extracted_column; } | 
| 65 | 47.6k |     const std::string& name() const { return _name; } | 
| 66 | 0 |     const vectorized::PathInDataPtr& path() const { return _path; } | 
| 67 |  |  | 
| 68 | 34.2k |     virtual void set_to_max(char* buf) const { return _type_info->set_to_max(buf); } | 
| 69 | 34.1k |     virtual void set_to_zone_map_max(char* buf) const { set_to_max(buf); } | 
| 70 |  |  | 
| 71 | 34.2k |     virtual void set_to_min(char* buf) const { return _type_info->set_to_min(buf); } | 
| 72 | 34.2k |     virtual void set_to_zone_map_min(char* buf) const { set_to_min(buf); } | 
| 73 |  |  | 
| 74 | 4 |     void set_long_text_buf(char** buf) { _long_text_buf = buf; } | 
| 75 |  |  | 
| 76 |  |     // This function allocates memory from the arena, unlike allocate_memory, | 
| 77 |  |     // which reserves memory from contiguous memory. | 
| 78 | 44.8k |     virtual char* allocate_value(vectorized::Arena* arena) const { | 
| 79 | 44.8k |         return arena->alloc(_type_info->size()); | 
| 80 | 44.8k |     } | 
| 81 |  |  | 
| 82 | 44.8k |     virtual char* allocate_zone_map_value(vectorized::Arena* arena) const { | 
| 83 | 44.8k |         return allocate_value(arena); | 
| 84 | 44.8k |     } | 
| 85 |  |  | 
| 86 | 225 |     virtual size_t get_variable_len() const { return 0; } | 
| 87 |  |  | 
| 88 | 22.9k |     virtual void modify_zone_map_index(char*) const {} | 
| 89 |  |  | 
| 90 | 0 |     virtual Field* clone() const { | 
| 91 | 0 |         auto* local = new Field(_desc); | 
| 92 | 0 |         this->clone(local); | 
| 93 | 0 |         return local; | 
| 94 | 0 |     } | 
| 95 |  |  | 
| 96 |  |     // Only compares column content, without considering the null condition. | 
| 97 |  |     // RETURNS: | 
| 98 |  |     //      0 means equal, | 
| 99 |  |     //      -1 means left less than right, | 
| 100 |  |     //      1 means left bigger than right | 
| 101 | 35.8k |     int compare(const void* left, const void* right) const { return _type_info->cmp(left, right); } | 
| 102 |  |  | 
| 103 |  |     // Compare two cells. | 
| 104 |  |     // Unlike compare(), this function takes into account that either cell | 
| 105 |  |     // may be null: a null cell is less than any non-null cell, and two null | 
| 106 |  |     // cells are equal. compare() only compares column content and does not | 
| 107 |  |     // consider the null condition. | 
| 108 |  |     // RETURNS: | 
| 109 |  |     //      0 means equal, | 
| 110 |  |     //      -1 means left less than right, | 
| 111 |  |     //      1 means left bigger than right | 
| 112 |  |     template <typename LhsCellType, typename RhsCellType> | 
| 113 | 0 |     int compare_cell(const LhsCellType& lhs, const RhsCellType& rhs) const { | 
| 114 | 0 |         bool l_null = lhs.is_null(); | 
| 115 | 0 |         bool r_null = rhs.is_null(); | 
| 116 | 0 |         if (l_null != r_null) {  Branch (116:13): [True: 0, False: 0]
 | 
| 117 | 0 |             return l_null ? -1 : 1;   Branch (117:20): [True: 0, False: 0]
 | 
| 118 | 0 |         } | 
| 119 | 0 |         return l_null ? 0 : _type_info->cmp(lhs.cell_ptr(), rhs.cell_ptr());   Branch (119:16): [True: 0, False: 0]
 | 
| 120 | 0 |     } Unexecuted instantiation: _ZNK5doris5Field12compare_cellINS_12WrapperFieldES2_EEiRKT_RKT0_Unexecuted instantiation: _ZNK5doris5Field12compare_cellINS_13RowCursorCellES2_EEiRKT_RKT0_ | 
| 121 |  |  | 
| 122 |  |     // Copy source cell's content to destination cell directly. | 
| 123 |  |     // For string type, this function assumes that the destination has | 
| 124 |  |     // enough space and copies the source content into the destination without | 
| 125 |  |     // memory allocation. | 
| 126 |  |     template <typename DstCellType, typename SrcCellType> | 
| 127 | 0 |     void direct_copy(DstCellType* dst, const SrcCellType& src) const { | 
| 128 | 0 |         bool is_null = src.is_null(); | 
| 129 | 0 |         dst->set_is_null(is_null); | 
| 130 | 0 |         if (is_null) { | 
| 131 | 0 |             return; | 
| 132 | 0 |         } | 
| 133 | 0 |         if (type() == FieldType::OLAP_FIELD_TYPE_STRING) { | 
| 134 | 0 |             auto dst_slice = reinterpret_cast<Slice*>(dst->mutable_cell_ptr()); | 
| 135 | 0 |             auto src_slice = reinterpret_cast<const Slice*>(src.cell_ptr()); | 
| 136 | 0 |             if (dst_slice->size < src_slice->size) { | 
| 137 | 0 |                 *_long_text_buf = static_cast<char*>(realloc(*_long_text_buf, src_slice->size)); | 
| 138 | 0 |                 dst_slice->data = *_long_text_buf; | 
| 139 | 0 |                 dst_slice->size = src_slice->size; | 
| 140 | 0 |             } | 
| 141 | 0 |         } | 
| 142 | 0 |         return _type_info->direct_copy(dst->mutable_cell_ptr(), src.cell_ptr()); | 
| 143 | 0 |     } | 
| 144 |  |  | 
| 145 |  |     // Deep copy the source cell's content to the destination cell. | 
| 146 |  |     // For string type, this will allocate data from the arena | 
| 147 |  |     // and copy the source's content. | 
| 148 |  |     template <typename DstCellType, typename SrcCellType> | 
| 149 |  |     void deep_copy(DstCellType* dst, const SrcCellType& src, vectorized::Arena* arena) const { | 
| 150 |  |         bool is_null = src.is_null(); | 
| 151 |  |         dst->set_is_null(is_null); | 
| 152 |  |         if (is_null) { | 
| 153 |  |             return; | 
| 154 |  |         } | 
| 155 |  |         _type_info->deep_copy(dst->mutable_cell_ptr(), src.cell_ptr(), arena); | 
| 156 |  |     } | 
| 157 |  |  | 
| 158 |  |     // Used to initialize a scan key that is stored in string format. | 
| 159 |  |     // value_string should end with '\0'. | 
| 160 |  |     Status from_string(char* buf, const std::string& value_string, const int precision = 0, | 
| 161 | 82 |                        const int scale = 0) const { | 
| 162 | 82 |         if (type() == FieldType::OLAP_FIELD_TYPE_STRING && !value_string.empty()) {  Branch (162:13): [True: 4, False: 78]
  Branch (162:60): [True: 4, False: 0]
 | 
| 163 | 4 |             auto slice = reinterpret_cast<Slice*>(buf); | 
| 164 | 4 |             if (slice->size < value_string.size()) {  Branch (164:17): [True: 0, False: 4]
 | 
| 165 | 0 |                 *_long_text_buf = static_cast<char*>(realloc(*_long_text_buf, value_string.size())); | 
| 166 | 0 |                 slice->data = *_long_text_buf; | 
| 167 | 0 |                 slice->size = value_string.size(); | 
| 168 | 0 |             } | 
| 169 | 4 |         } | 
| 170 | 82 |         return _type_info->from_string(buf, value_string, precision, scale); | 
| 171 | 82 |     } | 
| 172 |  |  | 
| 173 |  |     //  convert inner value to string | 
| 174 |  |     //  performance is not considered, only for debug use | 
| 175 | 57.7k |     std::string to_string(const char* src) const { return _type_info->to_string(src); } | 
| 176 |  |  | 
| 177 |  |     template <typename CellType> | 
| 178 |  |     std::string debug_string(const CellType& cell) const { | 
| 179 |  |         std::stringstream ss; | 
| 180 |  |         if (cell.is_null()) { | 
| 181 |  |             ss << "(null)"; | 
| 182 |  |         } else { | 
| 183 |  |             ss << _type_info->to_string(cell.cell_ptr()); | 
| 184 |  |         } | 
| 185 |  |         return ss.str(); | 
| 186 |  |     } | 
| 187 |  |  | 
| 188 | 126k |     FieldType type() const { return _type_info->type(); } | 
| 189 | 93.3k |     const TypeInfo* type_info() const { return _type_info.get(); } | 
| 190 | 117k |     bool is_nullable() const { return _is_nullable; } | 
| 191 |  |  | 
| 192 |  |     // similar to `full_encode_ascending`, but only encode part (the first `index_size` bytes) of the value. | 
| 193 |  |     // only applicable to string type | 
| 194 | 4 |     void encode_ascending(const void* value, std::string* buf) const { | 
| 195 | 4 |         _key_coder->encode_ascending(value, _index_size, buf); | 
| 196 | 4 |     } | 
| 197 |  |  | 
| 198 |  |     // encode the provided `value` into `buf`. | 
| 199 | 322k |     void full_encode_ascending(const void* value, std::string* buf) const { | 
| 200 | 322k |         _key_coder->full_encode_ascending(value, buf); | 
| 201 | 322k |     } | 
| 202 | 647 |     void add_sub_field(std::unique_ptr<Field> sub_field) { | 
| 203 | 647 |         _sub_fields.emplace_back(std::move(sub_field)); | 
| 204 | 647 |     } | 
| 205 | 111 |     Field* get_sub_field(int i) const { return _sub_fields[i].get(); } | 
| 206 | 1 |     size_t get_sub_field_count() const { return _sub_fields.size(); } | 
| 207 |  |  | 
| 208 | 34 |     void set_precision(int32_t precision) { _precision = precision; } | 
| 209 | 34 |     void set_scale(int32_t scale) { _scale = scale; } | 
| 210 | 2 |     int32_t get_precision() const { return _precision; } | 
| 211 | 2 |     int32_t get_scale() const { return _scale; } | 
| 212 | 238k |     const TabletColumn& get_desc() const { return _desc; } | 
| 213 |  |  | 
| 214 | 13.4k |     int32_t get_unique_id() const { | 
| 215 | 13.4k |         return is_extracted_column() ? parent_unique_id() : unique_id();   Branch (215:16): [True: 250, False: 13.2k]
 | 
| 216 | 13.4k |     } | 
| 217 |  |  | 
| 218 |  | protected: | 
| 219 |  |     TypeInfoPtr _type_info; | 
| 220 |  |     TabletColumn _desc; | 
| 221 |  |     // unit : byte | 
| 222 |  |     // except for strings, other types have fixed lengths | 
| 223 |  |     // Note that the struct type itself has a fixed length, but because | 
| 224 |  |     // its number of subfields is variable, the actual length of | 
| 225 |  |     // a struct field is not fixed. | 
| 226 |  |     uint32_t _length; | 
| 227 |  |     // Since the length of the STRING type cannot be determined, | 
| 228 |  |     // only dynamic memory can be used. Arena does not support realloc. | 
| 229 |  |     // The schema information is shared globally. Therefore, | 
| 230 |  |     // dynamic memory can only be managed in thread local mode. | 
| 231 |  |     // The memory will be created and released in rowcursor. | 
| 232 |  |     char** _long_text_buf = nullptr; | 
| 233 |  |  | 
| 234 | 0 |     char* allocate_string_value(vectorized::Arena* arena) const { | 
| 235 | 0 |         char* type_value = arena->alloc(sizeof(Slice)); | 
| 236 | 0 |         auto slice = reinterpret_cast<Slice*>(type_value); | 
| 237 | 0 |         slice->size = _length; | 
| 238 | 0 |         slice->data = arena->alloc(slice->size); | 
| 239 | 0 |         return type_value; | 
| 240 | 0 |     } | 
| 241 |  |  | 
| 242 | 2 |     void clone(Field* other) const { | 
| 243 | 2 |         other->_type_info = clone_type_info(this->_type_info.get()); | 
| 244 | 2 |         other->_key_coder = this->_key_coder; | 
| 245 | 2 |         other->_name = this->_name; | 
| 246 | 2 |         other->_index_size = this->_index_size; | 
| 247 | 2 |         other->_is_nullable = this->_is_nullable; | 
| 248 | 2 |         other->_sub_fields.clear(); | 
| 249 | 2 |         other->_precision = this->_precision; | 
| 250 | 2 |         other->_scale = this->_scale; | 
| 251 | 2 |         other->_unique_id = this->_unique_id; | 
| 252 | 2 |         other->_parent_unique_id = this->_parent_unique_id; | 
| 253 | 2 |         other->_is_extracted_column = this->_is_extracted_column; | 
| 254 | 2 |         for (const auto& f : _sub_fields) {  Branch (254:28): [True: 0, False: 2]
 | 
| 255 | 0 |             Field* item = f->clone(); | 
| 256 | 0 |             other->add_sub_field(std::unique_ptr<Field>(item)); | 
| 257 | 0 |         } | 
| 258 | 2 |     } | 
| 259 |  |  | 
| 260 |  | private: | 
| 261 |  |     // maximum length of Field, unit : bytes | 
| 262 |  |     // usually equal to length, except for variable-length strings | 
| 263 |  |     const KeyCoder* _key_coder; | 
| 264 |  |     std::string _name; | 
| 265 |  |     uint16_t _index_size; | 
| 266 |  |     bool _is_nullable; | 
| 267 |  |     std::vector<std::unique_ptr<Field>> _sub_fields; | 
| 268 |  |     int32_t _precision; | 
| 269 |  |     int32_t _scale; | 
| 270 |  |     int32_t _unique_id; | 
| 271 |  |     int32_t _parent_unique_id; | 
| 272 |  |     bool _is_extracted_column = false; | 
| 273 |  |     vectorized::PathInDataPtr _path; | 
| 274 |  | }; | 
| 275 |  |  | 
| 276 |  | class MapField : public Field { | 
| 277 |  | public: | 
| 278 | 313 |     MapField(const TabletColumn& column) : Field(column) {} | 
| 279 |  |  | 
| 280 | 0 |     size_t get_variable_len() const override { return _length; } | 
| 281 |  | }; | 
| 282 |  |  | 
| 283 |  | class StructField : public Field { | 
| 284 |  | public: | 
| 285 | 0 |     StructField(const TabletColumn& column) : Field(column) {} | 
| 286 |  |  | 
| 287 | 0 |     size_t get_variable_len() const override { | 
| 288 | 0 |         size_t variable_len = _length; | 
| 289 | 0 |         for (size_t i = 0; i < get_sub_field_count(); i++) {  Branch (289:28): [True: 0, False: 0]
 | 
| 290 | 0 |             variable_len += get_sub_field(i)->get_variable_len(); | 
| 291 | 0 |         } | 
| 292 | 0 |         return variable_len; | 
| 293 | 0 |     } | 
| 294 |  | }; | 
| 295 |  |  | 
| 296 |  | class ArrayField : public Field { | 
| 297 |  | public: | 
| 298 | 21 |     ArrayField(const TabletColumn& column) : Field(column) {} | 
| 299 |  |  | 
| 300 | 0 |     size_t get_variable_len() const override { return _length; } | 
| 301 |  | }; | 
| 302 |  |  | 
| 303 |  | class CharField : public Field { | 
| 304 |  | public: | 
| 305 | 7 |     CharField(const TabletColumn& column) : Field(column) {} | 
| 306 |  |  | 
| 307 | 2 |     size_t get_variable_len() const override { return _length; } | 
| 308 |  |  | 
| 309 | 1 |     CharField* clone() const override { | 
| 310 | 1 |         auto* local = new CharField(_desc); | 
| 311 | 1 |         Field::clone(local); | 
| 312 | 1 |         return local; | 
| 313 | 1 |     } | 
| 314 |  |  | 
| 315 | 0 |     char* allocate_value(vectorized::Arena* arena) const override { | 
| 316 | 0 |         return Field::allocate_string_value(arena); | 
| 317 | 0 |     } | 
| 318 |  |  | 
| 319 | 0 |     void set_to_max(char* ch) const override { | 
| 320 | 0 |         auto slice = reinterpret_cast<Slice*>(ch); | 
| 321 | 0 |         slice->size = _length; | 
| 322 | 0 |         memset(slice->data, 0xFF, slice->size); | 
| 323 | 0 |     } | 
| 324 |  |  | 
| 325 |  |     // To prevent the zone map from costing too much memory, if the char length | 
| 326 |  |     // is longer than `MAX_ZONE_MAP_INDEX_SIZE`, we just allocate | 
| 327 |  |     // `MAX_ZONE_MAP_INDEX_SIZE` of memory. | 
| 328 | 8 |     char* allocate_zone_map_value(vectorized::Arena* arena) const override { | 
| 329 | 8 |         char* type_value = arena->alloc(sizeof(Slice)); | 
| 330 | 8 |         auto slice = reinterpret_cast<Slice*>(type_value); | 
| 331 | 8 |         slice->size = MAX_ZONE_MAP_INDEX_SIZE > _length ? _length : MAX_ZONE_MAP_INDEX_SIZE;   Branch (331:23): [True: 8, False: 0]
 | 
| 332 | 8 |         slice->data = arena->alloc(slice->size); | 
| 333 | 8 |         return type_value; | 
| 334 | 8 |     } | 
| 335 |  |  | 
| 336 |  |     // Only varchar fields need to modify the zone map index when the zone map max_value | 
| 337 |  |     // index is longer than `MAX_ZONE_MAP_INDEX_SIZE`, so here we add one | 
| 338 |  |     // to the last byte. | 
| 339 |  |     // In UTF-8 encoding, 0xff does not appear in the last byte here. | 
| 340 | 6 |     void modify_zone_map_index(char* src) const override { | 
| 341 | 6 |         auto slice = reinterpret_cast<Slice*>(src); | 
| 342 | 6 |         if (slice->size == MAX_ZONE_MAP_INDEX_SIZE) {  Branch (342:13): [True: 0, False: 6]
 | 
| 343 | 0 |             slice->mutable_data()[slice->size - 1] += 1; | 
| 344 | 0 |         } | 
| 345 | 6 |     } | 
| 346 |  |  | 
| 347 | 8 |     void set_to_zone_map_max(char* ch) const override { | 
| 348 | 8 |         auto slice = reinterpret_cast<Slice*>(ch); | 
| 349 | 8 |         int length = _length < MAX_ZONE_MAP_INDEX_SIZE ? _length : MAX_ZONE_MAP_INDEX_SIZE;   Branch (349:22): [True: 8, False: 0]
 | 
| 350 | 8 |         slice->size = length; | 
| 351 | 8 |         memset(slice->data, 0xFF, slice->size); | 
| 352 | 8 |     } | 
| 353 |  | }; | 
| 354 |  |  | 
| 355 |  | class VarcharField : public Field { | 
| 356 |  | public: | 
| 357 | 21 |     VarcharField(const TabletColumn& column) : Field(column) {} | 
| 358 |  |  | 
| 359 | 1 |     size_t get_variable_len() const override { return _length - OLAP_VARCHAR_MAX_BYTES; } | 
| 360 |  |  | 
| 361 | 0 |     VarcharField* clone() const override { | 
| 362 | 0 |         auto* local = new VarcharField(_desc); | 
| 363 | 0 |         Field::clone(local); | 
| 364 | 0 |         return local; | 
| 365 | 0 |     } | 
| 366 |  |  | 
| 367 | 0 |     char* allocate_value(vectorized::Arena* arena) const override { | 
| 368 | 0 |         return Field::allocate_string_value(arena); | 
| 369 | 0 |     } | 
| 370 |  |  | 
| 371 |  |     // To prevent the zone map from costing too much memory, if the varchar length | 
| 372 |  |     // is longer than `MAX_ZONE_MAP_INDEX_SIZE`, we just allocate | 
| 373 |  |     // `MAX_ZONE_MAP_INDEX_SIZE` of memory. | 
| 374 | 0 |     char* allocate_zone_map_value(vectorized::Arena* arena) const override { | 
| 375 | 0 |         char* type_value = arena->alloc(sizeof(Slice)); | 
| 376 | 0 |         auto slice = reinterpret_cast<Slice*>(type_value); | 
| 377 | 0 |         slice->size = MAX_ZONE_MAP_INDEX_SIZE > _length ? _length : MAX_ZONE_MAP_INDEX_SIZE;   Branch (377:23): [True: 0, False: 0]
 | 
| 378 | 0 |         slice->data = arena->alloc(slice->size); | 
| 379 | 0 |         return type_value; | 
| 380 | 0 |     } | 
| 381 |  |  | 
| 382 |  |     // Only varchar/string fields need to modify the zone map index when the zone map max_value | 
| 383 |  |     // index is longer than `MAX_ZONE_MAP_INDEX_SIZE`, so here we add one | 
| 384 |  |     // to the last byte. | 
| 385 |  |     // In UTF-8 encoding, 0xff does not appear in the last byte here. | 
| 386 | 0 |     void modify_zone_map_index(char* src) const override { | 
| 387 | 0 |         auto slice = reinterpret_cast<Slice*>(src); | 
| 388 | 0 |         if (slice->size == MAX_ZONE_MAP_INDEX_SIZE) {  Branch (388:13): [True: 0, False: 0]
 | 
| 389 | 0 |             slice->mutable_data()[slice->size - 1] += 1; | 
| 390 | 0 |         } | 
| 391 | 0 |     } | 
| 392 |  |  | 
| 393 | 2 |     void set_to_max(char* ch) const override { | 
| 394 | 2 |         auto slice = reinterpret_cast<Slice*>(ch); | 
| 395 | 2 |         slice->size = _length - OLAP_VARCHAR_MAX_BYTES; | 
| 396 | 2 |         memset(slice->data, 0xFF, slice->size); | 
| 397 | 2 |     } | 
| 398 | 0 |     void set_to_zone_map_max(char* ch) const override { | 
| 399 | 0 |         auto slice = reinterpret_cast<Slice*>(ch); | 
| 400 | 0 |         int length = _length < MAX_ZONE_MAP_INDEX_SIZE ? _length : MAX_ZONE_MAP_INDEX_SIZE;   Branch (400:22): [True: 0, False: 0]
 | 
| 401 |  | 
 | 
| 402 | 0 |         slice->size = length - OLAP_VARCHAR_MAX_BYTES; | 
| 403 | 0 |         memset(slice->data, 0xFF, slice->size); | 
| 404 | 0 |     } | 
| 405 |  | }; | 
| 406 |  | class StringField : public Field { | 
| 407 |  | public: | 
| 408 | 6.84k |     StringField(const TabletColumn& column) : Field(column) {} | 
| 409 |  |  | 
| 410 | 1 |     StringField* clone() const override { | 
| 411 | 1 |         auto* local = new StringField(_desc); | 
| 412 | 1 |         Field::clone(local); | 
| 413 | 1 |         return local; | 
| 414 | 1 |     } | 
| 415 |  |  | 
| 416 | 0 |     char* allocate_value(vectorized::Arena* arena) const override { | 
| 417 | 0 |         return Field::allocate_string_value(arena); | 
| 418 | 0 |     } | 
| 419 |  |  | 
| 420 | 10.9k |     char* allocate_zone_map_value(vectorized::Arena* arena) const override { | 
| 421 | 10.9k |         char* type_value = arena->alloc(sizeof(Slice)); | 
| 422 | 10.9k |         auto slice = reinterpret_cast<Slice*>(type_value); | 
| 423 | 10.9k |         slice->size = MAX_ZONE_MAP_INDEX_SIZE; | 
| 424 | 10.9k |         slice->data = arena->alloc(slice->size); | 
| 425 | 10.9k |         return type_value; | 
| 426 | 10.9k |     } | 
| 427 | 0 |     void set_to_max(char* ch) const override { | 
| 428 | 0 |         auto slice = reinterpret_cast<Slice*>(ch); | 
| 429 | 0 |         memset(slice->data, 0xFF, slice->size); | 
| 430 | 0 |     } | 
| 431 |  |     // Only varchar/string fields need to modify the zone map index when the zone map max_value | 
| 432 |  |     // index is longer than `MAX_ZONE_MAP_INDEX_SIZE`, so here we add one | 
| 433 |  |     // to the last byte. | 
| 434 |  |     // In UTF-8 encoding, 0xff does not appear in the last byte here. | 
| 435 | 8.89k |     void modify_zone_map_index(char* src) const override { | 
| 436 | 8.89k |         auto slice = reinterpret_cast<Slice*>(src); | 
| 437 | 8.89k |         if (slice->size == MAX_ZONE_MAP_INDEX_SIZE) {  Branch (437:13): [True: 6.80k, False: 2.09k]
 | 
| 438 | 6.80k |             slice->mutable_data()[slice->size - 1] += 1; | 
| 439 | 6.80k |         } | 
| 440 | 8.89k |     } | 
| 441 |  |  | 
| 442 | 11.6k |     void set_to_zone_map_max(char* ch) const override { | 
| 443 | 11.6k |         auto slice = reinterpret_cast<Slice*>(ch); | 
| 444 | 11.6k |         memset(slice->data, 0xFF, slice->size); | 
| 445 | 11.6k |     } | 
| 446 | 11.6k |     void set_to_zone_map_min(char* ch) const override { | 
| 447 | 11.6k |         auto slice = reinterpret_cast<Slice*>(ch); | 
| 448 | 11.6k |         memset(slice->data, 0x00, slice->size); | 
| 449 | 11.6k |     } | 
| 450 |  | }; | 
| 451 |  |  | 
| 452 |  | class BitmapAggField : public Field { | 
| 453 |  | public: | 
| 454 | 0 |     BitmapAggField(const TabletColumn& column) : Field(column) {} | 
| 455 |  |  | 
| 456 | 0 |     BitmapAggField* clone() const override { | 
| 457 | 0 |         auto* local = new BitmapAggField(_desc); | 
| 458 | 0 |         Field::clone(local); | 
| 459 | 0 |         return local; | 
| 460 | 0 |     } | 
| 461 |  | }; | 
| 462 |  |  | 
| 463 |  | class QuantileStateAggField : public Field { | 
| 464 |  | public: | 
| 465 | 0 |     QuantileStateAggField(const TabletColumn& column) : Field(column) {} | 
| 466 |  |  | 
| 467 | 0 |     QuantileStateAggField* clone() const override { | 
| 468 | 0 |         auto* local = new QuantileStateAggField(_desc); | 
| 469 | 0 |         Field::clone(local); | 
| 470 | 0 |         return local; | 
| 471 | 0 |     } | 
| 472 |  | }; | 
| 473 |  |  | 
| 474 |  | class AggStateField : public Field { | 
| 475 |  | public: | 
| 476 | 0 |     AggStateField(const TabletColumn& column) : Field(column) {} | 
| 477 |  |  | 
| 478 | 0 |     AggStateField* clone() const override { | 
| 479 | 0 |         auto* local = new AggStateField(_desc); | 
| 480 | 0 |         Field::clone(local); | 
| 481 | 0 |         return local; | 
| 482 | 0 |     } | 
| 483 |  | }; | 
| 484 |  |  | 
| 485 |  | class HllAggField : public Field { | 
| 486 |  | public: | 
| 487 | 1 |     HllAggField(const TabletColumn& column) : Field(column) {} | 
| 488 |  |  | 
| 489 | 0 |     HllAggField* clone() const override { | 
| 490 | 0 |         auto* local = new HllAggField(_desc); | 
| 491 | 0 |         Field::clone(local); | 
| 492 | 0 |         return local; | 
| 493 | 0 |     } | 
| 494 |  | }; | 
| 495 |  |  | 
| 496 |  | class FieldFactory { | 
| 497 |  | public: | 
| 498 | 30.5k |     static Field* create(const TabletColumn& column) { | 
| 499 |  |         // for key column | 
| 500 | 30.5k |         if (column.is_key()) {  Branch (500:13): [True: 7.00k, False: 23.5k]
 | 
| 501 | 7.00k |             switch (column.type()) { | 
| 502 | 4 |             case FieldType::OLAP_FIELD_TYPE_CHAR:   Branch (502:13): [True: 4, False: 7.00k]
 | 
| 503 | 4 |                 return new CharField(column); | 
| 504 | 137 |             case FieldType::OLAP_FIELD_TYPE_VARCHAR:   Branch (504:13): [True: 137, False: 6.87k]
 | 
| 505 | 353 |             case FieldType::OLAP_FIELD_TYPE_STRING:   Branch (505:13): [True: 216, False: 6.79k]
 | 
| 506 | 353 |                 return new StringField(column); | 
| 507 | 0 |             case FieldType::OLAP_FIELD_TYPE_STRUCT: {  Branch (507:13): [True: 0, False: 7.00k]
 | 
| 508 | 0 |                 auto* local = new StructField(column); | 
| 509 | 0 |                 for (uint32_t i = 0; i < column.get_subtype_count(); i++) {  Branch (509:38): [True: 0, False: 0]
 | 
| 510 | 0 |                     std::unique_ptr<Field> sub_field( | 
| 511 | 0 |                             FieldFactory::create(column.get_sub_column(i))); | 
| 512 | 0 |                     local->add_sub_field(std::move(sub_field)); | 
| 513 | 0 |                 } | 
| 514 | 0 |                 return local; | 
| 515 | 137 |             } | 
| 516 | 0 |             case FieldType::OLAP_FIELD_TYPE_ARRAY: {  Branch (516:13): [True: 0, False: 7.00k]
 | 
| 517 | 0 |                 std::unique_ptr<Field> item_field(FieldFactory::create(column.get_sub_column(0))); | 
| 518 | 0 |                 auto* local = new ArrayField(column); | 
| 519 | 0 |                 local->add_sub_field(std::move(item_field)); | 
| 520 | 0 |                 return local; | 
| 521 | 137 |             } | 
| 522 | 0 |             case FieldType::OLAP_FIELD_TYPE_MAP: {  Branch (522:13): [True: 0, False: 7.00k]
 | 
| 523 | 0 |                 std::unique_ptr<Field> key_field(FieldFactory::create(column.get_sub_column(0))); | 
| 524 | 0 |                 std::unique_ptr<Field> val_field(FieldFactory::create(column.get_sub_column(1))); | 
| 525 | 0 |                 auto* local = new MapField(column); | 
| 526 | 0 |                 local->add_sub_field(std::move(key_field)); | 
| 527 | 0 |                 local->add_sub_field(std::move(val_field)); | 
| 528 | 0 |                 return local; | 
| 529 | 137 |             } | 
| 530 | 3 |             case FieldType::OLAP_FIELD_TYPE_DECIMAL:   Branch (530:13): [True: 3, False: 7.00k]
 | 
| 531 | 3 |                 [[fallthrough]]; | 
| 532 | 3 |             case FieldType::OLAP_FIELD_TYPE_DECIMAL32:   Branch (532:13): [True: 0, False: 7.00k]
 | 
| 533 | 3 |                 [[fallthrough]]; | 
| 534 | 3 |             case FieldType::OLAP_FIELD_TYPE_DECIMAL64:   Branch (534:13): [True: 0, False: 7.00k]
 | 
| 535 | 3 |                 [[fallthrough]]; | 
| 536 | 3 |             case FieldType::OLAP_FIELD_TYPE_DECIMAL128I:   Branch (536:13): [True: 0, False: 7.00k]
 | 
| 537 | 3 |                 [[fallthrough]]; | 
| 538 | 3 |             case FieldType::OLAP_FIELD_TYPE_DECIMAL256:   Branch (538:13): [True: 0, False: 7.00k]
 | 
| 539 | 3 |                 [[fallthrough]]; | 
| 540 | 3 |             case FieldType::OLAP_FIELD_TYPE_DATETIMEV2: {  Branch (540:13): [True: 0, False: 7.00k]
 | 
| 541 | 3 |                 Field* field = new Field(column); | 
| 542 | 3 |                 field->set_precision(column.precision()); | 
| 543 | 3 |                 field->set_scale(column.frac()); | 
| 544 | 3 |                 return field; | 
| 545 | 3 |             } | 
| 546 | 6.64k |             default:   Branch (546:13): [True: 6.64k, False: 360]
 | 
| 547 | 6.64k |                 return new Field(column); | 
| 548 | 7.00k |             } | 
| 549 | 7.00k |         } | 
| 550 |  |  | 
| 551 |  |         // for value column | 
| 552 | 23.5k |         switch (column.aggregation()) {  Branch (552:17): [True: 0, False: 23.5k]
 | 
| 553 | 22.5k |         case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE:   Branch (553:9): [True: 22.5k, False: 1.07k]
 | 
| 554 | 23.5k |         case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_SUM:   Branch (554:9): [True: 1.02k, False: 22.5k]
 | 
| 555 | 23.5k |         case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MIN:   Branch (555:9): [True: 1, False: 23.5k]
 | 
| 556 | 23.5k |         case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MAX:   Branch (556:9): [True: 2, False: 23.5k]
 | 
| 557 | 23.5k |         case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE:   Branch (557:9): [True: 40, False: 23.5k]
 | 
| 558 | 23.5k |         case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE_IF_NOT_NULL:   Branch (558:9): [True: 0, False: 23.5k]
 | 
| 559 | 23.5k |             switch (column.type()) { | 
| 560 | 2 |             case FieldType::OLAP_FIELD_TYPE_CHAR:   Branch (560:13): [True: 2, False: 23.5k]
 | 
| 561 | 2 |                 return new CharField(column); | 
| 562 | 21 |             case FieldType::OLAP_FIELD_TYPE_VARCHAR:   Branch (562:13): [True: 21, False: 23.5k]
 | 
| 563 | 21 |                 return new VarcharField(column); | 
| 564 | 6.49k |             case FieldType::OLAP_FIELD_TYPE_STRING:   Branch (564:13): [True: 6.49k, False: 17.0k]
 | 
| 565 | 6.49k |                 return new StringField(column); | 
| 566 | 0 |             case FieldType::OLAP_FIELD_TYPE_STRUCT: {  Branch (566:13): [True: 0, False: 23.5k]
 | 
| 567 | 0 |                 auto* local = new StructField(column); | 
| 568 | 0 |                 for (uint32_t i = 0; i < column.get_subtype_count(); i++) {  Branch (568:38): [True: 0, False: 0]
 | 
| 569 | 0 |                     std::unique_ptr<Field> sub_field( | 
| 570 | 0 |                             FieldFactory::create(column.get_sub_column(i))); | 
| 571 | 0 |                     local->add_sub_field(std::move(sub_field)); | 
| 572 | 0 |                 } | 
| 573 | 0 |                 return local; | 
| 574 | 0 |             } | 
| 575 | 21 |             case FieldType::OLAP_FIELD_TYPE_ARRAY: {  Branch (575:13): [True: 21, False: 23.5k]
 | 
| 576 | 21 |                 std::unique_ptr<Field> item_field(FieldFactory::create(column.get_sub_column(0))); | 
| 577 | 21 |                 auto* local = new ArrayField(column); | 
| 578 | 21 |                 local->add_sub_field(std::move(item_field)); | 
| 579 | 21 |                 return local; | 
| 580 | 0 |             } | 
| 581 | 313 |             case FieldType::OLAP_FIELD_TYPE_MAP: {  Branch (581:13): [True: 313, False: 23.2k]
 | 
| 582 | 313 |                 DCHECK(column.get_subtype_count() == 2); | 
| 583 | 313 |                 auto* local = new MapField(column); | 
| 584 | 313 |                 std::unique_ptr<Field> key_field(FieldFactory::create(column.get_sub_column(0))); | 
| 585 | 313 |                 std::unique_ptr<Field> value_field(FieldFactory::create(column.get_sub_column(1))); | 
| 586 | 313 |                 local->add_sub_field(std::move(key_field)); | 
| 587 | 313 |                 local->add_sub_field(std::move(value_field)); | 
| 588 | 313 |                 return local; | 
| 589 | 0 |             } | 
| 590 | 7 |             case FieldType::OLAP_FIELD_TYPE_DECIMAL:   Branch (590:13): [True: 7, False: 23.5k]
 | 
| 591 | 7 |                 [[fallthrough]]; | 
| 592 | 11 |             case FieldType::OLAP_FIELD_TYPE_DECIMAL32:   Branch (592:13): [True: 4, False: 23.5k]
 | 
| 593 | 11 |                 [[fallthrough]]; | 
| 594 | 15 |             case FieldType::OLAP_FIELD_TYPE_DECIMAL64:   Branch (594:13): [True: 4, False: 23.5k]
 | 
| 595 | 15 |                 [[fallthrough]]; | 
| 596 | 22 |             case FieldType::OLAP_FIELD_TYPE_DECIMAL128I:   Branch (596:13): [True: 7, False: 23.5k]
 | 
| 597 | 22 |                 [[fallthrough]]; | 
| 598 | 22 |             case FieldType::OLAP_FIELD_TYPE_DECIMAL256:   Branch (598:13): [True: 0, False: 23.5k]
 | 
| 599 | 22 |                 [[fallthrough]]; | 
| 600 | 31 |             case FieldType::OLAP_FIELD_TYPE_DATETIMEV2: {  Branch (600:13): [True: 9, False: 23.5k]
 | 
| 601 | 31 |                 Field* field = new Field(column); | 
| 602 | 31 |                 field->set_precision(column.precision()); | 
| 603 | 31 |                 field->set_scale(column.frac()); | 
| 604 | 31 |                 return field; | 
| 605 | 22 |             } | 
| 606 | 16.6k |             default:   Branch (606:13): [True: 16.6k, False: 6.88k]
 | 
| 607 | 16.6k |                 return new Field(column); | 
| 608 | 23.5k |             } | 
| 609 | 1 |         case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_HLL_UNION:   Branch (609:9): [True: 1, False: 23.5k]
 | 
| 610 | 1 |             return new HllAggField(column); | 
| 611 | 0 |         case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_BITMAP_UNION:   Branch (611:9): [True: 0, False: 23.5k]
 | 
| 612 | 0 |             return new BitmapAggField(column); | 
| 613 | 0 |         case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_QUANTILE_UNION:   Branch (613:9): [True: 0, False: 23.5k]
 | 
| 614 | 0 |             return new QuantileStateAggField(column); | 
| 615 | 0 |         case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_GENERIC:   Branch (615:9): [True: 0, False: 23.5k]
 | 
| 616 | 0 |             return new AggStateField(column); | 
| 617 | 0 |         case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_UNKNOWN:   Branch (617:9): [True: 0, False: 23.5k]
 | 
| 618 | 0 |             CHECK(false) << ", value column no agg type"; | 
| 619 | 0 |             return nullptr; | 
| 620 | 23.5k |         } | 
| 621 | 0 |         return nullptr; | 
| 622 | 23.5k |     } | 
| 623 |  |  | 
| 624 | 118 |     static Field* create_by_type(const FieldType& type) { | 
| 625 | 118 |         TabletColumn column(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE, type); | 
| 626 | 118 |         return create(column); | 
| 627 | 118 |     } | 
| 628 |  | }; | 
| 629 |  |  | 
| 630 |  | } // namespace doris |