Coverage Report

Created: 2026-02-27 11:03

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/be/src/exec/tablet_info.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "exec/tablet_info.h"
19
20
#include <butil/logging.h>
21
#include <gen_cpp/Descriptors_types.h>
22
#include <gen_cpp/Exprs_types.h>
23
#include <gen_cpp/Partitions_types.h>
24
#include <gen_cpp/Types_types.h>
25
#include <gen_cpp/descriptors.pb.h>
26
#include <gen_cpp/olap_file.pb.h>
27
#include <glog/logging.h>
28
29
#include <algorithm>
30
#include <cstddef>
31
#include <cstdint>
32
#include <memory>
33
#include <ostream>
34
#include <string>
35
#include <tuple>
36
37
#include "common/exception.h"
38
#include "common/logging.h"
39
#include "common/status.h"
40
#include "olap/tablet_schema.h"
41
#include "runtime/define_primitive_type.h"
42
#include "runtime/descriptors.h"
43
#include "runtime/large_int_value.h"
44
#include "runtime/memory/mem_tracker.h"
45
#include "runtime/primitive_type.h"
46
#include "runtime/raw_value.h"
47
#include "runtime/types.h"
48
#include "util/string_parser.hpp"
49
#include "util/string_util.h"
50
#include "vec/columns/column.h"
51
#include "vec/data_types/data_type.h"
52
#include "vec/data_types/data_type_factory.hpp"
53
// NOLINTNEXTLINE(unused-includes)
54
#include "vec/exprs/vexpr_context.h" // IWYU pragma: keep
55
#include "vec/exprs/vliteral.h"
56
#include "vec/functions/cast/cast_to_timestamptz.h"
57
#include "vec/runtime/vdatetime_value.h"
58
59
namespace doris {
60
#include "common/compile_check_begin.h"
61
62
32
// Serializes this index schema into `pindex`: the index id and schema hash, followed by
// the name of every slot, the schema of every column and the schema of every tablet index.
void OlapTableIndexSchema::to_protobuf(POlapTableIndexSchema* pindex) const {
    // Scalar fields.
    pindex->set_schema_hash(schema_hash);
    pindex->set_id(index_id);

    // Slot names go into the repeated `columns` field.
    for (auto* slot_desc : slots) {
        pindex->add_columns(slot_desc->col_name());
    }
    // Each column writes itself into a freshly appended `columns_desc` entry.
    for (auto* tablet_column : columns) {
        auto* column_pb = pindex->add_columns_desc();
        tablet_column->to_schema_pb(column_pb);
    }
    // Each tablet index writes itself into a freshly appended `indexes_desc` entry.
    for (auto* tablet_index : indexes) {
        auto* index_pb = pindex->add_indexes_desc();
        tablet_index->to_schema_pb(index_pb);
    }
}
75
76
bool VOlapTablePartKeyComparator::operator()(const BlockRowWithIndicator& lhs,
77
104
                                             const BlockRowWithIndicator& rhs) const {
78
104
    vectorized::Block* l_block = std::get<0>(lhs);
79
104
    vectorized::Block* r_block = std::get<0>(rhs);
80
104
    int32_t l_row = std::get<1>(lhs);
81
104
    int32_t r_row = std::get<1>(rhs);
82
104
    bool l_use_new = std::get<2>(lhs);
83
104
    bool r_use_new = std::get<2>(rhs);
84
85
104
    VLOG_TRACE << '\n' << l_block->dump_data() << '\n' << r_block->dump_data();
86
87
104
    if (l_row == -1) {
88
0
        return false;
89
104
    } else if (r_row == -1) {
90
0
        return true;
91
0
    }
92
93
104
    if (_param_locs.empty()) { // no transform, use origin column
94
79
        for (auto slot_loc : _slot_locs) {
95
79
            auto res = l_block->get_by_position(slot_loc).column->compare_at(
96
79
                    l_row, r_row, *r_block->get_by_position(slot_loc).column, -1);
97
79
            if (res != 0) {
98
75
                return res < 0;
99
75
            }
100
79
        }
101
79
    } else { // use transformed column to compare
102
25
        DCHECK(_slot_locs.size() == _param_locs.size())
103
0
                << _slot_locs.size() << ' ' << _param_locs.size();
104
105
25
        const std::vector<uint16_t>* l_index = l_use_new ? &_param_locs : &_slot_locs;
106
25
        const std::vector<uint16_t>* r_index = r_use_new ? &_param_locs : &_slot_locs;
107
108
25
        for (int i = 0; i < _slot_locs.size(); i++) {
109
25
            vectorized::ColumnPtr l_col = l_block->get_by_position((*l_index)[i]).column;
110
25
            vectorized::ColumnPtr r_col = r_block->get_by_position((*r_index)[i]).column;
111
112
25
            auto res = l_col->compare_at(l_row, r_row, *r_col, -1);
113
25
            if (res != 0) {
114
25
                return res < 0;
115
25
            }
116
25
        }
117
25
    }
118
119
    // equal, return false
120
4
    return false;
121
104
}
122
123
14
// Populates this schema parameter from its protobuf form.
//
// Copies the scalar settings, rebuilds the tuple/slot descriptors, then rebuilds every
// index schema (slots, columns, tablet indexes). `_indexes` ends up sorted by index id.
// Returns InternalError when a fixed-column partial update names an auto increment column
// whose unique id is -1, or when an index column has no matching slot descriptor.
Status OlapTableSchemaParam::init(const POlapTableSchemaParam& pschema) {
    _db_id = pschema.db_id();
    _table_id = pschema.table_id();
    _version = pschema.version();

    if (pschema.has_unique_key_update_mode()) {
        _unique_key_update_mode = pschema.unique_key_update_mode();
        if (pschema.has_sequence_map_col_unique_id()) {
            _sequence_map_col_uid = pschema.sequence_map_col_unique_id();
        }
    } else if (pschema.has_partial_update() && pschema.partial_update()) {
        // for backward compatibility: only the legacy boolean flag is present
        _unique_key_update_mode = UniqueKeyUpdateModePB::UPDATE_FIXED_COLUMNS;
    } else {
        _unique_key_update_mode = UniqueKeyUpdateModePB::UPSERT;
    }
    _is_strict_mode = pschema.is_strict_mode();

    // The update mode is not modified past this point, so evaluate it once.
    const bool fixed_columns_mode =
            _unique_key_update_mode == UniqueKeyUpdateModePB::UPDATE_FIXED_COLUMNS;
    if (fixed_columns_mode) {
        _auto_increment_column = pschema.auto_increment_column();
        const bool has_auto_inc_column = !_auto_increment_column.empty();
        if (has_auto_inc_column && pschema.auto_increment_column_unique_id() == -1) {
            return Status::InternalError(
                    "Auto increment column id is not set in FE. Maybe FE is an older version "
                    "different from BE.");
        }
        _auto_increment_column_unique_id = pschema.auto_increment_column_unique_id();
    }
    if (_unique_key_update_mode != UniqueKeyUpdateModePB::UPSERT &&
        pschema.has_partial_update_new_key_policy()) {
        _partial_update_new_row_policy = pschema.partial_update_new_key_policy();
    }

    _timestamp_ms = pschema.timestamp_ms();
    if (pschema.has_nano_seconds()) {
        _nano_seconds = pschema.nano_seconds();
    }
    _timezone = pschema.timezone();

    for (const auto& input_column : pschema.partial_update_input_columns()) {
        _partial_update_input_columns.insert(input_column);
    }

    // Slots are looked up by "<lower-cased name>+<numeric field type><true|false nullable>".
    const auto make_slot_key = [](const std::string& column_name, const std::string& type_str,
                                  const std::string& null_str) {
        return to_lower(column_name) + "+" + type_str + null_str;
    };
    std::unordered_map<std::string, SlotDescriptor*> slot_by_key;

    _tuple_desc = _obj_pool.add(new TupleDescriptor(pschema.tuple_desc()));

    for (const auto& slot_pb : pschema.slot_descs()) {
        auto* slot = _obj_pool.add(new SlotDescriptor(slot_pb));
        _tuple_desc->add_slot(slot);

        // Go through the thrift type name so the key uses the same numeric field type
        // that is derived from the column's type string in the index loop below.
        std::string type_name;
        EnumToString(TPrimitiveType, to_thrift(slot->col_type()), type_name);
        const std::string type_str = std::to_string(
                static_cast<int64_t>(TabletColumn::get_field_type_by_string(type_name)));
        const std::string null_str = slot->is_nullable() ? "true" : "false";
        slot_by_key.emplace(make_slot_key(slot->col_name(), type_str, null_str), slot);
    }

    for (const auto& index_pb : pschema.indexes()) {
        auto* index_schema = _obj_pool.add(new OlapTableIndexSchema());
        index_schema->index_id = index_pb.id();
        index_schema->schema_hash = index_pb.schema_hash();

        for (const auto& pcolumn_desc : index_pb.columns_desc()) {
            // In fixed-column partial update only the input columns need a slot.
            const bool needs_slot =
                    !fixed_columns_mode ||
                    _partial_update_input_columns.contains(pcolumn_desc.name());
            if (needs_slot) {
                std::string is_null_str = pcolumn_desc.is_nullable() ? "true" : "false";
                std::string data_type_str = std::to_string(static_cast<int64_t>(
                        TabletColumn::get_field_type_by_string(pcolumn_desc.type())));
                auto found =
                        slot_by_key.find(make_slot_key(pcolumn_desc.name(), data_type_str,
                                                       is_null_str));
                if (found == slot_by_key.end()) {
                    // Dump every known key to make the mismatch diagnosable from the log.
                    std::string keys;
                    for (const auto& entry : slot_by_key) {
                        keys += entry.first;
                        keys += ',';
                    }
                    LOG_EVERY_SECOND(WARNING) << fmt::format(
                            "[OlapTableSchemaParam::init(const POlapTableSchemaParam& pschema)]: "
                            "unknown index column, column={}, type={}, data_type_str={}, "
                            "is_null_str={}, slots_map.keys()=[{}], {}\npschema={}",
                            pcolumn_desc.name(), pcolumn_desc.type(), data_type_str, is_null_str,
                            keys, debug_string(), pschema.ShortDebugString());

                    return Status::InternalError("unknown index column, column={}, type={}",
                                                 pcolumn_desc.name(), pcolumn_desc.type());
                }
                index_schema->slots.emplace_back(found->second);
            }

            // Every column of the index is recorded, whether or not it got a slot.
            TabletColumn* tablet_column = _obj_pool.add(new TabletColumn());
            tablet_column->init_from_pb(pcolumn_desc);
            index_schema->columns.emplace_back(tablet_column);
        }

        for (const auto& pindex_desc : index_pb.indexes_desc()) {
            TabletIndex* tablet_index = _obj_pool.add(new TabletIndex());
            tablet_index->init_from_pb(pindex_desc);
            index_schema->indexes.emplace_back(tablet_index);
        }
        _indexes.emplace_back(index_schema);
    }

    // Keep the index schemas ordered by ascending index id.
    std::sort(_indexes.begin(), _indexes.end(),
              [](const OlapTableIndexSchema* a, const OlapTableIndexSchema* b) {
                  return a->index_id < b->index_id;
              });
    return Status::OK();
}
227
228
25
// Derives `_unique_key_update_mode` (and, when present, `_sequence_map_col_uid`) from the
// thrift schema parameter.
//
// When `unique_key_update_mode` is not set, falls back to the legacy `is_partial_update`
// flag. Returns InternalError for an unrecognized mode value.
Status OlapTableSchemaParam::init_unique_key_update_mode(const TOlapTableSchemaParam& tschema) {
    if (!tschema.__isset.unique_key_update_mode) {
        // for backward compatibility
        const bool legacy_partial_update =
                tschema.__isset.is_partial_update && tschema.is_partial_update;
        if (legacy_partial_update) {
            _unique_key_update_mode = UniqueKeyUpdateModePB::UPDATE_FIXED_COLUMNS;
        } else {
            _unique_key_update_mode = UniqueKeyUpdateModePB::UPSERT;
        }
        return Status::OK();
    }

    // Map the thrift enum onto its protobuf counterpart.
    switch (tschema.unique_key_update_mode) {
    case doris::TUniqueKeyUpdateMode::UPSERT:
        _unique_key_update_mode = UniqueKeyUpdateModePB::UPSERT;
        break;
    case doris::TUniqueKeyUpdateMode::UPDATE_FIXED_COLUMNS:
        _unique_key_update_mode = UniqueKeyUpdateModePB::UPDATE_FIXED_COLUMNS;
        break;
    case doris::TUniqueKeyUpdateMode::UPDATE_FLEXIBLE_COLUMNS:
        _unique_key_update_mode = UniqueKeyUpdateModePB::UPDATE_FLEXIBLE_COLUMNS;
        break;
    default:
        return Status::InternalError(
                "Unknown unique_key_update_mode: {}, should be one of "
                "UPSERT/UPDATE_FIXED_COLUMNS/UPDATE_FLEXIBLE_COLUMNS",
                tschema.unique_key_update_mode);
    }

    // The sequence-map column id is only read when the new-style mode field is present.
    if (tschema.__isset.sequence_map_col_unique_id) {
        _sequence_map_col_uid = tschema.sequence_map_col_unique_id;
    }
    return Status::OK();
}
263
264
25
// Populates this schema parameter from its thrift form.
//
// Copies the scalar settings, rebuilds the tuple/slot descriptors, then rebuilds every
// index schema (slots, columns, tablet indexes, optional where clause). `_indexes` ends up
// sorted by index id. Returns an error when the update mode or new-row policy is unknown,
// when a fixed-column partial update names an auto increment column whose unique id is -1,
// when an index column has no matching slot descriptor, or when the where clause cannot be
// turned into an expression tree.
Status OlapTableSchemaParam::init(const TOlapTableSchemaParam& tschema) {
    _db_id = tschema.db_id;
    _table_id = tschema.table_id;
    _version = tschema.version;
    RETURN_IF_ERROR(init_unique_key_update_mode(tschema));
    if (tschema.__isset.is_strict_mode) {
        _is_strict_mode = tschema.is_strict_mode;
    }

    // The update mode is not modified past this point, so evaluate it once.
    const bool fixed_columns_mode =
            _unique_key_update_mode == UniqueKeyUpdateModePB::UPDATE_FIXED_COLUMNS;
    if (fixed_columns_mode) {
        _auto_increment_column = tschema.auto_increment_column;
        const bool has_auto_inc_column = !_auto_increment_column.empty();
        if (has_auto_inc_column && tschema.auto_increment_column_unique_id == -1) {
            return Status::InternalError(
                    "Auto increment column id is not set in FE. Maybe FE is an older version "
                    "different from BE.");
        }
        _auto_increment_column_unique_id = tschema.auto_increment_column_unique_id;
    }

    if (_unique_key_update_mode != UniqueKeyUpdateModePB::UPSERT &&
        tschema.__isset.partial_update_new_key_policy) {
        // Map the thrift policy enum onto its protobuf counterpart.
        switch (tschema.partial_update_new_key_policy) {
        case doris::TPartialUpdateNewRowPolicy::APPEND:
            _partial_update_new_row_policy = PartialUpdateNewRowPolicyPB::APPEND;
            break;
        case doris::TPartialUpdateNewRowPolicy::ERROR:
            _partial_update_new_row_policy = PartialUpdateNewRowPolicyPB::ERROR;
            break;
        default:
            return Status::InvalidArgument(
                    "Unknown partial_update_new_key_behavior: {}, should be one of "
                    "'APPEND' or 'ERROR'",
                    tschema.partial_update_new_key_policy);
        }
    }

    for (const auto& input_column : tschema.partial_update_input_columns) {
        _partial_update_input_columns.insert(input_column);
    }

    // Slots are looked up by "<lower-cased name>+<numeric type><true|false nullable>".
    const auto make_slot_key = [](const std::string& column_name, const std::string& type_str,
                                  const std::string& null_str) {
        return to_lower(column_name) + "+" + type_str + null_str;
    };
    std::unordered_map<std::string, SlotDescriptor*> slot_by_key;

    _tuple_desc = _obj_pool.add(new TupleDescriptor(tschema.tuple_desc));
    for (const auto& slot_thrift : tschema.slot_descs) {
        auto* slot = _obj_pool.add(new SlotDescriptor(slot_thrift));
        _tuple_desc->add_slot(slot);
        const std::string null_str = slot->is_nullable() ? "true" : "false";
        const std::string type_str = std::to_string(static_cast<int64_t>(slot->col_type()));
        slot_by_key.emplace(make_slot_key(slot->col_name(), type_str, null_str), slot);
    }

    for (const auto& t_index : tschema.indexes) {
        // Lower-cased column name -> column unique id, for the columns of this index only.
        std::unordered_map<std::string, int32_t> unique_id_by_column;
        auto* index_schema = _obj_pool.add(new OlapTableIndexSchema());
        index_schema->index_id = t_index.id;
        index_schema->schema_hash = t_index.schema_hash;

        for (const auto& tcolumn_desc : t_index.columns_desc) {
            // In fixed-column partial update only the input columns need a slot.
            const bool needs_slot =
                    !fixed_columns_mode ||
                    _partial_update_input_columns.contains(tcolumn_desc.column_name);
            if (needs_slot) {
                std::string is_null_str = tcolumn_desc.is_allow_null ? "true" : "false";
                std::string data_type_str = std::to_string(
                        static_cast<int64_t>(thrift_to_type(tcolumn_desc.column_type.type)));
                auto found = slot_by_key.find(
                        make_slot_key(tcolumn_desc.column_name, data_type_str, is_null_str));
                if (found == slot_by_key.end()) {
                    // Dump the schema and every known key so the mismatch is diagnosable.
                    std::stringstream schema_dump;
                    schema_dump << tschema;
                    std::string keys;
                    for (const auto& entry : slot_by_key) {
                        keys += entry.first;
                        keys += ',';
                    }
                    LOG_EVERY_SECOND(WARNING) << fmt::format(
                            "[OlapTableSchemaParam::init(const TOlapTableSchemaParam& tschema)]: "
                            "unknown index column, column={}, type={}, data_type_str={}, "
                            "is_null_str={}, slots_map.keys()=[{}], {}\ntschema={}",
                            tcolumn_desc.column_name, tcolumn_desc.column_type.type, data_type_str,
                            is_null_str, keys, debug_string(), schema_dump.str());
                    return Status::InternalError("unknown index column, column={}, type={}",
                                                 tcolumn_desc.column_name,
                                                 tcolumn_desc.column_type.type);
                }
                index_schema->slots.emplace_back(found->second);
            }

            // Every column of the index is recorded, whether or not it got a slot.
            unique_id_by_column.emplace(to_lower(tcolumn_desc.column_name),
                                        tcolumn_desc.col_unique_id);
            TabletColumn* tablet_column = _obj_pool.add(new TabletColumn());
            tablet_column->init_from_thrift(tcolumn_desc);
            index_schema->columns.emplace_back(tablet_column);
        }

        if (t_index.__isset.indexes_desc) {
            for (const auto& tindex_desc : t_index.indexes_desc) {
                const auto& index_columns = tindex_desc.columns;
                // Value-initialized: a column missing from this index keeps unique id 0.
                std::vector<int32_t> column_unique_ids(index_columns.size());
                for (size_t pos = 0; pos < index_columns.size(); ++pos) {
                    auto found = unique_id_by_column.find(to_lower(index_columns[pos]));
                    if (found != unique_id_by_column.end()) {
                        column_unique_ids[pos] = found->second;
                    }
                }
                TabletIndex* tablet_index = _obj_pool.add(new TabletIndex());
                tablet_index->init_from_thrift(tindex_desc, column_unique_ids);
                index_schema->indexes.emplace_back(tablet_index);
            }
        }

        if (t_index.__isset.where_clause) {
            RETURN_IF_ERROR(vectorized::VExpr::create_expr_tree(t_index.where_clause,
                                                                index_schema->where_clause));
        }
        _indexes.emplace_back(index_schema);
    }

    // Keep the index schemas ordered by ascending index id.
    std::sort(_indexes.begin(), _indexes.end(),
              [](const OlapTableIndexSchema* a, const OlapTableIndexSchema* b) {
                  return a->index_id < b->index_id;
              });
    return Status::OK();
}
381
382
16
// Serializes this schema parameter into `pschema`: scalar settings, the partial-update
// input columns, the tuple descriptor with all of its slots, and every index schema.
void OlapTableSchemaParam::to_protobuf(POlapTableSchemaParam* pschema) const {
    pschema->set_db_id(_db_id);
    pschema->set_table_id(_table_id);
    pschema->set_version(_version);
    pschema->set_unique_key_update_mode(_unique_key_update_mode);
    if (_unique_key_update_mode == UniqueKeyUpdateModePB::UPDATE_FIXED_COLUMNS) {
        // for backward compatibility: also set the legacy boolean flag
        pschema->set_partial_update(true);
    }
    pschema->set_partial_update_new_key_policy(_partial_update_new_row_policy);
    pschema->set_is_strict_mode(_is_strict_mode);
    pschema->set_auto_increment_column(_auto_increment_column);
    pschema->set_auto_increment_column_unique_id(_auto_increment_column_unique_id);
    pschema->set_timestamp_ms(_timestamp_ms);
    pschema->set_timezone(_timezone);
    pschema->set_nano_seconds(_nano_seconds);
    pschema->set_sequence_map_col_unique_id(_sequence_map_col_uid);
    // Iterate by const reference: by-value iteration copied every element once more
    // before it was copied into the message.
    for (const auto& col : _partial_update_input_columns) {
        *pschema->add_partial_update_input_columns() = col;
    }
    _tuple_desc->to_protobuf(pschema->mutable_tuple_desc());
    for (auto* slot : _tuple_desc->slots()) {
        slot->to_protobuf(pschema->add_slot_descs());
    }
    for (auto* index : _indexes) {
        index->to_protobuf(pschema->add_indexes());
    }
}
410
411
0
std::string OlapTableSchemaParam::debug_string() const {
412
0
    std::stringstream ss;
413
0
    ss << "tuple_desc=" << _tuple_desc->debug_string();
414
0
    return ss.str();
415
0
}
416
417
// Builds the partition parameter from the table schema and the thrift partition description.
//
// - With automatic partitioning enabled, creates one expression context per partition
//   function expression and records each expression root; throws doris::Exception when an
//   expression tree cannot be created.
// - Records the auto-detect-overwrite group id and the master address when they are set.
// - Fills `_partition_block` with one empty column per schema slot. For auto partition
//   tables, a slot that is a partition column takes the type of its partition expression
//   instead of the slot's own type.
VOlapTablePartitionParam::VOlapTablePartitionParam(std::shared_ptr<OlapTableSchemaParam>& schema,
                                                   const TOlapTablePartitionParam& t_param)
        : _schema(schema),
          _t_param(t_param),
          _slots(_schema->tuple_desc()->slots()),
          _mem_tracker(std::make_unique<MemTracker>("OlapTablePartitionParam")),
          _part_type(t_param.partition_type) {
    if (t_param.__isset.enable_automatic_partition && t_param.enable_automatic_partition) {
        _is_auto_partition = true;
        const size_t size = t_param.partition_function_exprs.size();
        _part_func_ctx.resize(size);
        _partition_function.resize(size);
        DCHECK((t_param.partition_type == TPartitionType::RANGE_PARTITIONED && size == 1) ||
               (t_param.partition_type == TPartitionType::LIST_PARTITIONED && size >= 1))
                << "now support only 1 partition column for auto range partitions. "
                << t_param.partition_type << " " << size;
        // size_t index: avoids comparing a signed int against the unsigned size.
        for (size_t i = 0; i < size; ++i) {
            Status st = vectorized::VExpr::create_expr_tree(t_param.partition_function_exprs[i],
                                                            _part_func_ctx[i]);
            if (!st.ok()) {
                throw Exception(Status::InternalError("Partition function expr is not valid"),
                                "Partition function expr is not valid");
            }
            _partition_function[i] = _part_func_ctx[i]->root();
        }
    }

    if (t_param.__isset.enable_auto_detect_overwrite && t_param.enable_auto_detect_overwrite) {
        _is_auto_detect_overwrite = true;
        DCHECK(t_param.__isset.overwrite_group_id);
        _overwrite_group_id = t_param.overwrite_group_id;
    }

    if (t_param.__isset.master_address) {
        _master_address = std::make_shared<TNetworkAddress>(t_param.master_address);
    }

    if (_is_auto_partition) {
        // the nullable mode depends on partition_exprs. not column slots. so use them.
        DCHECK(_partition_function.size() <= _slots.size())
                << _partition_function.size() << ", " << _slots.size();

        // suppose (k0, [k1], [k2]), so get [k1, 0], [k2, 1]
        std::map<std::string, int> partition_slots_map; // name to idx in part_exprs
        for (size_t i = 0; i < t_param.partition_columns.size(); i++) {
            // Explicit cast: the map stores int indexes, the loop counter is size_t.
            partition_slots_map.emplace(t_param.partition_columns[i], static_cast<int>(i));
        }

        // here we rely on the same order and number of the _part_funcs and _slots in the prefix
        // _part_block contains all slots of table.
        for (auto* slot : _slots) {
            // try to replace with partition expr.
            if (auto it = partition_slots_map.find(slot->col_name());
                it != partition_slots_map.end()) { // it's a partition column slot
                auto& expr_type = _partition_function[it->second]->data_type();
                _partition_block.insert({expr_type->create_column(), expr_type, slot->col_name()});
            } else {
                _partition_block.insert({slot->get_empty_mutable_column(),
                                         slot->get_data_type_ptr(), slot->col_name()});
            }
        }
        VLOG_TRACE << _partition_block.dump_structure();
    } else {
        // we insert all. but not all will be used. it will controlled by _partition_slot_locs
        for (auto* slot : _slots) {
            _partition_block.insert({slot->get_empty_mutable_column(), slot->get_data_type_ptr(),
                                     slot->col_name()});
        }
    }
}
487
488
9
// Gives the bytes recorded in `_mem_usage` back to the memory tracker.
VOlapTablePartitionParam::~VOlapTablePartitionParam() {
    _mem_tracker->release(_mem_usage);
}
491
492
9
// Resolves partition/distribution column locations and materializes all partitions.
//
// Steps:
//   1. map every partition and distributed column name to its position among the schema slots;
//   2. create the ordered partition lookup map using the partition key comparator;
//   3. build each partition from its thrift description and, unless the partitions are
//      marked fake, register its keys (list partition: every in-key; otherwise: the end key);
//   4. account the partition block's allocated bytes on the memory tracker.
// Returns InternalError when a column name is not among the schema slots, or the error
// reported by generate_partition_from.
Status VOlapTablePartitionParam::init() {
    // Column names in slot order; the index of a name is the slot location.
    std::vector<std::string> slot_column_names;
    for (auto* slot_desc : _schema->tuple_desc()->slots()) {
        slot_column_names.push_back(slot_desc->col_name());
    }

    // Appends the location of `slot_name` to `locs`; `column_type` only labels the error.
    auto append_slot_loc = [&slot_column_names](const std::string& slot_name,
                                                std::vector<uint16_t>& locs,
                                                const std::string& column_type) -> Status {
        const auto found =
                std::find(slot_column_names.begin(), slot_column_names.end(), slot_name);
        if (found != slot_column_names.end()) {
            locs.emplace_back(found - slot_column_names.begin());
            return Status::OK();
        }
        return Status::InternalError("{} column not found, column ={}", column_type, slot_name);
    };

    // here we find the partition columns. others maybe non-partition columns/special columns.
    if (_t_param.__isset.partition_columns) {
        for (const auto& partition_column : _t_param.partition_columns) {
            RETURN_IF_ERROR(append_slot_loc(partition_column, _partition_slot_locs, "partition"));
        }
    }

    using PartitionMap =
            std::map<BlockRowWithIndicator, VOlapTablePartition*, VOlapTablePartKeyComparator>;
    _partitions_map = std::make_unique<PartitionMap>(
            VOlapTablePartKeyComparator(_partition_slot_locs, _transformed_slot_locs));

    if (_t_param.__isset.distributed_columns) {
        for (const auto& distributed_column : _t_param.distributed_columns) {
            RETURN_IF_ERROR(
                    append_slot_loc(distributed_column, _distributed_slot_locs, "distributed"));
        }
    }

    // for both auto/non-auto partition table.
    _is_in_partition = (_part_type == TPartitionType::type::LIST_PARTITIONED);

    // initial partitions. if meet dummy partitions only for open BE nodes, not generate key of them for finding
    for (const auto& t_part : _t_param.partitions) {
        VOlapTablePartition* partition = nullptr;
        RETURN_IF_ERROR(generate_partition_from(t_part, partition));
        _partitions.emplace_back(partition);

        if (_t_param.partitions_is_fake) {
            continue;
        }
        if (_is_in_partition) {
            // List partition: every in-key points at this partition.
            for (auto& in_key : partition->in_keys) {
                _partitions_map->emplace(std::tuple {in_key.first, in_key.second, false},
                                         partition);
            }
        } else {
            // Otherwise the partition is registered under its end key.
            _partitions_map->emplace(
                    std::tuple {partition->end_key.first, partition->end_key.second, false},
                    partition);
        }
    }

    _mem_usage = _partition_block.allocated_bytes();
    _mem_tracker->consume(_mem_usage);
    return Status::OK();
}
550
551
bool VOlapTablePartitionParam::_part_contains(VOlapTablePartition* part,
552
27
                                              BlockRowWithIndicator key) const {
553
27
    VOlapTablePartKeyComparator comparator(_partition_slot_locs, _transformed_slot_locs);
554
    // we have used upper_bound to find to ensure key < part.right and this part is closest(right - key is min)
555
    // now we only have to check (key >= part.left). the comparator(a,b) means a < b, so we use anti
556
27
    return part->start_key.second == -1 /* spj: start_key.second == -1 means only single partition*/
557
27
           || !comparator(key, std::tuple {part->start_key.first, part->start_key.second, false});
558
27
}
559
560
// insert value into _partition_block's column
561
// NOLINTBEGIN(readability-function-size)
562
38
static Status _create_partition_key(const TExprNode& t_expr, BlockRow* part_key, uint16_t pos) {
563
38
    auto column = std::move(*part_key->first->get_by_position(pos).column).mutate();
564
    //TODO: use assert_cast before insert_data
565
38
    switch (t_expr.node_type) {
566
0
    case TExprNodeType::DATE_LITERAL: {
567
0
        auto primitive_type = vectorized::DataTypeFactory::instance()
568
0
                                      .create_data_type(t_expr.type)
569
0
                                      ->get_primitive_type();
570
0
        if (primitive_type == TYPE_DATEV2) {
571
0
            DateV2Value<DateV2ValueType> dt;
572
0
            if (!dt.from_date_str(t_expr.date_literal.value.c_str(),
573
0
                                  t_expr.date_literal.value.size())) {
574
0
                std::stringstream ss;
575
0
                ss << "invalid date literal in partition column, date=" << t_expr.date_literal;
576
0
                return Status::InternalError(ss.str());
577
0
            }
578
0
            column->insert_data(reinterpret_cast<const char*>(&dt), 0);
579
0
        } else if (primitive_type == TYPE_DATETIMEV2) {
580
0
            DateV2Value<DateTimeV2ValueType> dt;
581
0
            const int32_t scale =
582
0
                    t_expr.type.types.empty() ? -1 : t_expr.type.types.front().scalar_type.scale;
583
0
            if (!dt.from_date_str(t_expr.date_literal.value.c_str(),
584
0
                                  t_expr.date_literal.value.size(), scale)) {
585
0
                std::stringstream ss;
586
0
                ss << "invalid date literal in partition column, date=" << t_expr.date_literal;
587
0
                return Status::InternalError(ss.str());
588
0
            }
589
0
            column->insert_data(reinterpret_cast<const char*>(&dt), 0);
590
0
        } else if (primitive_type == TYPE_TIMESTAMPTZ) {
591
0
            TimestampTzValue res;
592
0
            vectorized::CastParameters params {.status = Status::OK(), .is_strict = true};
593
0
            const int32_t scale =
594
0
                    t_expr.type.types.empty() ? -1 : t_expr.type.types.front().scalar_type.scale;
595
0
            if (!vectorized::CastToTimstampTz::from_string(
596
0
                        {t_expr.date_literal.value.c_str(), t_expr.date_literal.value.size()}, res,
597
0
                        params, nullptr, scale)) [[unlikely]] {
598
0
                std::stringstream ss;
599
0
                ss << "invalid timestamptz literal in partition column, value="
600
0
                   << t_expr.date_literal;
601
0
                return Status::InternalError(ss.str());
602
0
            } else {
603
0
                column->insert_data(reinterpret_cast<const char*>(&res), 0);
604
0
            }
605
0
        } else {
606
            // TYPE_DATE (DATEV1) or TYPE_DATETIME (DATETIMEV1)
607
0
            VecDateTimeValue dt;
608
0
            if (!dt.from_date_str(t_expr.date_literal.value.c_str(),
609
0
                                  t_expr.date_literal.value.size())) {
610
0
                std::stringstream ss;
611
0
                ss << "invalid date literal in partition column, date=" << t_expr.date_literal;
612
0
                return Status::InternalError(ss.str());
613
0
            }
614
0
            if (vectorized::DataTypeFactory::instance()
615
0
                        .create_data_type(t_expr.type)
616
0
                        ->get_primitive_type() == TYPE_DATE) {
617
0
                dt.cast_to_date();
618
0
            }
619
0
            column->insert_data(reinterpret_cast<const char*>(&dt), 0);
620
0
        }
621
0
        break;
622
0
    }
623
38
    case TExprNodeType::INT_LITERAL: {
624
38
        switch (t_expr.type.types[0].scalar_type.type) {
625
0
        case TPrimitiveType::TINYINT: {
626
0
            auto value = cast_set<int8_t>(t_expr.int_literal.value);
627
0
            column->insert_data(reinterpret_cast<const char*>(&value), 0);
628
0
            break;
629
0
        }
630
0
        case TPrimitiveType::SMALLINT: {
631
0
            auto value = cast_set<int16_t>(t_expr.int_literal.value);
632
0
            column->insert_data(reinterpret_cast<const char*>(&value), 0);
633
0
            break;
634
0
        }
635
38
        case TPrimitiveType::INT: {
636
38
            auto value = cast_set<int32_t>(t_expr.int_literal.value);
637
38
            column->insert_data(reinterpret_cast<const char*>(&value), 0);
638
38
            break;
639
0
        }
640
0
        default:
641
0
            int64_t value = t_expr.int_literal.value;
642
0
            column->insert_data(reinterpret_cast<const char*>(&value), 0);
643
38
        }
644
38
        break;
645
38
    }
646
38
    case TExprNodeType::LARGE_INT_LITERAL: {
647
0
        StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
648
0
        auto value = StringParser::string_to_int<__int128>(t_expr.large_int_literal.value.c_str(),
649
0
                                                           t_expr.large_int_literal.value.size(),
650
0
                                                           &parse_result);
651
0
        if (parse_result != StringParser::PARSE_SUCCESS) {
652
0
            value = MAX_INT128;
653
0
        }
654
0
        column->insert_data(reinterpret_cast<const char*>(&value), 0);
655
0
        break;
656
38
    }
657
0
    case TExprNodeType::STRING_LITERAL: {
658
0
        size_t len = t_expr.string_literal.value.size();
659
0
        const char* str_val = t_expr.string_literal.value.c_str();
660
0
        column->insert_data(str_val, len);
661
0
        break;
662
38
    }
663
0
    case TExprNodeType::BOOL_LITERAL: {
664
0
        column->insert_data(reinterpret_cast<const char*>(&t_expr.bool_literal.value), 0);
665
0
        break;
666
38
    }
667
0
    case TExprNodeType::NULL_LITERAL: {
668
        // insert a null literal
669
0
        if (!column->is_nullable()) {
670
            // https://github.com/apache/doris/pull/39449 have forbid this cause. always add this check as protective measures
671
0
            return Status::InternalError("The column {} is not null, can't insert into NULL value.",
672
0
                                         part_key->first->get_by_position(pos).name);
673
0
        }
674
0
        column->insert_data(nullptr, 0);
675
0
        break;
676
0
    }
677
0
    default: {
678
0
        return Status::InternalError("unsupported partition column node type, type={}",
679
0
                                     t_expr.node_type);
680
0
    }
681
38
    }
682
38
    part_key->second = cast_set<int32_t>(column->size() - 1);
683
38
    return Status::OK();
684
38
}
685
// NOLINTEND(readability-function-size)
686
687
// Materializes one partition key: the i-th expression in `t_exprs` is inserted into the partition
// block column located at `_partition_slot_locs[i]`, and `part_key` is updated by each insertion.
//
// Returns InternalError when more expressions are supplied than partition slot locations are
// known, and propagates the first error reported by `_create_partition_key`.
Status VOlapTablePartitionParam::_create_partition_keys(const std::vector<TExprNode>& t_exprs,
                                                        BlockRow* part_key) {
    // Each expression is paired with a slot location by index; refuse input that would read past
    // the end of `_partition_slot_locs`.
    if (t_exprs.size() > _partition_slot_locs.size()) {
        return Status::InternalError(
                "number of partition key exprs exceeds number of partition columns"
                ", num_exprs={}, num_partition_columns={}",
                t_exprs.size(), _partition_slot_locs.size());
    }
    for (size_t i = 0; i < t_exprs.size(); ++i) {
        RETURN_IF_ERROR(_create_partition_key(t_exprs[i], part_key, _partition_slot_locs[i]));
    }
    return Status::OK();
}
694
695
// Builds a VOlapTablePartition from the thrift description `t_part`. The object is owned by
// `_obj_pool` and handed back through `part_result`, which must be null on entry.
//
// This function does not insert the partition into `_partitions` or `_partitions_map`. For list
// partitioning it may record the partition as `_default_partition` when none has been set yet.
//
// Returns InternalError when key creation fails or when the partition's indexes do not match the
// schema's indexes in count or in (sorted) index id.
Status VOlapTablePartitionParam::generate_partition_from(const TOlapTablePartition& t_part,
                                                         VOlapTablePartition*& part_result) {
    DCHECK(part_result == nullptr);
    // The partition bounds start at their defaults; a key that is not provided stays at -1.
    part_result = _obj_pool.add(new VOlapTablePartition(&_partition_block));
    auto& partition = *part_result;
    partition.id = t_part.id;
    partition.is_mutable = t_part.is_mutable;
    // only load_to_single_tablet = true will set load_tablet_idx
    if (t_part.__isset.load_tablet_idx) {
        partition.load_tablet_idx = t_part.load_tablet_idx;
    }

    if (!_is_in_partition) {
        // range partition: left bound first, then right bound
        if (t_part.__isset.start_keys) {
            RETURN_IF_ERROR(_create_partition_keys(t_part.start_keys, &partition.start_key));
        }
        // the right bound is generated here but not inserted into the partition map
        if (t_part.__isset.end_keys) {
            RETURN_IF_ERROR(_create_partition_keys(t_part.end_keys, &partition.end_key));
        }
    } else {
        // list partition: one key row per entry of in_keys
        for (const auto& key_exprs : t_part.in_keys) {
            auto& in_key = partition.in_keys.emplace_back(&_partition_block, -1);
            RETURN_IF_ERROR(_create_partition_keys(key_exprs, &in_key));
        }
        const bool is_default = t_part.__isset.is_default_partition && t_part.is_default_partition;
        if (is_default && _default_partition == nullptr) {
            _default_partition = part_result;
        }
    }

    partition.num_buckets = t_part.num_buckets;
    const auto schema_index_count = _schema->indexes().size();
    if (t_part.indexes.size() != schema_index_count) {
        return Status::InternalError(
                "number of partition's index is not equal with schema's"
                ", num_part_indexes={}, num_schema_indexes={}",
                t_part.indexes.size(), schema_index_count);
    }
    partition.indexes = t_part.indexes;
    // Order by index id so the entries can be compared position by position with the schema.
    std::ranges::sort(partition.indexes, {}, &OlapTableIndexTablets::index_id);
    // check index
    for (size_t idx = 0; idx < schema_index_count; ++idx) {
        const auto part_index_id = partition.indexes[idx].index_id;
        const auto schema_index_id = _schema->indexes()[idx]->index_id;
        if (part_index_id != schema_index_id) {
            return Status::InternalError(
                    "partition's index is not equal with schema's"
                    ", part_index={}, schema_index={}",
                    part_index_id, schema_index_id);
        }
    }

    // Optional replica settings are copied only when the thrift struct carries them.
    if (t_part.__isset.total_replica_num) {
        partition.total_replica_num = t_part.total_replica_num;
    }
    if (t_part.__isset.load_required_replica_num) {
        partition.load_required_replica_num = t_part.load_required_replica_num;
    }
    return Status::OK();
}
756
757
// Creates a VOlapTablePartition for every entry of `partitions`, appends it to `_partitions` and
// registers its key(s) in `_partitions_map` (every in_key for list partitioning, the end_key
// otherwise). The new objects are owned by `_obj_pool`.
//
// Returns InternalError when key creation fails or when a partition's indexes do not match the
// schema's indexes in count or in (sorted) index id. Partitions processed before the failing one
// remain added.
Status VOlapTablePartitionParam::add_partitions(
        const std::vector<TOlapTablePartition>& partitions) {
    for (const auto& t_part : partitions) {
        auto* part = _obj_pool.add(new VOlapTablePartition(&_partition_block));
        part->id = t_part.id;
        part->is_mutable = t_part.is_mutable;
        // Carry over the optional per-partition settings the same way generate_partition_from
        // does, so partitions added later are not left at defaults when the values were provided.
        if (t_part.__isset.load_tablet_idx) {
            part->load_tablet_idx = t_part.load_tablet_idx;
        }
        if (t_part.__isset.total_replica_num) {
            part->total_replica_num = t_part.total_replica_num;
        }
        if (t_part.__isset.load_required_replica_num) {
            part->load_required_replica_num = t_part.load_required_replica_num;
        }

        // we don't pass right keys when it's MAX_VALUE, so it is possible to have only start_key
        // but not end_key.
        // range partition
        if (t_part.__isset.start_keys) {
            RETURN_IF_ERROR(_create_partition_keys(t_part.start_keys, &part->start_key));
        }
        if (t_part.__isset.end_keys) {
            RETURN_IF_ERROR(_create_partition_keys(t_part.end_keys, &part->end_key));
        }
        // list partition - we only set 1 value in 1 partition for new created ones
        if (t_part.__isset.in_keys) {
            for (const auto& keys : t_part.in_keys) {
                RETURN_IF_ERROR(_create_partition_keys(
                        keys, &part->in_keys.emplace_back(&_partition_block, -1)));
            }
            if (t_part.__isset.is_default_partition && t_part.is_default_partition) {
                _default_partition = part;
            }
        }

        part->num_buckets = t_part.num_buckets;
        const auto num_indexes = _schema->indexes().size();
        if (t_part.indexes.size() != num_indexes) {
            return Status::InternalError(
                    "number of partition's index is not equal with schema's"
                    ", num_part_indexes={}, num_schema_indexes={}",
                    t_part.indexes.size(), num_indexes);
        }
        part->indexes = t_part.indexes;
        // Order by index id so the entries can be compared position by position with the schema.
        std::sort(part->indexes.begin(), part->indexes.end(),
                  [](const OlapTableIndexTablets& lhs, const OlapTableIndexTablets& rhs) {
                      return lhs.index_id < rhs.index_id;
                  });
        // check index
        for (size_t j = 0; j < num_indexes; ++j) {
            if (part->indexes[j].index_id != _schema->indexes()[j]->index_id) {
                return Status::InternalError(
                        "partition's index is not equal with schema's"
                        ", part_index={}, schema_index={}",
                        part->indexes[j].index_id, _schema->indexes()[j]->index_id);
            }
        }
        _partitions.emplace_back(part);
        // Register in the lookup map only after the partition keys have been created.
        if (_is_in_partition) {
            for (auto& in_key : part->in_keys) {
                _partitions_map->emplace(std::tuple {in_key.first, in_key.second, false}, part);
            }
        } else {
            _partitions_map->emplace(std::tuple {part->end_key.first, part->end_key.second, false},
                                     part);
        }
    }

    return Status::OK();
}
819
820
// Replaces existing partitions with new ones, pairing `old_partition_ids[i]` with
// `new_partitions[i]`. Each new partition takes over the keys (start/end/in keys) of the
// partition it replaces, is appended to `_partitions`, and overwrites the old partition's entries
// in `_partitions_map`. Finally every partition whose id is in `old_partition_ids` is removed from
// `_partitions`.
//
// `old_partition_ids` is sorted in place as a side effect.
//
// Returns InternalError when the two inputs differ in length, when an old partition id is not
// present in `_partitions`, or when a new partition's indexes do not match the schema's indexes.
// Pairs processed before a failing one remain replaced.
Status VOlapTablePartitionParam::replace_partitions(
        std::vector<int64_t>& old_partition_ids,
        const std::vector<TOlapTablePartition>& new_partitions) {
    DCHECK(old_partition_ids.size() == new_partitions.size());
    // The DCHECK is compiled out of release builds; without this check a short id list would be
    // indexed out of bounds below.
    if (old_partition_ids.size() != new_partitions.size()) {
        return Status::InternalError(
                "number of old partitions is not equal with new partitions'"
                ", num_old_partitions={}, num_new_partitions={}",
                old_partition_ids.size(), new_partitions.size());
    }

    // init and add new partitions. insert into _partitions
    for (size_t i = 0; i < new_partitions.size(); ++i) {
        const auto& t_part = new_partitions[i];
        // pair old_partition_ids and new_partitions one by one. TODO: sort to opt performance
        const int64_t old_part_id = old_partition_ids[i];
        const auto old_it = std::find_if(
                _partitions.begin(), _partitions.end(),
                [old_part_id](const VOlapTablePartition* p) { return p->id == old_part_id; });
        if (old_it == _partitions.end()) {
            return Status::InternalError("Cannot find old partition {} in replacing", old_part_id);
        }
        // Keep the pointer, not the iterator: emplace_back below may invalidate iterators.
        VOlapTablePartition* old_part = *old_it;

        auto* part = _obj_pool.add(new VOlapTablePartition(&_partition_block));
        part->id = t_part.id;
        part->is_mutable = t_part.is_mutable;
        part->num_buckets = t_part.num_buckets;

        // Validate the new partition's indexes before touching the old partition, so an error
        // return leaves the old partition's keys intact.
        const auto num_indexes = _schema->indexes().size();
        if (t_part.indexes.size() != num_indexes) {
            return Status::InternalError(
                    "number of partition's index is not equal with schema's"
                    ", num_part_indexes={}, num_schema_indexes={}",
                    t_part.indexes.size(), num_indexes);
        }
        part->indexes = t_part.indexes;
        // Order by index id so the entries can be compared position by position with the schema.
        std::sort(part->indexes.begin(), part->indexes.end(),
                  [](const OlapTableIndexTablets& lhs, const OlapTableIndexTablets& rhs) {
                      return lhs.index_id < rhs.index_id;
                  });
        // check index
        for (size_t j = 0; j < num_indexes; ++j) {
            if (part->indexes[j].index_id != _schema->indexes()[j]->index_id) {
                return Status::InternalError(
                        "partition's index is not equal with schema's"
                        ", part_index={}, schema_index={}",
                        part->indexes[j].index_id, _schema->indexes()[j]->index_id);
            }
        }

        /// just substitute directly. no need to remove and reinsert keys.
        // range partition
        part->start_key = std::move(old_part->start_key);
        part->end_key = std::move(old_part->end_key);
        // list partition
        part->in_keys = std::move(old_part->in_keys);
        if (t_part.__isset.is_default_partition && t_part.is_default_partition) {
            _default_partition = part;
        }

        // add new partitions with new id.
        _partitions.emplace_back(part);
        VLOG_NOTICE << "params add new partition " << part->id;

        // replace items in _partitions_map: the keys are unchanged, only the mapped partition is.
        if (_is_in_partition) {
            for (auto& in_key : part->in_keys) {
                (*_partitions_map)[std::tuple {in_key.first, in_key.second, false}] = part;
            }
        } else {
            (*_partitions_map)[std::tuple {part->end_key.first, part->end_key.second, false}] =
                    part;
        }
    }

    // remove old partitions by id; sorting enables the binary search in the predicate.
    std::ranges::sort(old_partition_ids);
    std::erase_if(_partitions, [&old_partition_ids](const VOlapTablePartition* p) {
        return std::ranges::binary_search(old_partition_ids, p->id);
    });

    return Status::OK();
}
904
#include "common/compile_check_end.h"
905
906
} // namespace doris