Coverage Report

Created: 2025-05-02 01:45

/root/doris/be/src/exec/tablet_info.cpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "exec/tablet_info.h"
19
20
#include <butil/logging.h>
21
#include <gen_cpp/Descriptors_types.h>
22
#include <gen_cpp/Exprs_types.h>
23
#include <gen_cpp/Partitions_types.h>
24
#include <gen_cpp/Types_types.h>
25
#include <gen_cpp/descriptors.pb.h>
26
#include <glog/logging.h>
27
28
#include <algorithm>
29
#include <cstddef>
30
#include <cstdint>
31
#include <memory>
32
#include <ostream>
33
#include <string>
34
#include <tuple>
35
36
#include "common/exception.h"
37
#include "common/logging.h"
38
#include "common/status.h"
39
#include "olap/tablet_schema.h"
40
#include "runtime/define_primitive_type.h"
41
#include "runtime/descriptors.h"
42
#include "runtime/large_int_value.h"
43
#include "runtime/memory/mem_tracker.h"
44
#include "runtime/primitive_type.h"
45
#include "runtime/raw_value.h"
46
#include "runtime/types.h"
47
#include "util/string_parser.hpp"
48
#include "util/string_util.h"
49
#include "vec/columns/column.h"
50
// NOLINTNEXTLINE(unused-includes)
51
#include "vec/exprs/vexpr_context.h" // IWYU pragma: keep
52
#include "vec/exprs/vliteral.h"
53
#include "vec/runtime/vdatetime_value.h"
54
55
namespace doris {
56
57
32
// Serializes this index schema into `pindex`: the index id and schema hash first,
// then one entry per slot (its column name), per column descriptor and per index
// descriptor, each in container order.
void OlapTableIndexSchema::to_protobuf(POlapTableIndexSchema* pindex) const {
    pindex->set_id(index_id);
    pindex->set_schema_hash(schema_hash);

    for (auto* slot_desc : slots) {
        pindex->add_columns(slot_desc->col_name());
    }

    for (auto* tablet_column : columns) {
        auto* column_pb = pindex->add_columns_desc();
        tablet_column->to_schema_pb(column_pb);
    }

    for (auto* tablet_index : indexes) {
        auto* index_pb = pindex->add_indexes_desc();
        tablet_index->to_schema_pb(index_pb);
    }
}
70
71
bool VOlapTablePartKeyComparator::operator()(const BlockRowWithIndicator& lhs,
72
0
                                             const BlockRowWithIndicator& rhs) const {
73
0
    vectorized::Block* l_block = std::get<0>(lhs);
74
0
    vectorized::Block* r_block = std::get<0>(rhs);
75
0
    int32_t l_row = std::get<1>(lhs);
76
0
    int32_t r_row = std::get<1>(rhs);
77
0
    bool l_use_new = std::get<2>(lhs);
78
0
    bool r_use_new = std::get<2>(rhs);
79
80
0
    VLOG_TRACE << '\n' << l_block->dump_data() << '\n' << r_block->dump_data();
81
82
0
    if (l_row == -1) {
83
0
        return false;
84
0
    } else if (r_row == -1) {
85
0
        return true;
86
0
    }
87
88
0
    if (_param_locs.empty()) { // no transform, use origin column
89
0
        for (auto slot_loc : _slot_locs) {
90
0
            auto res = l_block->get_by_position(slot_loc).column->compare_at(
91
0
                    l_row, r_row, *r_block->get_by_position(slot_loc).column, -1);
92
0
            if (res != 0) {
93
0
                return res < 0;
94
0
            }
95
0
        }
96
0
    } else { // use transformed column to compare
97
0
        DCHECK(_slot_locs.size() == _param_locs.size())
98
0
                << _slot_locs.size() << ' ' << _param_locs.size();
99
100
0
        const std::vector<uint16_t>* l_index = l_use_new ? &_param_locs : &_slot_locs;
101
0
        const std::vector<uint16_t>* r_index = r_use_new ? &_param_locs : &_slot_locs;
102
103
0
        for (int i = 0; i < _slot_locs.size(); i++) {
104
0
            vectorized::ColumnPtr l_col = l_block->get_by_position((*l_index)[i]).column;
105
0
            vectorized::ColumnPtr r_col = r_block->get_by_position((*r_index)[i]).column;
106
107
0
            auto res = l_col->compare_at(l_row, r_row, *r_col, -1);
108
0
            if (res != 0) {
109
0
                return res < 0;
110
0
            }
111
0
        }
112
0
    }
113
114
    // equal, return false
115
0
    return false;
116
0
}
117
118
14
// Populates this schema param from its protobuf form: scalar settings, the tuple and
// slot descriptors, and one OlapTableIndexSchema per index. `_indexes` is left sorted
// by index id.
//
// Returns InternalError when a partial update names an auto-increment column whose
// unique id is -1, or when an index column that needs a slot has no slot with the
// same lower-cased name, field type and nullability.
Status OlapTableSchemaParam::init(const POlapTableSchemaParam& pschema) {
    _db_id = pschema.db_id();
    _table_id = pschema.table_id();
    _version = pschema.version();
    _is_partial_update = pschema.partial_update();
    _is_strict_mode = pschema.is_strict_mode();
    if (_is_partial_update) {
        _auto_increment_column = pschema.auto_increment_column();
        const bool has_auto_inc_column = !_auto_increment_column.empty();
        if (has_auto_inc_column && pschema.auto_increment_column_unique_id() == -1) {
            return Status::InternalError(
                    "Auto increment column id is not set in FE. Maybe FE is an older version "
                    "different from BE.");
        }
        _auto_increment_column_unique_id = pschema.auto_increment_column_unique_id();
    }
    _timestamp_ms = pschema.timestamp_ms();
    if (pschema.has_nano_seconds()) {
        _nano_seconds = pschema.nano_seconds();
    }
    _timezone = pschema.timezone();

    for (const auto& input_column : pschema.partial_update_input_columns()) {
        _partial_update_input_columns.insert(input_column);
    }

    // Slots keyed by "<lower-cased column name>+<field type id><true|false nullable>".
    std::unordered_map<std::string, SlotDescriptor*> slot_by_key;

    _tuple_desc = _obj_pool.add(new TupleDescriptor(pschema.tuple_desc()));

    for (const auto& slot_pb : pschema.slot_descs()) {
        auto* slot = _obj_pool.add(new SlotDescriptor(slot_pb));
        _tuple_desc->add_slot(slot);

        std::string type_name;
        EnumToString(TPrimitiveType, to_thrift(slot->col_type()), type_name);

        std::string slot_key = to_lower(slot->col_name());
        slot_key += "+";
        slot_key += std::to_string(int64_t(TabletColumn::get_field_type_by_string(type_name)));
        slot_key += slot->is_nullable() ? "true" : "false";
        slot_by_key.emplace(slot_key, slot);
    }

    for (const auto& index_pb : pschema.indexes()) {
        auto* index = _obj_pool.add(new OlapTableIndexSchema());
        index->index_id = index_pb.id();
        index->schema_hash = index_pb.schema_hash();

        for (const auto& column_pb : index_pb.columns_desc()) {
            // Under partial update only the listed input columns are bound to a slot;
            // the TabletColumn below is recorded for every column regardless.
            const bool needs_slot = !_is_partial_update ||
                                    _partial_update_input_columns.contains(column_pb.name());
            if (needs_slot) {
                const std::string is_null_str = column_pb.is_nullable() ? "true" : "false";
                const std::string data_type_str = std::to_string(
                        int64_t(TabletColumn::get_field_type_by_string(column_pb.type())));
                const auto found = slot_by_key.find(to_lower(column_pb.name()) + "+" +
                                                    data_type_str + is_null_str);
                if (found == slot_by_key.end()) {
                    // Collect every known key so the log shows what was available.
                    std::string keys;
                    for (const auto& entry : slot_by_key) {
                        keys += entry.first;
                        keys += ',';
                    }
                    LOG_EVERY_SECOND(WARNING) << fmt::format(
                            "[OlapTableSchemaParam::init(const POlapTableSchemaParam& pschema)]: "
                            "unknown index column, column={}, type={}, data_type_str={}, "
                            "is_null_str={}, slots_map.keys()=[{}], {}\npschema={}",
                            column_pb.name(), column_pb.type(), data_type_str, is_null_str, keys,
                            debug_string(), pschema.ShortDebugString());

                    return Status::InternalError("unknown index column, column={}, type={}",
                                                 column_pb.name(), column_pb.type());
                }
                index->slots.emplace_back(found->second);
            }

            auto* tablet_column = _obj_pool.add(new TabletColumn());
            tablet_column->init_from_pb(column_pb);
            index->columns.emplace_back(tablet_column);
        }

        for (const auto& index_desc_pb : index_pb.indexes_desc()) {
            auto* tablet_index = _obj_pool.add(new TabletIndex());
            tablet_index->init_from_pb(index_desc_pb);
            index->indexes.emplace_back(tablet_index);
        }
        _indexes.emplace_back(index);
    }

    std::sort(_indexes.begin(), _indexes.end(),
              [](const OlapTableIndexSchema* a, const OlapTableIndexSchema* b) {
                  return a->index_id < b->index_id;
              });
    return Status::OK();
}
205
206
16
// Populates this schema param from its thrift form: scalar settings, the tuple and
// slot descriptors, and one OlapTableIndexSchema per index (columns, optional index
// descriptors and optional where clause). `_indexes` is left sorted by index id.
//
// Returns InternalError when a partial update names an auto-increment column whose
// unique id is -1, or when an index column that needs a slot has no slot with the
// same lower-cased name, type and nullability. Also propagates any error from
// building the where-clause expression tree.
Status OlapTableSchemaParam::init(const TOlapTableSchemaParam& tschema) {
    _db_id = tschema.db_id;
    _table_id = tschema.table_id;
    _version = tschema.version;
    _is_partial_update = tschema.is_partial_update;
    if (tschema.__isset.is_strict_mode) {
        _is_strict_mode = tschema.is_strict_mode;
    }
    if (_is_partial_update) {
        _auto_increment_column = tschema.auto_increment_column;
        const bool has_auto_inc_column = !_auto_increment_column.empty();
        if (has_auto_inc_column && tschema.auto_increment_column_unique_id == -1) {
            return Status::InternalError(
                    "Auto increment column id is not set in FE. Maybe FE is an older version "
                    "different from BE.");
        }
        _auto_increment_column_unique_id = tschema.auto_increment_column_unique_id;
    }

    for (const auto& input_column : tschema.partial_update_input_columns) {
        _partial_update_input_columns.insert(input_column);
    }

    // Slots keyed by "<lower-cased column name>+<type id><true|false nullable>".
    std::unordered_map<std::string, SlotDescriptor*> slot_by_key;
    _tuple_desc = _obj_pool.add(new TupleDescriptor(tschema.tuple_desc));
    for (const auto& t_slot : tschema.slot_descs) {
        auto* slot = _obj_pool.add(new SlotDescriptor(t_slot));
        _tuple_desc->add_slot(slot);

        std::string slot_key = to_lower(slot->col_name());
        slot_key += "+";
        slot_key += std::to_string(int64_t(slot->col_type()));
        slot_key += slot->is_nullable() ? "true" : "false";
        slot_by_key.emplace(slot_key, slot);
    }

    for (const auto& t_index : tschema.indexes) {
        // Lower-cased column name -> column unique id, for this index only.
        std::unordered_map<std::string, int32_t> unique_id_by_column;
        auto* index = _obj_pool.add(new OlapTableIndexSchema());
        index->index_id = t_index.id;
        index->schema_hash = t_index.schema_hash;

        for (const auto& t_column : t_index.columns_desc) {
            // Under partial update only the listed input columns are bound to a slot;
            // the unique id and TabletColumn below are recorded for every column.
            const bool needs_slot =
                    !_is_partial_update ||
                    _partial_update_input_columns.contains(t_column.column_name);
            if (needs_slot) {
                const std::string is_null_str = t_column.is_allow_null ? "true" : "false";
                const std::string data_type_str =
                        std::to_string(int64_t(thrift_to_type(t_column.column_type.type)));
                const auto found = slot_by_key.find(to_lower(t_column.column_name) + "+" +
                                                    data_type_str + is_null_str);
                if (found == slot_by_key.end()) {
                    std::stringstream schema_dump;
                    schema_dump << tschema;
                    // Collect every known key so the log shows what was available.
                    std::string keys;
                    for (const auto& entry : slot_by_key) {
                        keys += entry.first;
                        keys += ',';
                    }
                    LOG_EVERY_SECOND(WARNING) << fmt::format(
                            "[OlapTableSchemaParam::init(const TOlapTableSchemaParam& tschema)]: "
                            "unknown index column, column={}, type={}, data_type_str={}, "
                            "is_null_str={}, slots_map.keys()=[{}], {}\ntschema={}",
                            t_column.column_name, t_column.column_type.type, data_type_str,
                            is_null_str, keys, debug_string(), schema_dump.str());
                    return Status::InternalError("unknown index column, column={}, type={}",
                                                 t_column.column_name,
                                                 t_column.column_type.type);
                }
                index->slots.emplace_back(found->second);
            }

            unique_id_by_column.emplace(to_lower(t_column.column_name), t_column.col_unique_id);
            auto* tablet_column = _obj_pool.add(new TabletColumn());
            tablet_column->init_from_thrift(t_column);
            index->columns.emplace_back(tablet_column);
        }

        if (t_index.__isset.indexes_desc) {
            for (const auto& t_index_desc : t_index.indexes_desc) {
                const auto& index_columns = t_index_desc.columns;
                // Names with no match keep the value-initialized id 0.
                std::vector<int32_t> column_unique_ids(index_columns.size());
                for (size_t pos = 0; pos < index_columns.size(); pos++) {
                    const auto found = unique_id_by_column.find(to_lower(index_columns[pos]));
                    if (found != unique_id_by_column.end()) {
                        column_unique_ids[pos] = found->second;
                    }
                }
                auto* tablet_index = _obj_pool.add(new TabletIndex());
                tablet_index->init_from_thrift(t_index_desc, column_unique_ids);
                index->indexes.emplace_back(tablet_index);
            }
        }

        if (t_index.__isset.where_clause) {
            RETURN_IF_ERROR(
                    vectorized::VExpr::create_expr_tree(t_index.where_clause, index->where_clause));
        }
        _indexes.emplace_back(index);
    }

    std::sort(_indexes.begin(), _indexes.end(),
              [](const OlapTableIndexSchema* a, const OlapTableIndexSchema* b) {
                  return a->index_id < b->index_id;
              });
    return Status::OK();
}
302
303
16
// Serializes this schema param into `pschema`: the scalar settings, the partial-update
// input column names, the tuple descriptor, every slot of that tuple and every index
// schema.
void OlapTableSchemaParam::to_protobuf(POlapTableSchemaParam* pschema) const {
    pschema->set_db_id(_db_id);
    pschema->set_table_id(_table_id);
    pschema->set_version(_version);
    pschema->set_partial_update(_is_partial_update);
    pschema->set_is_strict_mode(_is_strict_mode);
    pschema->set_auto_increment_column(_auto_increment_column);
    pschema->set_auto_increment_column_unique_id(_auto_increment_column_unique_id);
    pschema->set_timestamp_ms(_timestamp_ms);
    pschema->set_timezone(_timezone);
    pschema->set_nano_seconds(_nano_seconds);
    // Iterate by const reference: the name is copied once, into the protobuf field.
    for (const auto& col : _partial_update_input_columns) {
        *pschema->add_partial_update_input_columns() = col;
    }
    _tuple_desc->to_protobuf(pschema->mutable_tuple_desc());
    for (auto* slot : _tuple_desc->slots()) {
        slot->to_protobuf(pschema->add_slot_descs());
    }
    for (auto* index : _indexes) {
        index->to_protobuf(pschema->add_indexes());
    }
}
325
326
0
std::string OlapTableSchemaParam::debug_string() const {
327
0
    std::stringstream ss;
328
0
    ss << "tuple_desc=" << _tuple_desc->debug_string();
329
0
    return ss.str();
330
0
}
331
332
// Builds the partition param from the thrift description.
//
// For auto-partition tables this creates one expression tree per partition function
// and throws Exception if any of them cannot be created. It also records the
// auto-detect-overwrite settings when enabled, and lays out `_partition_block` with
// one empty column per schema slot. In auto-partition mode a slot that is a partition
// column takes the data type of its partition expression instead of the slot's own.
VOlapTablePartitionParam::VOlapTablePartitionParam(std::shared_ptr<OlapTableSchemaParam>& schema,
                                                   const TOlapTablePartitionParam& t_param)
        : _schema(schema),
          _t_param(t_param),
          _slots(_schema->tuple_desc()->slots()),
          _mem_tracker(std::make_unique<MemTracker>("OlapTablePartitionParam")),
          _part_type(t_param.partition_type) {
    if (t_param.__isset.enable_automatic_partition && t_param.enable_automatic_partition) {
        _is_auto_partition = true;
        const size_t size = t_param.partition_function_exprs.size();
        _part_func_ctx.resize(size);
        _partition_function.resize(size);
        DCHECK((t_param.partition_type == TPartitionType::RANGE_PARTITIONED && size == 1) ||
               (t_param.partition_type == TPartitionType::LIST_PARTITIONED && size >= 1))
                << "now support only 1 partition column for auto range partitions. "
                << t_param.partition_type << " " << size;
        for (size_t i = 0; i < size; ++i) {
            Status st = vectorized::VExpr::create_expr_tree(t_param.partition_function_exprs[i],
                                                            _part_func_ctx[i]);
            if (!st.ok()) {
                throw Exception(Status::InternalError("Partition function expr is not valid"),
                                "Partition function expr is not valid");
            }
            _partition_function[i] = _part_func_ctx[i]->root();
        }
    }

    if (t_param.__isset.enable_auto_detect_overwrite && t_param.enable_auto_detect_overwrite) {
        _is_auto_detect_overwrite = true;
        DCHECK(t_param.__isset.overwrite_group_id);
        _overwrite_group_id = t_param.overwrite_group_id;
    }

    if (_is_auto_partition) {
        // The nullable mode depends on the partition exprs, not the column slots, so
        // the expr types are used for partition columns.
        DCHECK(_partition_function.size() <= _slots.size())
                << _partition_function.size() << ", " << _slots.size();

        // suppose (k0, [k1], [k2]), so get [k1, 0], [k2, 1]
        std::map<std::string, int> partition_slots_map; // name to idx in part_exprs
        for (size_t i = 0; i < t_param.partition_columns.size(); i++) {
            partition_slots_map.emplace(t_param.partition_columns[i], static_cast<int>(i));
        }

        // Here we rely on the same order and number of the _part_funcs and _slots in the
        // prefix. _part_block contains all slots of the table.
        for (auto* slot : _slots) {
            // try to replace with partition expr.
            if (auto it = partition_slots_map.find(slot->col_name());
                it != partition_slots_map.end()) { // it's a partition column slot
                const auto& expr_type = _partition_function[it->second]->data_type();
                _partition_block.insert({expr_type->create_column(), expr_type, slot->col_name()});
            } else {
                _partition_block.insert({slot->get_empty_mutable_column(),
                                         slot->get_data_type_ptr(), slot->col_name()});
            }
        }
        VLOG_TRACE << _partition_block.dump_structure();
    } else {
        // All slots are inserted, but not all are used; that is controlled by
        // _partition_slot_locs.
        for (auto* slot : _slots) {
            _partition_block.insert({slot->get_empty_mutable_column(), slot->get_data_type_ptr(),
                                     slot->col_name()});
        }
    }
}
398
399
0
// Releases `_mem_usage` bytes from the memory tracker when the param is destroyed.
VOlapTablePartitionParam::~VOlapTablePartitionParam() {
    _mem_tracker->release(_mem_usage);
}
402
403
0
// Resolves partition and distribution column names to slot positions, creates the
// partition lookup map, materializes every partition listed in `_t_param` and
// finally charges the partition block's allocated bytes to the memory tracker.
//
// Returns InternalError when a partition or distribution column is not a slot of the
// schema tuple, and propagates any error from generate_partition_from().
Status VOlapTablePartitionParam::init() {
    std::vector<std::string> slot_names;
    for (auto* slot : _schema->tuple_desc()->slots()) {
        slot_names.emplace_back(slot->col_name());
    }

    // Appends the position of `name` among the schema slots to `locs`; `kind` only
    // labels the error message.
    auto append_slot_loc = [&slot_names](const std::string& name, std::vector<uint16_t>& locs,
                                         const std::string& kind) {
        auto pos = std::find(slot_names.begin(), slot_names.end(), name);
        if (pos == slot_names.end()) {
            return Status::InternalError("{} column not found, column ={}", kind, name);
        }
        locs.emplace_back(pos - slot_names.begin());
        return Status::OK();
    };

    // Find the partition columns; the rest may be non-partition or special columns.
    if (_t_param.__isset.partition_columns) {
        for (const auto& partition_column : _t_param.partition_columns) {
            RETURN_IF_ERROR(append_slot_loc(partition_column, _partition_slot_locs, "partition"));
        }
    }

    using PartitionMap =
            std::map<BlockRowWithIndicator, VOlapTablePartition*, VOlapTablePartKeyComparator>;
    _partitions_map = std::make_unique<PartitionMap>(
            VOlapTablePartKeyComparator(_partition_slot_locs, _transformed_slot_locs));

    if (_t_param.__isset.distributed_columns) {
        for (const auto& distributed_column : _t_param.distributed_columns) {
            RETURN_IF_ERROR(
                    append_slot_loc(distributed_column, _distributed_slot_locs, "distributed"));
        }
    }

    // for both auto/non-auto partition table.
    _is_in_partition = _part_type == TPartitionType::type::LIST_PARTITIONED;

    // Create the initial partitions. Fake partitions (`partitions_is_fake`) are kept
    // in `_partitions` but get no lookup keys in the map.
    for (const auto& t_part : _t_param.partitions) {
        VOlapTablePartition* part = nullptr;
        RETURN_IF_ERROR(generate_partition_from(t_part, part));
        _partitions.emplace_back(part);

        if (_t_param.partitions_is_fake) {
            continue;
        }
        if (!_is_in_partition) {
            // Range partition: keyed by its end key.
            _partitions_map->emplace(
                    std::tuple {part->end_key.first, part->end_key.second, false}, part);
            continue;
        }
        // List partition: one map entry per in-key.
        for (auto& in_key : part->in_keys) {
            _partitions_map->emplace(std::tuple {in_key.first, in_key.second, false}, part);
        }
    }

    _mem_usage = _partition_block.allocated_bytes();
    _mem_tracker->consume(_mem_usage);
    return Status::OK();
}
461
462
bool VOlapTablePartitionParam::_part_contains(VOlapTablePartition* part,
463
0
                                              BlockRowWithIndicator key) const {
464
0
    VOlapTablePartKeyComparator comparator(_partition_slot_locs, _transformed_slot_locs);
465
    // we have used upper_bound to find to ensure key < part.right and this part is closest(right - key is min)
466
    // now we only have to check (key >= part.left). the comparator(a,b) means a < b, so we use anti
467
0
    return part->start_key.second == -1 /* spj: start_key.second == -1 means only single partition*/
468
0
           || !comparator(key, std::tuple {part->start_key.first, part->start_key.second, false});
469
0
}
470
471
// insert value into _partition_block's column
472
// NOLINTBEGIN(readability-function-size)
473
0
// Appends the literal described by `t_expr` to column `pos` of the block behind
// `part_key`, then sets `part_key->second` to the index of the appended row.
//
// Returns InternalError for an unparsable date literal, an int literal without type
// information, a NULL literal aimed at a non-nullable column, or an unsupported
// literal node type.
static Status _create_partition_key(const TExprNode& t_expr, BlockRow* part_key, uint16_t pos) {
    auto column = std::move(*part_key->first->get_by_position(pos).column).mutate();

    // Shared by all three date flavours below.
    const auto invalid_date_literal = [&t_expr]() {
        std::stringstream ss;
        ss << "invalid date literal in partition column, date=" << t_expr.date_literal;
        return Status::InternalError(ss.str());
    };

    //TODO: use assert_cast before insert_data
    switch (t_expr.node_type) {
    case TExprNodeType::DATE_LITERAL: {
        const std::string& literal = t_expr.date_literal.value;
        // Resolve the type once; the result picks the date representation to parse into.
        const TypeDescriptor type_desc = TypeDescriptor::from_thrift(t_expr.type);
        if (type_desc.is_date_v2_type()) {
            DateV2Value<DateV2ValueType> dt;
            if (!dt.from_date_str(literal.c_str(), literal.size())) {
                return invalid_date_literal();
            }
            column->insert_data(reinterpret_cast<const char*>(&dt), 0);
        } else if (type_desc.is_datetime_v2_type()) {
            DateV2Value<DateTimeV2ValueType> dt;
            const int32_t scale =
                    t_expr.type.types.empty() ? -1 : t_expr.type.types.front().scalar_type.scale;
            if (!dt.from_date_str(literal.c_str(), literal.size(), scale)) {
                return invalid_date_literal();
            }
            column->insert_data(reinterpret_cast<const char*>(&dt), 0);
        } else {
            VecDateTimeValue dt;
            if (!dt.from_date_str(literal.c_str(), literal.size())) {
                return invalid_date_literal();
            }
            column->insert_data(reinterpret_cast<const char*>(&dt), 0);
        }
        break;
    }
    case TExprNodeType::INT_LITERAL: {
        // The integer width comes from the first type node; refuse to index an empty list.
        if (t_expr.type.types.empty()) {
            return Status::InternalError(
                    "int literal in partition column has no type information");
        }
        switch (t_expr.type.types[0].scalar_type.type) {
        case TPrimitiveType::TINYINT: {
            int8_t value = t_expr.int_literal.value;
            column->insert_data(reinterpret_cast<const char*>(&value), 0);
            break;
        }
        case TPrimitiveType::SMALLINT: {
            int16_t value = t_expr.int_literal.value;
            column->insert_data(reinterpret_cast<const char*>(&value), 0);
            break;
        }
        case TPrimitiveType::INT: {
            int32_t value = t_expr.int_literal.value;
            column->insert_data(reinterpret_cast<const char*>(&value), 0);
            break;
        }
        default: {
            // Every other primitive type is stored as a 64-bit value.
            int64_t value = t_expr.int_literal.value;
            column->insert_data(reinterpret_cast<const char*>(&value), 0);
            break;
        }
        }
        break;
    }
    case TExprNodeType::LARGE_INT_LITERAL: {
        const std::string& literal = t_expr.large_int_literal.value;
        StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
        auto value = StringParser::string_to_int<__int128>(literal.c_str(), literal.size(),
                                                           &parse_result);
        // A literal that does not parse is stored as MAX_INT128.
        if (parse_result != StringParser::PARSE_SUCCESS) {
            value = MAX_INT128;
        }
        column->insert_data(reinterpret_cast<const char*>(&value), 0);
        break;
    }
    case TExprNodeType::STRING_LITERAL: {
        const std::string& literal = t_expr.string_literal.value;
        column->insert_data(literal.c_str(), literal.size());
        break;
    }
    case TExprNodeType::BOOL_LITERAL: {
        column->insert_data(reinterpret_cast<const char*>(&t_expr.bool_literal.value), 0);
        break;
    }
    case TExprNodeType::NULL_LITERAL: {
        // insert a null literal
        if (!column->is_nullable()) {
            // https://github.com/apache/doris/pull/39449 has forbidden this case. The
            // check is kept as a protective measure.
            return Status::InternalError("The column {} is not null, can't insert into NULL value.",
                                         part_key->first->get_by_position(pos).name);
        }
        column->insert_data(nullptr, 0);
        break;
    }
    default: {
        return Status::InternalError("unsupported partition column node type, type={}",
                                     t_expr.node_type);
    }
    }
    part_key->second = column->size() - 1;
    return Status::OK();
}
572
// NOLINTEND(readability-function-size)
573
574
// Fills `part_key` from a list of literal expressions.
//
// For every element `t_exprs[i]`, `_create_partition_key` is invoked with `part_key` and
// `_partition_slot_locs[i]` as the position argument.
//
// Returns the first non-OK status produced by `_create_partition_key`; OK if every element
// succeeds (including when `t_exprs` is empty).
Status VOlapTablePartitionParam::_create_partition_keys(const std::vector<TExprNode>& t_exprs,
                                                        BlockRow* part_key) {
    // size_t index: comparing a signed int against size() mixes signed and unsigned types.
    for (size_t i = 0; i < t_exprs.size(); ++i) {
        RETURN_IF_ERROR(_create_partition_key(t_exprs[i], part_key, _partition_slot_locs[i]));
    }
    return Status::OK();
}
581
582
// Builds one VOlapTablePartition from its thrift description `t_part` and hands it back through
// `part_result`. The partition is not appended to `_partitions` or `_partitions_map` here.
//
// `part_result` must be nullptr on entry (DCHECK). Once the object has been registered with
// `_obj_pool`, `part_result` points at it even if a later step returns a non-OK status.
// For list partitioning (`_is_in_partition`), a partition flagged as default is recorded in
// `_default_partition` if no default partition has been recorded yet.
//
// Returns the first error from `_create_partition_keys`, or InternalError when the partition's
// indexes do not match the schema's indexes (by count, or by id after sorting on index_id).
Status VOlapTablePartitionParam::generate_partition_from(const TOlapTablePartition& t_part,
                                                         VOlapTablePartition*& part_result) {
    DCHECK(part_result == nullptr);
    // here we set the default value of partition bounds first! if it doesn't have some key, it will be -1.
    part_result = _obj_pool.add(new VOlapTablePartition(&_partition_block));
    part_result->id = t_part.id;
    part_result->is_mutable = t_part.is_mutable;
    // only load_to_single_tablet = true will set load_tablet_idx
    if (t_part.__isset.load_tablet_idx) {
        part_result->load_tablet_idx = t_part.load_tablet_idx;
    }

    if (_is_in_partition) {
        // list partition: one BlockRow per entry of in_keys
        for (const auto& keys : t_part.in_keys) {
            RETURN_IF_ERROR(_create_partition_keys(
                    keys, &part_result->in_keys.emplace_back(&_partition_block, -1)));
        }
        // the first partition flagged as default wins
        if (t_part.__isset.is_default_partition && t_part.is_default_partition &&
            _default_partition == nullptr) {
            _default_partition = part_result;
        }
    } else { // range
        if (t_part.__isset.start_keys) {
            RETURN_IF_ERROR(_create_partition_keys(t_part.start_keys, &part_result->start_key));
        }
        // we generate the right bound but not insert into partition map
        if (t_part.__isset.end_keys) {
            RETURN_IF_ERROR(_create_partition_keys(t_part.end_keys, &part_result->end_key));
        }
    }

    part_result->num_buckets = t_part.num_buckets;
    // bind the schema's index list once instead of re-evaluating it on every iteration
    const auto& schema_indexes = _schema->indexes();
    const auto num_indexes = schema_indexes.size();
    if (t_part.indexes.size() != num_indexes) {
        return Status::InternalError(
                "number of partition's index is not equal with schema's"
                ", num_part_indexes={}, num_schema_indexes={}",
                t_part.indexes.size(), num_indexes);
    }
    part_result->indexes = t_part.indexes;
    // sort by index_id so the partition's indexes can be compared position by position
    std::sort(part_result->indexes.begin(), part_result->indexes.end(),
              [](const OlapTableIndexTablets& lhs, const OlapTableIndexTablets& rhs) {
                  return lhs.index_id < rhs.index_id;
              });
    // check index (size_t index: num_indexes is unsigned)
    for (size_t j = 0; j < num_indexes; ++j) {
        if (part_result->indexes[j].index_id != schema_indexes[j]->index_id) {
            return Status::InternalError(
                    "partition's index is not equal with schema's"
                    ", part_index={}, schema_index={}",
                    part_result->indexes[j].index_id, schema_indexes[j]->index_id);
        }
    }
    return Status::OK();
}
637
638
// Creates a VOlapTablePartition for every entry of `partitions`, appends it to `_partitions`
// and registers it in `_partitions_map`.
//
// For each thrift partition:
//   * range bounds (start_keys / end_keys) and list keys (in_keys) are converted when set;
//   * a list partition flagged as default becomes `_default_partition`;
//   * its indexes are copied, sorted by index_id and checked against the schema's indexes;
//   * it is keyed in `_partitions_map` by each in-key (list) or by its end key (range).
//
// Returns the first error from `_create_partition_keys`, or InternalError on an index
// count/id mismatch. Partitions handled before the failing one stay registered.
Status VOlapTablePartitionParam::add_partitions(
        const std::vector<TOlapTablePartition>& partitions) {
    // hoisted: nothing in the loop below touches _schema
    const auto& schema_indexes = _schema->indexes();
    const auto num_indexes = schema_indexes.size();

    for (const auto& t_part : partitions) {
        auto* part = _obj_pool.add(new VOlapTablePartition(&_partition_block));
        part->id = t_part.id;
        part->is_mutable = t_part.is_mutable;

        // we don't pass right keys when it's MAX_VALUE, so it is possible to have only
        // start_key but no end_key
        // range partition
        if (t_part.__isset.start_keys) {
            RETURN_IF_ERROR(_create_partition_keys(t_part.start_keys, &part->start_key));
        }
        if (t_part.__isset.end_keys) {
            RETURN_IF_ERROR(_create_partition_keys(t_part.end_keys, &part->end_key));
        }
        // list partition - we only set 1 value in 1 partition for new created ones
        if (t_part.__isset.in_keys) {
            for (const auto& keys : t_part.in_keys) {
                RETURN_IF_ERROR(_create_partition_keys(
                        keys, &part->in_keys.emplace_back(&_partition_block, -1)));
            }
            if (t_part.__isset.is_default_partition && t_part.is_default_partition) {
                _default_partition = part;
            }
        }

        part->num_buckets = t_part.num_buckets;
        if (t_part.indexes.size() != num_indexes) {
            return Status::InternalError(
                    "number of partition's index is not equal with schema's"
                    ", num_part_indexes={}, num_schema_indexes={}",
                    t_part.indexes.size(), num_indexes);
        }
        part->indexes = t_part.indexes;
        // sort by index_id so the partition's indexes can be compared position by position
        std::sort(part->indexes.begin(), part->indexes.end(),
                  [](const OlapTableIndexTablets& lhs, const OlapTableIndexTablets& rhs) {
                      return lhs.index_id < rhs.index_id;
                  });
        // check index (size_t index: num_indexes is unsigned)
        for (size_t j = 0; j < num_indexes; ++j) {
            if (part->indexes[j].index_id != schema_indexes[j]->index_id) {
                return Status::InternalError(
                        "partition's index is not equal with schema's"
                        ", part_index={}, schema_index={}",
                        part->indexes[j].index_id, schema_indexes[j]->index_id);
            }
        }
        _partitions.emplace_back(part);
        // must happen after the partition keys have been created above
        if (_is_in_partition) {
            for (auto& in_key : part->in_keys) {
                _partitions_map->emplace(std::tuple {in_key.first, in_key.second, false}, part);
            }
        } else {
            _partitions_map->emplace(std::tuple {part->end_key.first, part->end_key.second, false},
                                     part);
        }
    }

    return Status::OK();
}
700
701
// Replaces existing partitions with new ones, pairing `old_partition_ids[i]` with
// `new_partitions[i]`.
//
// For every pair, the partition whose id equals `old_partition_ids[i]` is looked up in
// `_partitions`; a new VOlapTablePartition is created from `new_partitions[i]`, takes over the
// old partition's start/end/in keys, is appended to `_partitions` and overwrites the matching
// entries of `_partitions_map`. Finally every partition whose id is listed in
// `old_partition_ids` is removed from `_partitions`.
//
// `old_partition_ids` is sorted in place as a side effect.
//
// Returns InternalError when
//   * there are more new partitions than old ids (would index past `old_partition_ids`),
//   * an old id cannot be found in `_partitions`, or
//   * a new partition's indexes do not match the schema's indexes (count or id).
// Pairs handled before a failing one stay replaced in `_partitions_map`.
Status VOlapTablePartitionParam::replace_partitions(
        std::vector<int64_t>& old_partition_ids,
        const std::vector<TOlapTablePartition>& new_partitions) {
    DCHECK(old_partition_ids.size() == new_partitions.size());
    // DCHECK is a debug-only assertion; without this guard a release build would read
    // old_partition_ids[i] out of bounds in the loop below.
    if (new_partitions.size() > old_partition_ids.size()) {
        return Status::InternalError(
                "number of new partitions exceeds number of old partitions in replacing"
                ", num_old={}, num_new={}",
                old_partition_ids.size(), new_partitions.size());
    }

    // hoisted: nothing in the loop below touches _schema
    const auto& schema_indexes = _schema->indexes();
    const auto num_indexes = schema_indexes.size();

    // init and add new partitions. insert into _partitions
    for (size_t i = 0; i < new_partitions.size(); ++i) {
        const auto& t_part = new_partitions[i];
        // pair old_partition_ids and new_partitions one by one. TODO: sort to opt performance
        const auto old_part_id = old_partition_ids[i];
        auto old_it = std::find_if(
                _partitions.begin(), _partitions.end(),
                [old_part_id](const VOlapTablePartition* lhs) { return lhs->id == old_part_id; });
        if (old_it == _partitions.end()) {
            return Status::InternalError("Cannot find old tablet {} in replacing", old_part_id);
        }
        VOlapTablePartition* old_part = *old_it;

        auto* part = _obj_pool.add(new VOlapTablePartition(&_partition_block));
        part->id = t_part.id;
        part->is_mutable = t_part.is_mutable;
        part->num_buckets = t_part.num_buckets;

        // Validate the indexes BEFORE touching old_part, so that a failure here does not leave
        // the old partition stripped of its keys.
        if (t_part.indexes.size() != num_indexes) {
            return Status::InternalError(
                    "number of partition's index is not equal with schema's"
                    ", num_part_indexes={}, num_schema_indexes={}",
                    t_part.indexes.size(), num_indexes);
        }
        part->indexes = t_part.indexes;
        // sort by index_id so the partition's indexes can be compared position by position
        std::sort(part->indexes.begin(), part->indexes.end(),
                  [](const OlapTableIndexTablets& lhs, const OlapTableIndexTablets& rhs) {
                      return lhs.index_id < rhs.index_id;
                  });
        // check index (size_t index: num_indexes is unsigned)
        for (size_t j = 0; j < num_indexes; ++j) {
            if (part->indexes[j].index_id != schema_indexes[j]->index_id) {
                return Status::InternalError(
                        "partition's index is not equal with schema's"
                        ", part_index={}, schema_index={}",
                        part->indexes[j].index_id, schema_indexes[j]->index_id);
            }
        }

        /// just substitute directly. no need to remove and reinsert keys.
        // range partition
        part->start_key = std::move(old_part->start_key);
        part->end_key = std::move(old_part->end_key);
        // list partition
        part->in_keys = std::move(old_part->in_keys);
        // NOTE(review): if old_part was the default partition but t_part is not flagged as
        // default, _default_partition keeps pointing at the replaced partition - confirm that
        // the sender always sets is_default_partition in that case.
        if (t_part.__isset.is_default_partition && t_part.is_default_partition) {
            _default_partition = part;
        }

        // add new partitions with new id.
        _partitions.emplace_back(part);
        VLOG_NOTICE << "params add new partition " << part->id;

        // replace items in _partition_maps
        if (_is_in_partition) {
            for (auto& in_key : part->in_keys) {
                (*_partitions_map)[std::tuple {in_key.first, in_key.second, false}] = part;
            }
        } else {
            (*_partitions_map)[std::tuple {part->end_key.first, part->end_key.second, false}] =
                    part;
        }
    }

    // remove old partitions by id: sort once, then drop every match in a single pass
    std::ranges::sort(old_partition_ids);
    std::erase_if(_partitions, [&old_partition_ids](const VOlapTablePartition* candidate) {
        return std::ranges::binary_search(old_partition_ids, candidate->id);
    });

    return Status::OK();
}
785
786
} // namespace doris