Coverage Report

Created: 2026-03-19 12:20

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/tablet_info.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "storage/tablet_info.h"
19
20
#include <butil/logging.h>
21
#include <gen_cpp/Descriptors_types.h>
22
#include <gen_cpp/Exprs_types.h>
23
#include <gen_cpp/Partitions_types.h>
24
#include <gen_cpp/Types_types.h>
25
#include <gen_cpp/descriptors.pb.h>
26
#include <gen_cpp/olap_file.pb.h>
27
#include <glog/logging.h>
28
29
#include <algorithm>
30
#include <cstddef>
31
#include <cstdint>
32
#include <memory>
33
#include <ostream>
34
#include <string>
35
#include <tuple>
36
37
#include "common/exception.h"
38
#include "common/logging.h"
39
#include "common/status.h"
40
#include "core/column/column.h"
41
#include "core/data_type/data_type.h"
42
#include "core/data_type/data_type_factory.hpp"
43
#include "core/data_type/define_primitive_type.h"
44
#include "core/data_type/primitive_type.h"
45
#include "core/value/large_int_value.h"
46
#include "runtime/descriptors.h"
47
#include "runtime/memory/mem_tracker.h"
48
#include "storage/tablet/tablet_schema.h"
49
#include "util/raw_value.h"
50
#include "util/string_parser.hpp"
51
#include "util/string_util.h"
52
// NOLINTNEXTLINE(unused-includes)
53
#include "core/value/vdatetime_value.h"
54
#include "exprs/function/cast/cast_to_timestamptz.h"
55
#include "exprs/vexpr_context.h" // IWYU pragma: keep
56
#include "exprs/vliteral.h"
57
58
namespace doris {
59
#include "common/compile_check_begin.h"
60
61
66.9k
// Serializes this index schema into `pindex`.
//
// Writes the index id and schema hash, then appends (in container order):
//   - the column name of every entry in `slots`      -> columns
//   - the schema pb of every entry in `columns`      -> columns_desc
//   - the schema pb of every entry in `indexes`      -> indexes_desc
//
// `pindex` must be non-null; it is dereferenced unconditionally.
void OlapTableIndexSchema::to_protobuf(POlapTableIndexSchema* pindex) const {
    pindex->set_id(index_id);
    pindex->set_schema_hash(schema_hash);

    std::for_each(slots.begin(), slots.end(),
                  [pindex](auto* slot_desc) { pindex->add_columns(slot_desc->col_name()); });

    std::for_each(columns.begin(), columns.end(), [pindex](auto* tablet_column) {
        tablet_column->to_schema_pb(pindex->add_columns_desc());
    });

    std::for_each(indexes.begin(), indexes.end(), [pindex](auto* tablet_index) {
        tablet_index->to_schema_pb(pindex->add_indexes_desc());
    });
}
74
75
bool VOlapTablePartKeyComparator::operator()(const BlockRowWithIndicator& lhs,
76
61.7M
                                             const BlockRowWithIndicator& rhs) const {
77
61.7M
    Block* l_block = std::get<0>(lhs);
78
61.7M
    Block* r_block = std::get<0>(rhs);
79
61.7M
    int32_t l_row = std::get<1>(lhs);
80
61.7M
    int32_t r_row = std::get<1>(rhs);
81
61.7M
    bool l_use_new = std::get<2>(lhs);
82
61.7M
    bool r_use_new = std::get<2>(rhs);
83
84
18.4E
    VLOG_TRACE << '\n' << l_block->dump_data() << '\n' << r_block->dump_data();
85
86
61.7M
    if (l_row == -1) {
87
180
        return false;
88
61.7M
    } else if (r_row == -1) {
89
33.7M
        return true;
90
33.7M
    }
91
92
28.0M
    if (_param_locs.empty()) { // no transform, use origin column
93
27.2M
        for (auto slot_loc : _slot_locs) {
94
27.2M
            auto res = l_block->get_by_position(slot_loc).column->compare_at(
95
27.2M
                    l_row, r_row, *r_block->get_by_position(slot_loc).column, -1);
96
27.2M
            if (res != 0) {
97
27.0M
                return res < 0;
98
27.0M
            }
99
27.2M
        }
100
27.2M
    } else { // use transformed column to compare
101
18.4E
        DCHECK(_slot_locs.size() == _param_locs.size())
102
18.4E
                << _slot_locs.size() << ' ' << _param_locs.size();
103
104
808k
        const std::vector<uint16_t>* l_index = l_use_new ? &_param_locs : &_slot_locs;
105
18.4E
        const std::vector<uint16_t>* r_index = r_use_new ? &_param_locs : &_slot_locs;
106
107
1.18M
        for (int i = 0; i < _slot_locs.size(); i++) {
108
1.08M
            ColumnPtr l_col = l_block->get_by_position((*l_index)[i]).column;
109
1.08M
            ColumnPtr r_col = r_block->get_by_position((*r_index)[i]).column;
110
111
1.08M
            auto res = l_col->compare_at(l_row, r_row, *r_col, -1);
112
1.08M
            if (res != 0) {
113
713k
                return res < 0;
114
713k
            }
115
1.08M
        }
116
808k
    }
117
118
    // equal, return false
119
304k
    return false;
120
28.0M
}
121
122
29.2k
// Initializes this schema parameter from its protobuf representation.
//
// Steps:
//   1. Copy scalar settings (ids, version, update mode, strict mode, auto
//      increment info, new-row policy, timestamps, timezone).
//   2. Build the tuple descriptor and one SlotDescriptor per `slot_descs` entry,
//      indexing each slot under the key
//          lower(col_name) + "+" + <field type id> + ("true"|"false" nullable).
//   3. For every index, resolve each column to its slot through that key and
//      create a TabletColumn / TabletIndex for every described column / index.
//      In UPDATE_FIXED_COLUMNS mode only columns listed in
//      `partial_update_input_columns` are resolved to slots.
//   4. Sort `_indexes` by ascending index id.
//
// Returns InternalError when the auto-increment column id is missing (-1) or
// when an index column has no matching slot; Status::OK() otherwise.
Status OlapTableSchemaParam::init(const POlapTableSchemaParam& pschema) {
    _db_id = pschema.db_id();
    _table_id = pschema.table_id();
    _version = pschema.version();

    if (!pschema.has_unique_key_update_mode()) {
        // for backward compatibility
        const bool legacy_partial_update = pschema.has_partial_update() && pschema.partial_update();
        if (legacy_partial_update) {
            _unique_key_update_mode = UniqueKeyUpdateModePB::UPDATE_FIXED_COLUMNS;
        } else {
            _unique_key_update_mode = UniqueKeyUpdateModePB::UPSERT;
        }
    } else {
        _unique_key_update_mode = pschema.unique_key_update_mode();
        if (pschema.has_sequence_map_col_unique_id()) {
            _sequence_map_col_uid = pschema.sequence_map_col_unique_id();
        }
    }

    _is_strict_mode = pschema.is_strict_mode();

    const bool fixed_columns_mode =
            _unique_key_update_mode == UniqueKeyUpdateModePB::UPDATE_FIXED_COLUMNS;
    if (fixed_columns_mode) {
        _auto_increment_column = pschema.auto_increment_column();
        const bool auto_inc_id_missing = !_auto_increment_column.empty() &&
                                         pschema.auto_increment_column_unique_id() == -1;
        if (auto_inc_id_missing) {
            return Status::InternalError(
                    "Auto increment column id is not set in FE. Maybe FE is an older version "
                    "different from BE.");
        }
        _auto_increment_column_unique_id = pschema.auto_increment_column_unique_id();
    }

    if (_unique_key_update_mode != UniqueKeyUpdateModePB::UPSERT &&
        pschema.has_partial_update_new_key_policy()) {
        _partial_update_new_row_policy = pschema.partial_update_new_key_policy();
    }

    _timestamp_ms = pschema.timestamp_ms();
    if (pschema.has_nano_seconds()) {
        _nano_seconds = pschema.nano_seconds();
    }
    _timezone = pschema.timezone();

    for (const auto& input_column : pschema.partial_update_input_columns()) {
        _partial_update_input_columns.insert(input_column);
    }

    // Lookup table from "<lower name>+<field type id><nullable>" to slot.
    std::unordered_map<std::string, SlotDescriptor*> slots_map;

    _tuple_desc = _obj_pool.add(new TupleDescriptor(pschema.tuple_desc()));

    for (const auto& p_slot_desc : pschema.slot_descs()) {
        auto* slot_desc = _obj_pool.add(new SlotDescriptor(p_slot_desc));
        _tuple_desc->add_slot(slot_desc);

        // The slot's primitive type is rendered as its thrift enum name so that
        // it maps onto the same field type id the column descriptors yield.
        std::string type_name;
        EnumToString(TPrimitiveType, to_thrift(slot_desc->col_type()), type_name);
        const std::string type_id_str =
                std::to_string(int64_t(TabletColumn::get_field_type_by_string(type_name)));
        const std::string nullable_str = slot_desc->is_nullable() ? "true" : "false";

        slots_map.emplace(to_lower(slot_desc->col_name()) + "+" + type_id_str + nullable_str,
                          slot_desc);
    }

    for (const auto& p_index : pschema.indexes()) {
        auto* index = _obj_pool.add(new OlapTableIndexSchema());
        index->index_id = p_index.id();
        index->schema_hash = p_index.schema_hash();

        for (const auto& pcolumn_desc : p_index.columns_desc()) {
            const bool resolve_slot =
                    _unique_key_update_mode != UniqueKeyUpdateModePB::UPDATE_FIXED_COLUMNS ||
                    _partial_update_input_columns.contains(pcolumn_desc.name());
            if (resolve_slot) {
                std::string is_null_str = pcolumn_desc.is_nullable() ? "true" : "false";
                std::string data_type_str = std::to_string(
                        int64_t(TabletColumn::get_field_type_by_string(pcolumn_desc.type())));
                const std::string lookup_key =
                        to_lower(pcolumn_desc.name()) + "+" + data_type_str + is_null_str;

                auto slot_it = slots_map.find(lookup_key);
                if (slot_it == slots_map.end()) {
                    // Dump every known key to make the mismatch diagnosable.
                    std::string keys {};
                    for (const auto& [key, _] : slots_map) {
                        keys += fmt::format("{},", key);
                    }
                    LOG_EVERY_SECOND(WARNING) << fmt::format(
                            "[OlapTableSchemaParam::init(const POlapTableSchemaParam& pschema)]: "
                            "unknown index column, column={}, type={}, data_type_str={}, "
                            "is_null_str={}, slots_map.keys()=[{}], {}\npschema={}",
                            pcolumn_desc.name(), pcolumn_desc.type(), data_type_str, is_null_str,
                            keys, debug_string(), pschema.ShortDebugString());

                    return Status::InternalError("unknown index column, column={}, type={}",
                                                 pcolumn_desc.name(), pcolumn_desc.type());
                }
                index->slots.emplace_back(slot_it->second);
            }

            // A TabletColumn is recorded for every described column, whether or
            // not a slot was resolved for it above.
            TabletColumn* tablet_column = _obj_pool.add(new TabletColumn());
            tablet_column->init_from_pb(pcolumn_desc);
            index->columns.emplace_back(tablet_column);
        }

        for (const auto& pindex_desc : p_index.indexes_desc()) {
            TabletIndex* tablet_index = _obj_pool.add(new TabletIndex());
            tablet_index->init_from_pb(pindex_desc);
            index->indexes.emplace_back(tablet_index);
        }

        _indexes.emplace_back(index);
    }

    const auto by_index_id = [](const OlapTableIndexSchema* lhs, const OlapTableIndexSchema* rhs) {
        return lhs->index_id < rhs->index_id;
    };
    std::sort(_indexes.begin(), _indexes.end(), by_index_id);

    return Status::OK();
}
226
227
95.2k
// Derives `_unique_key_update_mode` (and, when present, `_sequence_map_col_uid`)
// from the thrift schema.
//
// When `unique_key_update_mode` is set it is mapped one-to-one onto the protobuf
// enum; an unrecognized value yields InternalError. When it is not set, the
// legacy `is_partial_update` flag selects between UPDATE_FIXED_COLUMNS and
// UPSERT. `sequence_map_col_unique_id` is only read on the non-legacy path.
Status OlapTableSchemaParam::init_unique_key_update_mode(const TOlapTableSchemaParam& tschema) {
    if (!tschema.__isset.unique_key_update_mode) {
        // for backward compatibility
        const bool legacy_partial_update =
                tschema.__isset.is_partial_update && tschema.is_partial_update;
        if (legacy_partial_update) {
            _unique_key_update_mode = UniqueKeyUpdateModePB::UPDATE_FIXED_COLUMNS;
        } else {
            _unique_key_update_mode = UniqueKeyUpdateModePB::UPSERT;
        }
        return Status::OK();
    }

    switch (tschema.unique_key_update_mode) {
    case doris::TUniqueKeyUpdateMode::UPSERT:
        _unique_key_update_mode = UniqueKeyUpdateModePB::UPSERT;
        break;
    case doris::TUniqueKeyUpdateMode::UPDATE_FIXED_COLUMNS:
        _unique_key_update_mode = UniqueKeyUpdateModePB::UPDATE_FIXED_COLUMNS;
        break;
    case doris::TUniqueKeyUpdateMode::UPDATE_FLEXIBLE_COLUMNS:
        _unique_key_update_mode = UniqueKeyUpdateModePB::UPDATE_FLEXIBLE_COLUMNS;
        break;
    default:
        return Status::InternalError(
                "Unknown unique_key_update_mode: {}, should be one of "
                "UPSERT/UPDATE_FIXED_COLUMNS/UPDATE_FLEXIBLE_COLUMNS",
                tschema.unique_key_update_mode);
    }

    if (tschema.__isset.sequence_map_col_unique_id) {
        _sequence_map_col_uid = tschema.sequence_map_col_unique_id;
    }
    return Status::OK();
}
262
263
95.2k
// Initializes this schema parameter from its thrift representation.
//
// Steps:
//   1. Copy ids/version, resolve the unique-key update mode, strict mode,
//      auto-increment info and the partial-update new-row policy.
//   2. Build the tuple descriptor and one SlotDescriptor per `slot_descs` entry,
//      indexing each slot under the key
//          lower(col_name) + "+" + <primitive type id> + ("true"|"false" nullable).
//   3. For every index, resolve each column to its slot through that key (in
//      UPDATE_FIXED_COLUMNS mode only for columns listed in
//      `partial_update_input_columns`), create a TabletColumn per column, a
//      TabletIndex per index description, and build the optional where clause.
//   4. Sort `_indexes` by ascending index id.
//
// Returns the first error produced by mode resolution, the auto-increment
// check, the new-row policy check, slot resolution or where-clause creation;
// Status::OK() otherwise.
Status OlapTableSchemaParam::init(const TOlapTableSchemaParam& tschema) {
    _db_id = tschema.db_id;
    _table_id = tschema.table_id;
    _version = tschema.version;
    RETURN_IF_ERROR(init_unique_key_update_mode(tschema));

    if (tschema.__isset.is_strict_mode) {
        _is_strict_mode = tschema.is_strict_mode;
    }

    if (_unique_key_update_mode == UniqueKeyUpdateModePB::UPDATE_FIXED_COLUMNS) {
        _auto_increment_column = tschema.auto_increment_column;
        const bool auto_inc_id_missing =
                !_auto_increment_column.empty() && tschema.auto_increment_column_unique_id == -1;
        if (auto_inc_id_missing) {
            return Status::InternalError(
                    "Auto increment column id is not set in FE. Maybe FE is an older version "
                    "different from BE.");
        }
        _auto_increment_column_unique_id = tschema.auto_increment_column_unique_id;
    }

    if (_unique_key_update_mode != UniqueKeyUpdateModePB::UPSERT &&
        tschema.__isset.partial_update_new_key_policy) {
        switch (tschema.partial_update_new_key_policy) {
        case doris::TPartialUpdateNewRowPolicy::APPEND:
            _partial_update_new_row_policy = PartialUpdateNewRowPolicyPB::APPEND;
            break;
        case doris::TPartialUpdateNewRowPolicy::ERROR:
            _partial_update_new_row_policy = PartialUpdateNewRowPolicyPB::ERROR;
            break;
        default:
            return Status::InvalidArgument(
                    "Unknown partial_update_new_key_behavior: {}, should be one of "
                    "'APPEND' or 'ERROR'",
                    tschema.partial_update_new_key_policy);
        }
    }

    for (const auto& input_column : tschema.partial_update_input_columns) {
        _partial_update_input_columns.insert(input_column);
    }

    // Lookup table from "<lower name>+<primitive type id><nullable>" to slot.
    std::unordered_map<std::string, SlotDescriptor*> slots_map;
    _tuple_desc = _obj_pool.add(new TupleDescriptor(tschema.tuple_desc));
    for (const auto& t_slot_desc : tschema.slot_descs) {
        auto* slot_desc = _obj_pool.add(new SlotDescriptor(t_slot_desc));
        _tuple_desc->add_slot(slot_desc);

        const std::string nullable_str = slot_desc->is_nullable() ? "true" : "false";
        const std::string type_id_str = std::to_string(int64_t(slot_desc->col_type()));
        slots_map.emplace(to_lower(slot_desc->col_name()) + "+" + type_id_str + nullable_str,
                          slot_desc);
    }

    for (const auto& t_index : tschema.indexes) {
        // lower-cased column name -> column unique id, for this index only
        std::unordered_map<std::string, int32_t> index_slots_map;
        auto* index = _obj_pool.add(new OlapTableIndexSchema());
        index->index_id = t_index.id;
        index->schema_hash = t_index.schema_hash;

        for (const auto& tcolumn_desc : t_index.columns_desc) {
            const bool resolve_slot =
                    _unique_key_update_mode != UniqueKeyUpdateModePB::UPDATE_FIXED_COLUMNS ||
                    _partial_update_input_columns.contains(tcolumn_desc.column_name);
            if (resolve_slot) {
                std::string is_null_str = tcolumn_desc.is_allow_null ? "true" : "false";
                std::string data_type_str =
                        std::to_string(int64_t(thrift_to_type(tcolumn_desc.column_type.type)));
                const std::string lookup_key =
                        to_lower(tcolumn_desc.column_name) + "+" + data_type_str + is_null_str;

                auto slot_it = slots_map.find(lookup_key);
                if (slot_it == slots_map.end()) {
                    // Dump the whole schema and every known key to make the
                    // mismatch diagnosable.
                    std::stringstream ss;
                    ss << tschema;
                    std::string keys {};
                    for (const auto& [key, _] : slots_map) {
                        keys += fmt::format("{},", key);
                    }
                    LOG_EVERY_SECOND(WARNING) << fmt::format(
                            "[OlapTableSchemaParam::init(const TOlapTableSchemaParam& tschema)]: "
                            "unknown index column, column={}, type={}, data_type_str={}, "
                            "is_null_str={}, slots_map.keys()=[{}], {}\ntschema={}",
                            tcolumn_desc.column_name, tcolumn_desc.column_type.type, data_type_str,
                            is_null_str, keys, debug_string(), ss.str());
                    return Status::InternalError("unknown index column, column={}, type={}",
                                                 tcolumn_desc.column_name,
                                                 tcolumn_desc.column_type.type);
                }
                index->slots.emplace_back(slot_it->second);
            }

            // Every described column is recorded, whether or not a slot was
            // resolved for it above.
            index_slots_map.emplace(to_lower(tcolumn_desc.column_name), tcolumn_desc.col_unique_id);
            TabletColumn* tablet_column = _obj_pool.add(new TabletColumn());
            tablet_column->init_from_thrift(tcolumn_desc);
            index->columns.emplace_back(tablet_column);
        }

        if (t_index.__isset.indexes_desc) {
            for (const auto& tindex_desc : t_index.indexes_desc) {
                const size_t column_count = tindex_desc.columns.size();
                // NOTE(review): entries whose column name is not found in
                // index_slots_map keep the value-initialized unique id 0 --
                // confirm that 0 cannot collide with a real column unique id.
                std::vector<int32_t> column_unique_ids(column_count);
                for (size_t i = 0; i < column_count; i++) {
                    auto uid_it = index_slots_map.find(to_lower(tindex_desc.columns[i]));
                    if (uid_it != index_slots_map.end()) {
                        column_unique_ids[i] = uid_it->second;
                    }
                }
                TabletIndex* tablet_index = _obj_pool.add(new TabletIndex());
                tablet_index->init_from_thrift(tindex_desc, column_unique_ids);
                index->indexes.emplace_back(tablet_index);
            }
        }

        if (t_index.__isset.where_clause) {
            RETURN_IF_ERROR(VExpr::create_expr_tree(t_index.where_clause, index->where_clause));
        }
        _indexes.emplace_back(index);
    }

    const auto by_index_id = [](const OlapTableIndexSchema* lhs, const OlapTableIndexSchema* rhs) {
        return lhs->index_id < rhs->index_id;
    };
    std::sort(_indexes.begin(), _indexes.end(), by_index_id);

    return Status::OK();
}
379
380
65.9k
// Serializes this schema parameter into `pschema`.
//
// Scalar settings are written unconditionally. The legacy `partial_update`
// flag is additionally set when the mode is UPDATE_FIXED_COLUMNS. The tuple
// descriptor, every slot of the tuple descriptor and every index are then
// appended in container order.
//
// `pschema` must be non-null, and `_tuple_desc` must already be populated
// (it is dereferenced unconditionally).
void OlapTableSchemaParam::to_protobuf(POlapTableSchemaParam* pschema) const {
    pschema->set_db_id(_db_id);
    pschema->set_table_id(_table_id);
    pschema->set_version(_version);
    pschema->set_unique_key_update_mode(_unique_key_update_mode);
    if (_unique_key_update_mode == UniqueKeyUpdateModePB::UPDATE_FIXED_COLUMNS) {
        // for backward compatibility
        pschema->set_partial_update(true);
    }
    pschema->set_partial_update_new_key_policy(_partial_update_new_row_policy);
    pschema->set_is_strict_mode(_is_strict_mode);
    pschema->set_auto_increment_column(_auto_increment_column);
    pschema->set_auto_increment_column_unique_id(_auto_increment_column_unique_id);
    pschema->set_timestamp_ms(_timestamp_ms);
    pschema->set_timezone(_timezone);
    pschema->set_nano_seconds(_nano_seconds);
    pschema->set_sequence_map_col_unique_id(_sequence_map_col_uid);

    // Iterate by const reference: iterating by value copied every element
    // once just to copy it again into the message.
    for (const auto& col : _partial_update_input_columns) {
        *pschema->add_partial_update_input_columns() = col;
    }

    _tuple_desc->to_protobuf(pschema->mutable_tuple_desc());
    for (auto* slot : _tuple_desc->slots()) {
        slot->to_protobuf(pschema->add_slot_descs());
    }
    for (auto* index : _indexes) {
        index->to_protobuf(pschema->add_indexes());
    }
}
408
409
0
std::string OlapTableSchemaParam::debug_string() const {
410
0
    std::stringstream ss;
411
0
    ss << "tuple_desc=" << _tuple_desc->debug_string();
412
0
    return ss.str();
413
0
}
414
415
// Builds the partition parameter from the table schema and the thrift
// partition description.
//
// - For auto-partition tables, one expression tree is created per entry of
//   `partition_function_exprs`, and its root is stored in `_partition_function`.
// - Auto-detect-overwrite and master-address settings are copied when present.
// - `_partition_block` receives one empty column per table slot. For
//   auto-partition tables, a slot that is a partition column takes the column
//   and data type of its partition expression instead of the slot's own.
//
// Throws Exception when a partition function expression cannot be built, or
// when a partition column has no corresponding partition function expression.
VOlapTablePartitionParam::VOlapTablePartitionParam(std::shared_ptr<OlapTableSchemaParam>& schema,
                                                   const TOlapTablePartitionParam& t_param)
        : _schema(schema),
          _t_param(t_param),
          _slots(_schema->tuple_desc()->slots()),
          _mem_tracker(std::make_unique<MemTracker>("OlapTablePartitionParam")),
          _part_type(t_param.partition_type) {
    if (t_param.__isset.enable_automatic_partition && t_param.enable_automatic_partition) {
        _is_auto_partition = true;
        const size_t size = t_param.partition_function_exprs.size();
        _part_func_ctx.resize(size);
        _partition_function.resize(size);
        DCHECK((t_param.partition_type == TPartitionType::RANGE_PARTITIONED && size == 1) ||
               (t_param.partition_type == TPartitionType::LIST_PARTITIONED && size >= 1))
                << "now support only 1 partition column for auto range partitions. "
                << t_param.partition_type << " " << size;
        // size_t index: avoids the signed/unsigned comparison against `size`.
        for (size_t i = 0; i < size; ++i) {
            Status st =
                    VExpr::create_expr_tree(t_param.partition_function_exprs[i], _part_func_ctx[i]);
            if (!st.ok()) {
                throw Exception(Status::InternalError("Partition function expr is not valid"),
                                "Partition function expr is not valid");
            }
            _partition_function[i] = _part_func_ctx[i]->root();
        }
    }

    if (t_param.__isset.enable_auto_detect_overwrite && t_param.enable_auto_detect_overwrite) {
        _is_auto_detect_overwrite = true;
        DCHECK(t_param.__isset.overwrite_group_id);
        _overwrite_group_id = t_param.overwrite_group_id;
    }

    if (t_param.__isset.master_address) {
        _master_address = std::make_shared<TNetworkAddress>(t_param.master_address);
    }

    if (_is_auto_partition) {
        // the nullable mode depends on partition_exprs. not column slots. so use them.
        DCHECK(_partition_function.size() <= _slots.size())
                << _partition_function.size() << ", " << _slots.size();

        // suppose (k0, [k1], [k2]), so get [k1, 0], [k2, 1]
        std::map<std::string, size_t> partition_slots_map; // name to idx in part_exprs
        for (size_t i = 0; i < t_param.partition_columns.size(); i++) {
            partition_slots_map.emplace(t_param.partition_columns[i], i);
        }

        // here we rely on the same order and number of the _part_funcs and _slots in the prefix
        // _part_block contains all slots of table.
        for (auto* slot : _slots) {
            // try to replace with partition expr.
            if (auto it = partition_slots_map.find(slot->col_name());
                it != partition_slots_map.end()) { // it's a partition column slot
                // The index comes from `partition_columns`, while the vector is
                // sized from `partition_function_exprs`; nothing above forces the
                // two to agree, so reject a mismatch instead of reading out of
                // bounds.
                if (it->second >= _partition_function.size()) {
                    throw Exception(
                            Status::InternalError(
                                    "Partition column has no matching partition function expr"),
                            "Partition column has no matching partition function expr");
                }
                auto& expr_type = _partition_function[it->second]->data_type();
                _partition_block.insert({expr_type->create_column(), expr_type, slot->col_name()});
            } else {
                _partition_block.insert({slot->get_empty_mutable_column(),
                                         slot->get_data_type_ptr(), slot->col_name()});
            }
        }
        VLOG_TRACE << _partition_block.dump_structure();
    } else {
        // we insert all. but not all will be used. it will controlled by _partition_slot_locs
        for (auto* slot : _slots) {
            _partition_block.insert({slot->get_empty_mutable_column(), slot->get_data_type_ptr(),
                                     slot->col_name()});
        }
    }
}
485
486
69.0k
// Hands the bytes recorded in `_mem_usage` back to the memory tracker.
// init() is where `_mem_usage` is set and consumed on the same tracker.
VOlapTablePartitionParam::~VOlapTablePartitionParam() {
    _mem_tracker->release(_mem_usage);
}
489
490
68.6k
// Resolves column positions and materializes the partitions described by
// `_t_param`.
//
// Steps:
//   1. Translate partition / distribution column names into positions within
//      the schema's slot list (`_partition_slot_locs`, `_distributed_slot_locs`).
//   2. Create the ordered partition map keyed by partition boundary rows.
//   3. Generate every partition; unless the partitions are flagged as fake,
//      register each list partition under all of its in-keys and each other
//      partition under its end key.
//   4. Account the partition block's allocated bytes on the memory tracker.
//
// Returns InternalError when a referenced column is not among the schema slots,
// or the first error from generate_partition_from(); Status::OK() otherwise.
Status VOlapTablePartitionParam::init() {
    std::vector<std::string> slot_column_names;
    for (auto* slot_desc : _schema->tuple_desc()->slots()) {
        slot_column_names.emplace_back(slot_desc->col_name());
    }

    // Appends the position of `slot_name` within the schema slots to `locs`.
    // `column_type` is only used to label the error message.
    auto find_slot_locs = [&slot_column_names](const std::string& slot_name,
                                               std::vector<uint16_t>& locs,
                                               const std::string& column_type) {
        const auto first = slot_column_names.begin();
        const auto last = slot_column_names.end();
        auto found = std::find(first, last, slot_name);
        if (found != last) {
            locs.emplace_back(found - first);
            return Status::OK();
        }
        return Status::InternalError("{} column not found, column ={}", column_type, slot_name);
    };

    // here we find the partition columns. others maybe non-partition columns/special columns.
    if (_t_param.__isset.partition_columns) {
        for (auto& part_col : _t_param.partition_columns) {
            RETURN_IF_ERROR(find_slot_locs(part_col, _partition_slot_locs, "partition"));
        }
    }

    using PartitionMap =
            std::map<BlockRowWithIndicator, VOlapTablePartition*, VOlapTablePartKeyComparator>;
    _partitions_map = std::make_unique<PartitionMap>(
            VOlapTablePartKeyComparator(_partition_slot_locs, _transformed_slot_locs));

    if (_t_param.__isset.distributed_columns) {
        for (auto& dist_col : _t_param.distributed_columns) {
            RETURN_IF_ERROR(find_slot_locs(dist_col, _distributed_slot_locs, "distributed"));
        }
    }

    // for both auto/non-auto partition table.
    _is_in_partition = _part_type == TPartitionType::type::LIST_PARTITIONED;

    // Initialize partitions. Fake (dummy) partitions are still generated and
    // stored, but no lookup key is registered for them.
    for (const auto& t_part : _t_param.partitions) {
        VOlapTablePartition* part = nullptr;
        RETURN_IF_ERROR(generate_partition_from(t_part, part));
        _partitions.emplace_back(part);

        if (_t_param.partitions_is_fake) {
            continue;
        }

        if (!_is_in_partition) {
            _partitions_map->emplace(std::tuple {part->end_key.first, part->end_key.second, false},
                                     part);
            continue;
        }

        for (auto& in_key : part->in_keys) {
            _partitions_map->emplace(std::tuple {in_key.first, in_key.second, false}, part);
        }
    }

    _mem_usage = _partition_block.allocated_bytes();
    _mem_tracker->consume(_mem_usage);
    return Status::OK();
}
548
549
bool VOlapTablePartitionParam::_part_contains(VOlapTablePartition* part,
550
40.3M
                                              BlockRowWithIndicator key) const {
551
40.3M
    VOlapTablePartKeyComparator comparator(_partition_slot_locs, _transformed_slot_locs);
552
    // we have used upper_bound to find to ensure key < part.right and this part is closest(right - key is min)
553
    // now we only have to check (key >= part.left). the comparator(a,b) means a < b, so we use anti
554
40.3M
    return part->start_key.second == -1 /* spj: start_key.second == -1 means only single partition*/
555
40.3M
           || !comparator(key, std::tuple {part->start_key.first, part->start_key.second, false});
556
40.3M
}
557
558
// insert value into _partition_block's column
559
// NOLINTBEGIN(readability-function-size)
560
43.8k
// Appends the literal carried by `t_expr` as one new row of the column at position `pos` in the
// block referenced by `part_key->first`, then records the index of that column's last row in
// `part_key->second`.
//
// Returns InternalError when:
//   - a date / datetime / timestamptz literal cannot be parsed,
//   - an INT_LITERAL carries no type node to select the integer width from,
//   - a NULL literal targets a non-nullable column,
//   - the node type is not one of the literal kinds handled below.
static Status _create_partition_key(const TExprNode& t_expr, BlockRow* part_key, uint16_t pos) {
    auto column = std::move(*part_key->first->get_by_position(pos).column).mutate();

    // Builds the error for an unparsable date-like literal; `prefix` is the head of the message.
    auto invalid_literal = [&t_expr](const char* prefix) {
        std::stringstream ss;
        ss << prefix << t_expr.date_literal;
        return Status::InternalError(ss.str());
    };

    //TODO: use assert_cast before insert_data
    switch (t_expr.node_type) {
    case TExprNodeType::DATE_LITERAL: {
        const auto primitive_type =
                DataTypeFactory::instance().create_data_type(t_expr.type)->get_primitive_type();
        const auto& literal = t_expr.date_literal.value;
        if (primitive_type == TYPE_DATEV2) {
            DateV2Value<DateV2ValueType> dt;
            if (!dt.from_date_str(literal.c_str(), literal.size())) {
                return invalid_literal("invalid date literal in partition column, date=");
            }
            column->insert_data(reinterpret_cast<const char*>(&dt), 0);
        } else if (primitive_type == TYPE_DATETIMEV2) {
            DateV2Value<DateTimeV2ValueType> dt;
            // -1 is passed as the scale when the literal carries no type node.
            const int32_t scale =
                    t_expr.type.types.empty() ? -1 : t_expr.type.types.front().scalar_type.scale;
            if (!dt.from_date_str(literal.c_str(), literal.size(), scale)) {
                return invalid_literal("invalid date literal in partition column, date=");
            }
            column->insert_data(reinterpret_cast<const char*>(&dt), 0);
        } else if (primitive_type == TYPE_TIMESTAMPTZ) {
            TimestampTzValue res;
            CastParameters params {.status = Status::OK(), .is_strict = true};
            // -1 is passed as the scale when the literal carries no type node.
            const int32_t scale =
                    t_expr.type.types.empty() ? -1 : t_expr.type.types.front().scalar_type.scale;
            if (!CastToTimstampTz::from_string({literal.c_str(), literal.size()}, res, params,
                                               nullptr, scale)) [[unlikely]] {
                return invalid_literal("invalid timestamptz literal in partition column, value=");
            } else {
                column->insert_data(reinterpret_cast<const char*>(&res), 0);
            }
        } else {
            // TYPE_DATE (DATEV1) or TYPE_DATETIME (DATETIMEV1)
            VecDateTimeValue dt;
            if (!dt.from_date_str(literal.c_str(), literal.size())) {
                return invalid_literal("invalid date literal in partition column, date=");
            }
            // Reuse the primitive type resolved above instead of building the data type again.
            if (primitive_type == TYPE_DATE) {
                dt.cast_to_date();
            }
            column->insert_data(reinterpret_cast<const char*>(&dt), 0);
        }
        break;
    }
    case TExprNodeType::INT_LITERAL: {
        // The integer width is selected from the first type node; indexing an empty vector would
        // be undefined behaviour, so reject such a literal explicitly.
        if (t_expr.type.types.empty()) {
            return Status::InternalError(
                    "missing type information for int literal in partition column");
        }
        switch (t_expr.type.types[0].scalar_type.type) {
        case TPrimitiveType::TINYINT: {
            auto value = cast_set<int8_t>(t_expr.int_literal.value);
            column->insert_data(reinterpret_cast<const char*>(&value), 0);
            break;
        }
        case TPrimitiveType::SMALLINT: {
            auto value = cast_set<int16_t>(t_expr.int_literal.value);
            column->insert_data(reinterpret_cast<const char*>(&value), 0);
            break;
        }
        case TPrimitiveType::INT: {
            auto value = cast_set<int32_t>(t_expr.int_literal.value);
            column->insert_data(reinterpret_cast<const char*>(&value), 0);
            break;
        }
        default:
            // Every other integer type is inserted as a 64-bit value.
            int64_t value = t_expr.int_literal.value;
            column->insert_data(reinterpret_cast<const char*>(&value), 0);
        }
        break;
    }
    case TExprNodeType::LARGE_INT_LITERAL: {
        StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
        auto value = StringParser::string_to_int<__int128>(t_expr.large_int_literal.value.c_str(),
                                                           t_expr.large_int_literal.value.size(),
                                                           &parse_result);
        // A literal that fails to parse is stored as MAX_INT128 rather than reported as an error.
        if (parse_result != StringParser::PARSE_SUCCESS) {
            value = MAX_INT128;
        }
        column->insert_data(reinterpret_cast<const char*>(&value), 0);
        break;
    }
    case TExprNodeType::STRING_LITERAL: {
        size_t len = t_expr.string_literal.value.size();
        const char* str_val = t_expr.string_literal.value.c_str();
        column->insert_data(str_val, len);
        break;
    }
    case TExprNodeType::BOOL_LITERAL: {
        column->insert_data(reinterpret_cast<const char*>(&t_expr.bool_literal.value), 0);
        break;
    }
    case TExprNodeType::NULL_LITERAL: {
        // insert a null literal
        if (!column->is_nullable()) {
            // https://github.com/apache/doris/pull/39449 have forbid this cause. always add this check as protective measures
            return Status::InternalError("The column {} is not null, can't insert into NULL value.",
                                         part_key->first->get_by_position(pos).name);
        }
        column->insert_data(nullptr, 0);
        break;
    }
    default: {
        return Status::InternalError("unsupported partition column node type, type={}",
                                     t_expr.node_type);
    }
    }
    // The key row is the one just appended, i.e. the last row of the column.
    part_key->second = cast_set<int32_t>(column->size() - 1);
    return Status::OK();
}
681
// NOLINTEND(readability-function-size)
682
683
// Materializes one partition key: the i-th expression of `t_exprs` is appended to the partition
// block column at `_partition_slot_locs[i]`, and `part_key` is updated by each insertion.
// Returns the first error reported by _create_partition_key, or InternalError when more key
// expressions are supplied than there are partition slot locations.
Status VOlapTablePartitionParam::_create_partition_keys(const std::vector<TExprNode>& t_exprs,
                                                        BlockRow* part_key) {
    // Every expression needs a matching slot location; reading past the end of
    // _partition_slot_locs would be undefined behaviour.
    if (t_exprs.size() > _partition_slot_locs.size()) {
        return Status::InternalError(
                "number of partition key exprs exceeds partition columns"
                ", num_exprs={}, num_partition_columns={}",
                t_exprs.size(), _partition_slot_locs.size());
    }
    for (size_t i = 0; i < t_exprs.size(); ++i) {
        RETURN_IF_ERROR(_create_partition_key(t_exprs[i], part_key, _partition_slot_locs[i]));
    }
    return Status::OK();
}
690
691
// Builds a VOlapTablePartition from the thrift description `t_part` and hands it back through
// `part_result`, which must be null on entry. The object is allocated from _obj_pool and its keys
// are materialized into _partition_block. Nothing is inserted into _partitions or
// _partitions_map here.
// Returns InternalError when key creation fails or when the partition's indexes do not match the
// schema's indexes.
Status VOlapTablePartitionParam::generate_partition_from(const TOlapTablePartition& t_part,
                                                         VOlapTablePartition*& part_result) {
    DCHECK(part_result == nullptr);
    // here we set the default value of partition bounds first! if it doesn't have some key, it will be -1.
    part_result = _obj_pool.add(new VOlapTablePartition(&_partition_block));
    VOlapTablePartition& partition = *part_result;
    partition.id = t_part.id;
    partition.is_mutable = t_part.is_mutable;
    // only load_to_single_tablet = true will set load_tablet_idx
    if (t_part.__isset.load_tablet_idx) {
        partition.load_tablet_idx = t_part.load_tablet_idx;
    }

    if (!_is_in_partition) {
        // range partition: both bounds are optional
        if (t_part.__isset.start_keys) {
            RETURN_IF_ERROR(_create_partition_keys(t_part.start_keys, &partition.start_key));
        }
        // we generate the right bound but not insert into partition map
        if (t_part.__isset.end_keys) {
            RETURN_IF_ERROR(_create_partition_keys(t_part.end_keys, &partition.end_key));
        }
    } else {
        // list partition: one key row per entry of in_keys
        for (const auto& key_exprs : t_part.in_keys) {
            RETURN_IF_ERROR(_create_partition_keys(
                    key_exprs, &partition.in_keys.emplace_back(&_partition_block, -1)));
        }
        // Only the first partition flagged as default is remembered.
        const bool flagged_default =
                t_part.__isset.is_default_partition && t_part.is_default_partition;
        if (flagged_default && _default_partition == nullptr) {
            _default_partition = part_result;
        }
    }

    partition.num_buckets = t_part.num_buckets;
    const auto num_indexes = _schema->indexes().size();
    if (t_part.indexes.size() != num_indexes) {
        return Status::InternalError(
                "number of partition's index is not equal with schema's"
                ", num_part_indexes={}, num_schema_indexes={}",
                t_part.indexes.size(), num_indexes);
    }
    partition.indexes = t_part.indexes;
    // Order by index id so the entries can be compared position by position with the schema.
    const auto by_index_id = [](const OlapTableIndexTablets& lhs,
                                const OlapTableIndexTablets& rhs) {
        return lhs.index_id < rhs.index_id;
    };
    std::sort(partition.indexes.begin(), partition.indexes.end(), by_index_id);
    // check index
    for (size_t idx = 0; idx < num_indexes; ++idx) {
        if (partition.indexes[idx].index_id != _schema->indexes()[idx]->index_id) {
            return Status::InternalError(
                    "partition's index is not equal with schema's"
                    ", part_index={}, schema_index={}",
                    partition.indexes[idx].index_id, _schema->indexes()[idx]->index_id);
        }
    }

    // Optional per-partition load settings: copied only when present in t_part.
    if (t_part.__isset.total_replica_num) {
        partition.total_replica_num = t_part.total_replica_num;
    }
    if (t_part.__isset.load_required_replica_num) {
        partition.load_required_replica_num = t_part.load_required_replica_num;
    }
    if (t_part.__isset.tablet_version_gap_backends) {
        for (const auto& [tablet_id, backend_ids] : t_part.tablet_version_gap_backends) {
            // operator[] creates the per-tablet set even when backend_ids is empty.
            auto& gap_backends = partition.tablet_version_gap_backends[tablet_id];
            for (auto backend_id : backend_ids) {
                gap_backends.insert(backend_id);
            }
        }
    }
    return Status::OK();
}
760
761
// Registers additional partitions described by `partitions`.
// For each entry a VOlapTablePartition is allocated from _obj_pool, its keys are materialized
// into _partition_block, its indexes are validated against the schema, and it is appended to
// _partitions and inserted into _partitions_map (by each in-key for list partitioning, by end
// key otherwise).
// Returns InternalError on the first partition whose keys cannot be created or whose indexes do
// not match the schema; partitions processed before the failing one stay registered.
Status VOlapTablePartitionParam::add_partitions(
        const std::vector<TOlapTablePartition>& partitions) {
    for (const auto& t_part : partitions) {
        auto* part = _obj_pool.add(new VOlapTablePartition(&_partition_block));
        part->id = t_part.id;
        part->is_mutable = t_part.is_mutable;
        // Copy the optional per-partition load settings the same way generate_partition_from()
        // does, so a partition added later is not left with defaults when they are supplied.
        if (t_part.__isset.load_tablet_idx) {
            part->load_tablet_idx = t_part.load_tablet_idx;
        }
        if (t_part.__isset.total_replica_num) {
            part->total_replica_num = t_part.total_replica_num;
        }
        if (t_part.__isset.load_required_replica_num) {
            part->load_required_replica_num = t_part.load_required_replica_num;
        }
        if (t_part.__isset.tablet_version_gap_backends) {
            for (const auto& [tablet_id, backend_ids] : t_part.tablet_version_gap_backends) {
                auto& gap_set = part->tablet_version_gap_backends[tablet_id];
                for (auto backend_id : backend_ids) {
                    gap_set.insert(backend_id);
                }
            }
        }

        // we dont pass right keys when it's MAX_VALUE. so there's possibility we only have start_key but not end_key
        // range partition
        if (t_part.__isset.start_keys) {
            RETURN_IF_ERROR(_create_partition_keys(t_part.start_keys, &part->start_key));
        }
        if (t_part.__isset.end_keys) {
            RETURN_IF_ERROR(_create_partition_keys(t_part.end_keys, &part->end_key));
        }
        // list partition - we only set 1 value in 1 partition for new created ones
        if (t_part.__isset.in_keys) {
            for (const auto& keys : t_part.in_keys) {
                RETURN_IF_ERROR(_create_partition_keys(
                        keys, &part->in_keys.emplace_back(&_partition_block, -1)));
            }
            if (t_part.__isset.is_default_partition && t_part.is_default_partition) {
                _default_partition = part;
            }
        }

        part->num_buckets = t_part.num_buckets;
        const auto num_indexes = _schema->indexes().size();
        if (t_part.indexes.size() != num_indexes) {
            return Status::InternalError(
                    "number of partition's index is not equal with schema's"
                    ", num_part_indexes={}, num_schema_indexes={}",
                    t_part.indexes.size(), num_indexes);
        }
        part->indexes = t_part.indexes;
        // Order by index id so the entries can be compared position by position with the schema.
        std::sort(part->indexes.begin(), part->indexes.end(),
                  [](const OlapTableIndexTablets& lhs, const OlapTableIndexTablets& rhs) {
                      return lhs.index_id < rhs.index_id;
                  });
        // check index
        for (size_t j = 0; j < num_indexes; ++j) {
            if (part->indexes[j].index_id != _schema->indexes()[j]->index_id) {
                return Status::InternalError(
                        "partition's index is not equal with schema's"
                        ", part_index={}, schema_index={}",
                        part->indexes[j].index_id, _schema->indexes()[j]->index_id);
            }
        }
        _partitions.emplace_back(part);
        // The lookup map is filled only after the keys exist in _partition_block.
        if (_is_in_partition) {
            for (auto& in_key : part->in_keys) {
                _partitions_map->emplace(std::tuple {in_key.first, in_key.second, false}, part);
            }
        } else {
            _partitions_map->emplace(std::tuple {part->end_key.first, part->end_key.second, false},
                                     part);
        }
    }

    return Status::OK();
}
823
824
// Replaces existing partitions with new ones, pairing old_partition_ids[i] with new_partitions[i].
// Each new partition takes over the key rows (start/end/in keys) of the partition it replaces,
// is validated against the schema's indexes, is appended to _partitions and overwrites the
// corresponding entries of _partitions_map. Afterwards every partition whose id is listed in
// `old_partition_ids` is dropped from _partitions.
// Side effect: `old_partition_ids` is sorted in place.
// Returns InternalError when the two inputs differ in length, when an old id is not present in
// _partitions, or when a new partition's indexes do not match the schema.
Status VOlapTablePartitionParam::replace_partitions(
        std::vector<int64_t>& old_partition_ids,
        const std::vector<TOlapTablePartition>& new_partitions) {
    // remove old replaced partitions
    DCHECK(old_partition_ids.size() == new_partitions.size());
    // DCHECK is compiled out of release builds; without this guard a shorter id list would be
    // indexed out of range below.
    if (old_partition_ids.size() != new_partitions.size()) {
        return Status::InternalError(
                "number of old partition ids is not equal with new partitions"
                ", num_old_partitions={}, num_new_partitions={}",
                old_partition_ids.size(), new_partitions.size());
    }

    // init and add new partitions. insert into _partitions
    for (size_t i = 0; i < new_partitions.size(); ++i) {
        const auto& t_part = new_partitions[i];
        // pair old_partition_ids and new_partitions one by one. TODO: sort to opt performance
        VOlapTablePartition* old_part = nullptr;
        const auto old_part_id = old_partition_ids[i];
        if (auto it = std::find_if(_partitions.begin(), _partitions.end(),
                                   [old_part_id](const VOlapTablePartition* lhs) {
                                       return lhs->id == old_part_id;
                                   });
            it != _partitions.end()) {
            old_part = *it;
        } else {
            // NOTE: the value reported here is a partition id, despite the "tablet" wording.
            return Status::InternalError("Cannot find old tablet {} in replacing", old_part_id);
        }

        auto* part = _obj_pool.add(new VOlapTablePartition(&_partition_block));
        part->id = t_part.id;
        part->is_mutable = t_part.is_mutable;
        // Copy the optional per-partition load settings the same way generate_partition_from()
        // does, so the replacement is not left with defaults when they are supplied.
        if (t_part.__isset.load_tablet_idx) {
            part->load_tablet_idx = t_part.load_tablet_idx;
        }
        if (t_part.__isset.total_replica_num) {
            part->total_replica_num = t_part.total_replica_num;
        }
        if (t_part.__isset.load_required_replica_num) {
            part->load_required_replica_num = t_part.load_required_replica_num;
        }
        if (t_part.__isset.tablet_version_gap_backends) {
            for (const auto& [tablet_id, backend_ids] : t_part.tablet_version_gap_backends) {
                auto& gap_set = part->tablet_version_gap_backends[tablet_id];
                for (auto backend_id : backend_ids) {
                    gap_set.insert(backend_id);
                }
            }
        }

        /// just substitute directly. no need to remove and reinsert keys.
        // range partition
        part->start_key = std::move(old_part->start_key);
        part->end_key = std::move(old_part->end_key);
        // list partition
        part->in_keys = std::move(old_part->in_keys);
        if (t_part.__isset.is_default_partition && t_part.is_default_partition) {
            _default_partition = part;
        }

        part->num_buckets = t_part.num_buckets;
        const auto num_indexes = _schema->indexes().size();
        if (t_part.indexes.size() != num_indexes) {
            return Status::InternalError(
                    "number of partition's index is not equal with schema's"
                    ", num_part_indexes={}, num_schema_indexes={}",
                    t_part.indexes.size(), num_indexes);
        }
        part->indexes = t_part.indexes;
        // Order by index id so the entries can be compared position by position with the schema.
        std::sort(part->indexes.begin(), part->indexes.end(),
                  [](const OlapTableIndexTablets& lhs, const OlapTableIndexTablets& rhs) {
                      return lhs.index_id < rhs.index_id;
                  });
        // check index
        for (size_t j = 0; j < num_indexes; ++j) {
            if (part->indexes[j].index_id != _schema->indexes()[j]->index_id) {
                return Status::InternalError(
                        "partition's index is not equal with schema's"
                        ", part_index={}, schema_index={}",
                        part->indexes[j].index_id, _schema->indexes()[j]->index_id);
            }
        }

        // add new partitions with new id.
        _partitions.emplace_back(part);
        VLOG_NOTICE << "params add new partition " << part->id;

        // replace items in _partition_maps
        if (_is_in_partition) {
            for (auto& in_key : part->in_keys) {
                (*_partitions_map)[std::tuple {in_key.first, in_key.second, false}] = part;
            }
        } else {
            (*_partitions_map)[std::tuple {part->end_key.first, part->end_key.second, false}] =
                    part;
        }
    }
    // remove old partitions by id: sort once so each membership test is a binary search, then
    // drop all matches in a single pass instead of erasing element by element.
    std::ranges::sort(old_partition_ids);
    std::erase_if(_partitions, [&old_partition_ids](const VOlapTablePartition* candidate) {
        return std::ranges::binary_search(old_partition_ids, candidate->id);
    });

    return Status::OK();
}
908
#include "common/compile_check_end.h"
909
910
} // namespace doris