be/src/format/table/equality_delete.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "format/table/equality_delete.h" |
19 | | |
20 | | #include "exprs/create_predicate_function.h" |
21 | | |
22 | | namespace doris { |
23 | | #include "common/compile_check_begin.h" |
24 | | |
25 | | std::unique_ptr<EqualityDeleteBase> EqualityDeleteBase::get_delete_impl( |
26 | 0 | const Block* delete_block, const std::vector<int>& delete_col_ids) { |
27 | 0 | DCHECK_EQ(delete_block->columns(), delete_col_ids.size()); |
28 | 0 | if (delete_block->columns() == 1) { |
29 | 0 | return std::make_unique<SimpleEqualityDelete>(delete_block, delete_col_ids); |
30 | 0 | } else { |
31 | 0 | return std::make_unique<MultiEqualityDelete>(delete_block, delete_col_ids); |
32 | 0 | } |
33 | 0 | } |
34 | | |
35 | 0 | Status SimpleEqualityDelete::_build_set() { |
36 | 0 | COUNTER_UPDATE(num_delete_rows, _delete_block->rows()); |
37 | 0 | if (_delete_block->columns() != 1) [[unlikely]] { |
38 | 0 | return Status::InternalError("Simple equality delete can be only applied with one column"); |
39 | 0 | } |
40 | 0 | auto& column_and_type = _delete_block->get_by_position(0); |
41 | 0 | auto delete_column_type = remove_nullable(column_and_type.type)->get_primitive_type(); |
42 | 0 | _hybrid_set.reset(create_set(delete_column_type, _delete_block->rows(), false)); |
43 | 0 | _hybrid_set->insert_fixed_len(column_and_type.column, 0); |
44 | 0 | return Status::OK(); |
45 | 0 | } |
46 | | |
47 | | Status SimpleEqualityDelete::filter_data_block( |
48 | | Block* data_block, const std::unordered_map<std::string, uint32_t>* col_name_to_block_idx, |
49 | | const std::unordered_map<int, std::string>& id_to_block_column_name, |
50 | 0 | IColumn::Filter& filter) { |
51 | 0 | SCOPED_TIMER(equality_delete_time); |
52 | 0 | DCHECK(_delete_col_ids.size() == 1); |
53 | 0 | auto column_field_id = _delete_col_ids[0]; |
54 | |
|
55 | 0 | auto column_and_type = data_block->get_by_position( |
56 | 0 | col_name_to_block_idx->at(id_to_block_column_name.at(column_field_id))); |
57 | |
|
58 | 0 | size_t rows = data_block->rows(); |
59 | | // _filter: 1 => in _hybrid_set; 0 => not in _hybrid_set |
60 | 0 | if (_single_filter == nullptr) { |
61 | 0 | _single_filter = std::make_unique<IColumn::Filter>(rows, 0); |
62 | 0 | } else { |
63 | | // reset the array capacity and fill all elements using the 0 |
64 | 0 | _single_filter->assign(rows, UInt8(0)); |
65 | 0 | } |
66 | 0 | if (column_and_type.column->is_nullable()) { |
67 | 0 | const NullMap& null_map = |
68 | 0 | reinterpret_cast<const ColumnNullable*>(column_and_type.column.get()) |
69 | 0 | ->get_null_map_data(); |
70 | 0 | _hybrid_set->find_batch_nullable( |
71 | 0 | remove_nullable(column_and_type.column)->assume_mutable_ref(), rows, null_map, |
72 | 0 | *_single_filter); |
73 | 0 | if (_hybrid_set->contain_null()) { |
74 | 0 | auto* filter_data = _single_filter->data(); |
75 | 0 | for (size_t i = 0; i < rows; ++i) { |
76 | 0 | filter_data[i] = filter_data[i] || null_map[i]; |
77 | 0 | } |
78 | 0 | } |
79 | 0 | } else { |
80 | 0 | _hybrid_set->find_batch(column_and_type.column->assume_mutable_ref(), rows, |
81 | 0 | *_single_filter); |
82 | 0 | } |
83 | | // should reverse _filter |
84 | 0 | auto* filter_data = filter.data(); |
85 | 0 | for (size_t i = 0; i < rows; ++i) { |
86 | 0 | filter_data[i] &= !_single_filter->data()[i]; |
87 | 0 | } |
88 | 0 | return Status::OK(); |
89 | 0 | } |
90 | | |
91 | 0 | Status MultiEqualityDelete::_build_set() { |
92 | 0 | COUNTER_UPDATE(num_delete_rows, _delete_block->rows()); |
93 | 0 | size_t rows = _delete_block->rows(); |
94 | 0 | _delete_hashes.clear(); |
95 | 0 | _delete_hashes.resize(rows, 0); |
96 | 0 | for (ColumnPtr column : _delete_block->get_columns()) { |
97 | 0 | column->update_hashes_with_value(_delete_hashes.data(), nullptr); |
98 | 0 | } |
99 | 0 | for (size_t i = 0; i < rows; ++i) { |
100 | 0 | _delete_hash_map.insert({_delete_hashes[i], i}); |
101 | 0 | } |
102 | 0 | _data_column_index.resize(_delete_block->columns()); |
103 | 0 | return Status::OK(); |
104 | 0 | } |
105 | | |
106 | | Status MultiEqualityDelete::filter_data_block( |
107 | | Block* data_block, const std::unordered_map<std::string, uint32_t>* col_name_to_block_idx, |
108 | | const std::unordered_map<int, std::string>& id_to_block_column_name, |
109 | 0 | IColumn::Filter& filter) { |
110 | 0 | SCOPED_TIMER(equality_delete_time); |
111 | 0 | DCHECK_EQ(_delete_block->get_columns_with_type_and_name().size(), _delete_col_ids.size()); |
112 | 0 | size_t column_index = 0; |
113 | |
|
114 | 0 | for (size_t idx = 0; idx < _delete_block->get_columns_with_type_and_name().size(); ++idx) { |
115 | 0 | auto delete_col = _delete_block->get_columns_with_type_and_name()[idx]; |
116 | 0 | auto delete_col_id = _delete_col_ids[idx]; |
117 | |
|
118 | 0 | DCHECK(id_to_block_column_name.contains(delete_col_id)); |
119 | 0 | const auto& block_column_name = id_to_block_column_name.at(delete_col_id); |
120 | 0 | if (!col_name_to_block_idx->contains(block_column_name)) [[unlikely]] { |
121 | 0 | return Status::InternalError("Column '{}' not found in data block: {}", |
122 | 0 | block_column_name, data_block->dump_structure()); |
123 | 0 | } |
124 | 0 | auto column_and_type = |
125 | 0 | data_block->safe_get_by_position(col_name_to_block_idx->at(block_column_name)); |
126 | 0 | if (!delete_col.type->equals(*column_and_type.type)) [[unlikely]] { |
127 | 0 | return Status::InternalError( |
128 | 0 | "Not support type change in column '{}', src type: {}, target type: {}", |
129 | 0 | block_column_name, delete_col.type->get_name(), |
130 | 0 | column_and_type.type->get_name()); |
131 | 0 | } |
132 | 0 | _data_column_index[column_index++] = col_name_to_block_idx->at(block_column_name); |
133 | 0 | } |
134 | 0 | size_t rows = data_block->rows(); |
135 | 0 | _data_hashes.clear(); |
136 | 0 | _data_hashes.resize(rows, 0); |
137 | 0 | for (size_t index : _data_column_index) { |
138 | 0 | data_block->get_by_position(index).column->update_hashes_with_value(_data_hashes.data(), |
139 | 0 | nullptr); |
140 | 0 | } |
141 | 0 | auto* filter_data = filter.data(); |
142 | 0 | for (size_t i = 0; i < rows; ++i) { |
143 | 0 | for (auto beg = _delete_hash_map.lower_bound(_data_hashes[i]), |
144 | 0 | end = _delete_hash_map.upper_bound(_data_hashes[i]); |
145 | 0 | beg != end; ++beg) { |
146 | 0 | if (filter[i] && _equal(data_block, i, beg->second)) { |
147 | 0 | filter_data[i] = 0; |
148 | 0 | break; |
149 | 0 | } |
150 | 0 | } |
151 | 0 | } |
152 | |
|
153 | 0 | return Status::OK(); |
154 | 0 | } |
155 | | |
156 | | bool MultiEqualityDelete::_equal(Block* data_block, size_t data_row_index, |
157 | 0 | size_t delete_row_index) { |
158 | 0 | for (size_t i = 0; i < _delete_block->columns(); ++i) { |
159 | 0 | ColumnPtr data_col = data_block->get_by_position(_data_column_index[i]).column; |
160 | 0 | ColumnPtr delete_col = _delete_block->get_by_position(i).column; |
161 | 0 | if (data_col->compare_at(data_row_index, delete_row_index, *delete_col, -1) != 0) { |
162 | 0 | return false; |
163 | 0 | } |
164 | 0 | } |
165 | 0 | return true; |
166 | 0 | } |
167 | | |
168 | | #include "common/compile_check_end.h" |
169 | | } // namespace doris |