be/src/format/table/equality_delete.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "format/table/equality_delete.h" |
19 | | |
20 | | #include "exprs/create_predicate_function.h" |
21 | | |
22 | | namespace doris { |
23 | | #include "common/compile_check_begin.h" |
24 | | |
25 | | std::unique_ptr<EqualityDeleteBase> EqualityDeleteBase::get_delete_impl( |
26 | 0 | const Block* delete_block, const std::vector<int>& delete_col_ids) { |
27 | 0 | DCHECK_EQ(delete_block->columns(), delete_col_ids.size()); |
28 | 0 | if (delete_block->columns() == 1) { |
29 | 0 | return std::make_unique<SimpleEqualityDelete>(delete_block, delete_col_ids); |
30 | 0 | } else { |
31 | 0 | return std::make_unique<MultiEqualityDelete>(delete_block, delete_col_ids); |
32 | 0 | } |
33 | 0 | } |
34 | | |
35 | 0 | Status SimpleEqualityDelete::_build_set() { |
36 | 0 | COUNTER_UPDATE(num_delete_rows, _delete_block->rows()); |
37 | 0 | if (_delete_block->columns() != 1) [[unlikely]] { |
38 | 0 | return Status::InternalError("Simple equality delete can be only applied with one column"); |
39 | 0 | } |
40 | 0 | auto& column_and_type = _delete_block->get_by_position(0); |
41 | 0 | auto delete_column_type = remove_nullable(column_and_type.type)->get_primitive_type(); |
42 | 0 | _hybrid_set.reset(create_set(delete_column_type, _delete_block->rows(), false)); |
43 | 0 | _hybrid_set->insert_fixed_len(column_and_type.column, 0); |
44 | 0 | return Status::OK(); |
45 | 0 | } |
46 | | |
47 | | Status SimpleEqualityDelete::filter_data_block( |
48 | | Block* data_block, const std::unordered_map<std::string, uint32_t>* col_name_to_block_idx, |
49 | | const std::unordered_map<int, std::string>& id_to_block_column_name, |
50 | 0 | IColumn::Filter& filter) { |
51 | 0 | SCOPED_TIMER(equality_delete_time); |
52 | 0 | DCHECK(_delete_col_ids.size() == 1); |
53 | 0 | auto column_field_id = _delete_col_ids[0]; |
54 | |
|
55 | 0 | const auto& block_col_name = id_to_block_column_name.at(column_field_id); |
56 | 0 | auto block_idx = col_name_to_block_idx->at(block_col_name); |
57 | 0 | LOG(INFO) << "[EqDeleteDebug] SimpleEqualityDelete::filter_data_block: field_id=" |
58 | 0 | << column_field_id << ", block_col_name=" << block_col_name |
59 | 0 | << ", block_idx=" << block_idx << ", delete_block_rows=" << _delete_block->rows() |
60 | 0 | << ", data_block_rows=" << data_block->rows(); |
61 | |
|
62 | 0 | auto column_and_type = data_block->get_by_position(block_idx); |
63 | |
|
64 | 0 | size_t rows = data_block->rows(); |
65 | | // _filter: 1 => in _hybrid_set; 0 => not in _hybrid_set |
66 | 0 | if (_single_filter == nullptr) { |
67 | 0 | _single_filter = std::make_unique<IColumn::Filter>(rows, 0); |
68 | 0 | } else { |
69 | | // reset the array capacity and fill all elements using the 0 |
70 | 0 | _single_filter->assign(rows, UInt8(0)); |
71 | 0 | } |
72 | 0 | if (column_and_type.column->is_nullable()) { |
73 | 0 | const NullMap& null_map = |
74 | 0 | reinterpret_cast<const ColumnNullable*>(column_and_type.column.get()) |
75 | 0 | ->get_null_map_data(); |
76 | 0 | _hybrid_set->find_batch_nullable( |
77 | 0 | remove_nullable(column_and_type.column)->assume_mutable_ref(), rows, null_map, |
78 | 0 | *_single_filter); |
79 | 0 | if (_hybrid_set->contain_null()) { |
80 | 0 | auto* filter_data = _single_filter->data(); |
81 | 0 | for (size_t i = 0; i < rows; ++i) { |
82 | 0 | filter_data[i] = filter_data[i] || null_map[i]; |
83 | 0 | } |
84 | 0 | } |
85 | 0 | } else { |
86 | 0 | _hybrid_set->find_batch(column_and_type.column->assume_mutable_ref(), rows, |
87 | 0 | *_single_filter); |
88 | 0 | } |
89 | | // should reverse _filter |
90 | 0 | auto* filter_data = filter.data(); |
91 | 0 | for (size_t i = 0; i < rows; ++i) { |
92 | 0 | filter_data[i] &= !_single_filter->data()[i]; |
93 | 0 | } |
94 | 0 | return Status::OK(); |
95 | 0 | } |
96 | | |
97 | 0 | Status MultiEqualityDelete::_build_set() { |
98 | 0 | COUNTER_UPDATE(num_delete_rows, _delete_block->rows()); |
99 | 0 | size_t rows = _delete_block->rows(); |
100 | 0 | _delete_hashes.clear(); |
101 | 0 | _delete_hashes.resize(rows, 0); |
102 | 0 | for (ColumnPtr column : _delete_block->get_columns()) { |
103 | 0 | column->update_hashes_with_value(_delete_hashes.data(), nullptr); |
104 | 0 | } |
105 | 0 | for (size_t i = 0; i < rows; ++i) { |
106 | 0 | _delete_hash_map.insert({_delete_hashes[i], i}); |
107 | 0 | } |
108 | 0 | _data_column_index.resize(_delete_block->columns()); |
109 | 0 | return Status::OK(); |
110 | 0 | } |
111 | | |
112 | | Status MultiEqualityDelete::filter_data_block( |
113 | | Block* data_block, const std::unordered_map<std::string, uint32_t>* col_name_to_block_idx, |
114 | | const std::unordered_map<int, std::string>& id_to_block_column_name, |
115 | 0 | IColumn::Filter& filter) { |
116 | 0 | SCOPED_TIMER(equality_delete_time); |
117 | 0 | DCHECK_EQ(_delete_block->get_columns_with_type_and_name().size(), _delete_col_ids.size()); |
118 | 0 | size_t column_index = 0; |
119 | |
|
120 | 0 | for (size_t idx = 0; idx < _delete_block->get_columns_with_type_and_name().size(); ++idx) { |
121 | 0 | auto delete_col = _delete_block->get_columns_with_type_and_name()[idx]; |
122 | 0 | auto delete_col_id = _delete_col_ids[idx]; |
123 | |
|
124 | 0 | DCHECK(id_to_block_column_name.contains(delete_col_id)); |
125 | 0 | const auto& block_column_name = id_to_block_column_name.at(delete_col_id); |
126 | 0 | if (!col_name_to_block_idx->contains(block_column_name)) [[unlikely]] { |
127 | 0 | return Status::InternalError("Column '{}' not found in data block: {}", |
128 | 0 | block_column_name, data_block->dump_structure()); |
129 | 0 | } |
130 | 0 | auto column_and_type = |
131 | 0 | data_block->safe_get_by_position(col_name_to_block_idx->at(block_column_name)); |
132 | 0 | if (!delete_col.type->equals(*column_and_type.type)) [[unlikely]] { |
133 | 0 | return Status::InternalError( |
134 | 0 | "Not support type change in column '{}', src type: {}, target type: {}", |
135 | 0 | block_column_name, delete_col.type->get_name(), |
136 | 0 | column_and_type.type->get_name()); |
137 | 0 | } |
138 | 0 | _data_column_index[column_index++] = col_name_to_block_idx->at(block_column_name); |
139 | 0 | } |
140 | 0 | size_t rows = data_block->rows(); |
141 | 0 | _data_hashes.clear(); |
142 | 0 | _data_hashes.resize(rows, 0); |
143 | 0 | for (size_t index : _data_column_index) { |
144 | 0 | data_block->get_by_position(index).column->update_hashes_with_value(_data_hashes.data(), |
145 | 0 | nullptr); |
146 | 0 | } |
147 | 0 | auto* filter_data = filter.data(); |
148 | 0 | for (size_t i = 0; i < rows; ++i) { |
149 | 0 | for (auto beg = _delete_hash_map.lower_bound(_data_hashes[i]), |
150 | 0 | end = _delete_hash_map.upper_bound(_data_hashes[i]); |
151 | 0 | beg != end; ++beg) { |
152 | 0 | if (filter[i] && _equal(data_block, i, beg->second)) { |
153 | 0 | filter_data[i] = 0; |
154 | 0 | break; |
155 | 0 | } |
156 | 0 | } |
157 | 0 | } |
158 | |
|
159 | 0 | return Status::OK(); |
160 | 0 | } |
161 | | |
162 | | bool MultiEqualityDelete::_equal(Block* data_block, size_t data_row_index, |
163 | 0 | size_t delete_row_index) { |
164 | 0 | for (size_t i = 0; i < _delete_block->columns(); ++i) { |
165 | 0 | ColumnPtr data_col = data_block->get_by_position(_data_column_index[i]).column; |
166 | 0 | ColumnPtr delete_col = _delete_block->get_by_position(i).column; |
167 | 0 | if (data_col->compare_at(data_row_index, delete_row_index, *delete_col, -1) != 0) { |
168 | 0 | return false; |
169 | 0 | } |
170 | 0 | } |
171 | 0 | return true; |
172 | 0 | } |
173 | | |
174 | | #include "common/compile_check_end.h" |
175 | | } // namespace doris |