be/src/format/table/transactional_hive_reader.cpp

Source
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

#include "format/table/transactional_hive_reader.h"

#include <re2/re2.h>

#include "core/data_type/data_type_factory.hpp"
#include "format/orc/vorc_reader.h"
#include "format/table/table_schema_change_helper.h"
#include "transactional_hive_common.h"

namespace doris {

namespace io {
struct IOContext;
} // namespace io
class VExprContext;
} // namespace doris

namespace doris {

// Constructs the reader by forwarding every argument (plus a trailing `false`) to the
// OrcReader base constructor, then registers the "TransactionalHiveProfile" timer and the
// three delete-file metrics grouped under it on the reader's runtime profile.
TransactionalHiveReader::TransactionalHiveReader(RuntimeProfile* profile, RuntimeState* state,
                                                 const TFileScanRangeParams& params,
                                                 const TFileRangeDesc& range, size_t batch_size,
                                                 const std::string& ctz, io::IOContext* io_ctx,
                                                 FileMetaCache* meta_cache)
        : OrcReader(profile, state, params, range, batch_size, ctz, io_ctx, meta_cache, false) {
    // Parent profile entry; every metric registered below is attached as its child.
    static const char* const kParentProfileName = "TransactionalHiveProfile";
    ADD_TIMER(get_profile(), kParentProfileName);

    auto& metrics = _transactional_orc_profile;
    // Number of delete-delta files that were read for this data file.
    metrics.num_delete_files =
            ADD_CHILD_COUNTER(get_profile(), "NumDeleteFiles", TUnit::UNIT, kParentProfileName);
    // Number of delete records loaded from those files.
    metrics.num_delete_rows =
            ADD_CHILD_COUNTER(get_profile(), "NumDeleteRows", TUnit::UNIT, kParentProfileName);
    // Time spent reading the delete-delta files.
    metrics.delete_files_read_time =
            ADD_CHILD_TIMER(get_profile(), "DeleteFileReadTime", kParentProfileName);
}

// ============================================================================
// on_before_init_reader: ACID schema mapping
//
// Prepares `ctx` for reading a Hive ACID ORC data file:
//   1. Remembers the column descriptors / block-index map and extracts partition values.
//   2. Collects the REGULAR and GENERATED column names into `_col_names` and registers a
//      fill handler for every SYNTHESIZED column whose name starts with GLOBAL_ROWID_COL.
//   3. Appends the ACID metadata column names and publishes the list via `ctx->column_names`.
//   4. Builds the table->file name mapping in `table_info_node_ptr`: ACID metadata columns map
//      to their file names, user columns map to "<row struct name>.<column>" when the column
//      exists inside the file's row struct, and are marked as not existing otherwise.
//
// Returns an error when a helper fails, when a table column name collides with an ACID
// metadata column, or when the file does not expose the row struct at ROW_OFFSET.
// ============================================================================
Status TransactionalHiveReader::on_before_init_reader(ReaderInitContext* ctx) {
    _column_descs = ctx->column_descs;
    _fill_col_name_to_block_idx = ctx->col_name_to_block_idx;
    RETURN_IF_ERROR(
            _extract_partition_values(*ctx->range, ctx->tuple_descriptor, _fill_partition_values));

    for (const auto& desc : *ctx->column_descs) {
        if (desc.category == ColumnCategory::REGULAR ||
            desc.category == ColumnCategory::GENERATED) {
            _col_names.push_back(desc.name);
        } else if (desc.category == ColumnCategory::SYNTHESIZED &&
                   desc.name.starts_with(BeConsts::GLOBAL_ROWID_COL)) {
            auto topn_row_id_column_iter = _create_topn_row_id_column_iterator();
            // The handler is stored and invoked later, so it owns a copy of the column name
            // instead of holding a reference to the loop variable.
            this->register_synthesized_column_handler(
                    desc.name,
                    [iter = std::move(topn_row_id_column_iter), this, col_name = desc.name](
                            Block* block, size_t rows) -> Status {
                        return fill_topn_row_id(iter, col_name, block, rows);
                    });
        }
    }

    _is_acid = true;
    // Add ACID column names (originalTransaction, bucket, rowId, etc.)
    const auto& acid_names_lower = TransactionalHive::READ_ROW_COLUMN_NAMES_LOWER_CASE;
    _col_names.insert(_col_names.end(), acid_names_lower.begin(), acid_names_lower.end());
    ctx->column_names = _col_names;

    // Get ORC file type
    const orc::Type* orc_type_ptr = nullptr;
    RETURN_IF_ERROR(get_file_type(&orc_type_ptr));
    if (orc_type_ptr == nullptr) {
        return Status::InternalError("Failed to get ORC file type of {}", get_scan_range().path);
    }
    const auto& orc_type = *orc_type_ptr;

    // Add ACID metadata columns to table_info_node
    for (size_t idx = 0; idx < acid_names_lower.size(); ++idx) {
        table_info_node_ptr->add_children(acid_names_lower[idx],
                                          TransactionalHive::READ_ROW_COLUMN_NAMES[idx],
                                          std::make_shared<ScalarNode>());
    }

    // https://issues.apache.org/jira/browse/HIVE-15190
    // orc::Type accessors do not bounds-check, so validate the layout before touching the
    // row struct; a file without the ACID wrapper would otherwise be read out of bounds.
    const auto row_offset = static_cast<uint64_t>(TransactionalHive::ROW_OFFSET);
    if (orc_type.getSubtypeCount() <= row_offset) {
        return Status::InternalError(
                "ORC file {} is not in Hive ACID format: expected more than {} top-level "
                "fields, found {}",
                get_scan_range().path, row_offset, orc_type.getSubtypeCount());
    }
    const orc::Type* row_orc_type = orc_type.getSubtype(row_offset);
    if (row_orc_type == nullptr || row_orc_type->getKind() != orc::TypeKind::STRUCT) {
        return Status::InternalError(
                "ORC file {} is not in Hive ACID format: field {} is not a struct",
                get_scan_range().path, row_offset);
    }

    // File column name -> position inside the row struct (first occurrence wins).
    std::map<std::string, uint64_t> row_names_map;
    for (uint64_t idx = 0; idx < row_orc_type->getSubtypeCount(); idx++) {
        row_names_map.emplace(row_orc_type->getFieldName(idx), idx);
    }

    // Match table columns to file columns by name
    for (const auto& slot : ctx->tuple_descriptor->slots()) {
        const auto& slot_name = slot->col_name();

        if (std::find(acid_names_lower.begin(), acid_names_lower.end(), slot_name) !=
            acid_names_lower.end()) {
            return Status::InternalError("Column {} conflicts with ACID metadata column",
                                         slot_name);
        }

        const auto file_col_it = row_names_map.find(slot_name);
        if (file_col_it == row_names_map.end()) {
            // Column is absent from the file's row struct.
            table_info_node_ptr->add_not_exist_children(slot_name);
            continue;
        }

        std::shared_ptr<Node> child_node = nullptr;
        RETURN_IF_ERROR(BuildTableInfoUtil::by_orc_name(
                slot->type(), row_orc_type->getSubtype(file_col_it->second), child_node));
        // User columns live inside the row struct, hence the "<row>.<column>" file name.
        auto file_column_name = fmt::format(
                "{}.{}", TransactionalHive::ACID_COLUMN_NAMES[TransactionalHive::ROW_OFFSET],
                slot_name);
        table_info_node_ptr->add_children(slot_name, file_column_name, child_node);
    }
    ctx->table_info_node = table_info_node_ptr;
    return Status::OK();
}

// ============================================================================
// on_after_init_reader: read delete delta files
//
// For every delete-delta directory listed in the scan range, finds the delete file whose
// bucket name matches this data file (ignoring a trailing "_<attemptId>"), reads it fully with
// a separate OrcReader and stores each (originalTransaction, bucket, rowId) triple in
// `_acid_delete_rows`.
//
// When at least one delete row was loaded, aggregate push-down is disabled, the delete set is
// handed to the reader via set_delete_rows(), and the profile counters are updated. With zero
// delete rows none of this happens (the counters are left untouched, as before).
//
// Returns the first error produced while initialising or reading a delete file.
// ============================================================================
Status TransactionalHiveReader::on_after_init_reader(ReaderInitContext* /*ctx*/) {
    std::string data_file_path = get_scan_range().path;
    // the path in _range has the namenode prefix removed,
    // and the file_path in delete file is full path, so we should add it back.
    if (get_scan_params().__isset.hdfs_params && get_scan_params().hdfs_params.__isset.fs_name) {
        const std::string fs_name = get_scan_params().hdfs_params.fs_name;
        if (!starts_with(data_file_path, fs_name)) {
            data_file_path = fs_name + data_file_path;
        }
    }

    int64_t num_delete_rows = 0;
    int64_t num_delete_files = 0;

    // bucket_xxx_attemptId => bucket_xxx
    auto remove_bucket_attemptId = [](const std::string& str) -> std::string {
        // Compiled once and reused for every file name instead of once per call.
        static const re2::RE2 pattern("^bucket_\\d+_\\d+$");
        if (re2::RE2::FullMatch(str, pattern)) {
            // A full match guarantees an underscore before the attempt id.
            return str.substr(0, str.rfind('_'));
        }
        return str;
    };

    // Both values depend only on the data file, so compute them once for all delete deltas.
    const std::string data_file_name = std::filesystem::path(data_file_path).filename().string();
    const std::string data_bucket_name = remove_bucket_attemptId(data_file_name);

    SCOPED_TIMER(_transactional_orc_profile.delete_files_read_time);
    for (const auto& delete_delta :
         get_scan_range().table_format_params.transactional_hive_params.delete_deltas) {
        // First delete file whose normalised bucket name equals the data file's one.
        const auto& delta_file_names = delete_delta.file_names;
        const auto matched_file =
                std::find_if(delta_file_names.begin(), delta_file_names.end(),
                             [&](const std::string& candidate) {
                                 return remove_bucket_attemptId(candidate) == data_bucket_name;
                             });
        if (matched_file == delta_file_names.end()) {
            continue;
        }
        // The original (un-normalised) file name is what exists on storage.
        auto delete_file = fmt::format("{}/{}", delete_delta.directory_location, *matched_file);

        TFileRangeDesc delete_range;
        delete_range.__set_fs_name(get_scan_range().fs_name);
        delete_range.path = delete_file;
        delete_range.start_offset = 0;
        delete_range.size = -1;
        delete_range.file_size = -1;

        OrcReader delete_reader(get_profile(), get_state(), get_scan_params(), delete_range,
                                256 /*batch_size*/, get_state()->timezone(), get_io_ctx(),
                                _meta_cache, false);

        // Map the lower-case column names used in the block to the names stored in the file.
        auto acid_info_node = std::make_shared<StructNode>();
        for (size_t idx = 0; idx < TransactionalHive::DELETE_ROW_COLUMN_NAMES_LOWER_CASE.size();
             ++idx) {
            auto const& table_column_name =
                    TransactionalHive::DELETE_ROW_COLUMN_NAMES_LOWER_CASE[idx];
            auto const& file_column_name = TransactionalHive::DELETE_ROW_COLUMN_NAMES[idx];
            acid_info_node->add_children(table_column_name, file_column_name,
                                         std::make_shared<ScalarNode>());
        }

        OrcInitContext delete_ctx;
        delete_ctx.column_names.assign(
                TransactionalHive::DELETE_ROW_COLUMN_NAMES_LOWER_CASE.begin(),
                TransactionalHive::DELETE_ROW_COLUMN_NAMES_LOWER_CASE.end());
        // NOTE(review): the context wants a mutable pointer; presumably the reader only reads
        // this shared map - confirm, since the const_cast removes the compiler's protection.
        delete_ctx.col_name_to_block_idx = const_cast<std::unordered_map<std::string, uint32_t>*>(
                &TransactionalHive::DELETE_COL_NAME_TO_BLOCK_IDX);
        delete_ctx.table_info_node = acid_info_node;
        RETURN_IF_ERROR(delete_reader.init_reader(&delete_ctx));

        // Positions of the three key columns inside the block built below.
        constexpr size_t ORIGINAL_TRANSACTION_INDEX = 0;
        constexpr size_t BUCKET_ID_INDEX = 1;
        constexpr size_t ROW_ID_INDEX = 2;

        bool eof = false;
        while (!eof) {
            // A fresh block with empty columns is used for every batch.
            Block block;
            for (const auto& i : TransactionalHive::DELETE_ROW_PARAMS) {
                DataTypePtr data_type = DataTypeFactory::instance().create_data_type(i.type, false);
                MutableColumnPtr data_column = data_type->create_column();
                block.insert(ColumnWithTypeAndName(std::move(data_column), data_type,
                                                   i.column_lower_case));
            }
            size_t read_rows = 0;
            RETURN_IF_ERROR(delete_reader.get_next_block(&block, &read_rows, &eof));
            if (read_rows == 0) {
                continue;
            }

            const auto& original_transaction_column = assert_cast<const ColumnInt64&>(
                    *block.get_by_position(ORIGINAL_TRANSACTION_INDEX).column);
            const auto& bucket_id_column = assert_cast<const ColumnInt32&>(
                    *block.get_by_position(BUCKET_ID_INDEX).column);
            const auto& row_id_column =
                    assert_cast<const ColumnInt64&>(*block.get_by_position(ROW_ID_INDEX).column);

            DCHECK_EQ(original_transaction_column.size(), read_rows);
            DCHECK_EQ(bucket_id_column.size(), read_rows);
            DCHECK_EQ(row_id_column.size(), read_rows);

            for (size_t i = 0; i < read_rows; ++i) {
                Int64 original_transaction = original_transaction_column.get_int(i);
                Int64 bucket_id = bucket_id_column.get_int(i);
                Int64 row_id = row_id_column.get_int(i);
                AcidRowID delete_row_id = {original_transaction, bucket_id, row_id};
                _acid_delete_rows.insert(delete_row_id);
                // Counts every row read, including ones already present in the set.
                ++num_delete_rows;
            }
        }
        ++num_delete_files;
    }
    if (num_delete_rows > 0) {
        set_push_down_agg_type(TPushAggOp::NONE);
        set_delete_rows(&_acid_delete_rows);
        COUNTER_UPDATE(_transactional_orc_profile.num_delete_files, num_delete_files);
        COUNTER_UPDATE(_transactional_orc_profile.num_delete_rows, num_delete_rows);
    }
    return Status::OK();
}

// ============================================================================
// on_before_read_block: expand ACID columns into block
//
// Appends one empty column per TransactionalHive::READ_PARAMS entry to the end of `block` and
// records each new column's position in the name -> block-index map, keyed by the entry's
// lower-case name. on_after_read_block undoes both steps.
//
// TODO: Consider caching ACID column templates at init time to avoid repeated
// create_column + map update on every batch. Requires a block template mechanism.
// ============================================================================
Status TransactionalHiveReader::on_before_read_block(Block* block) {
    for (const auto& param : TransactionalHive::READ_PARAMS) {
        const auto& acid_name = param.column_lower_case;
        DataTypePtr acid_type = get_data_type_with_default_argument(
                DataTypeFactory::instance().create_data_type(param.type, false));

        // The column is appended below, so its index is the current column count.
        const auto position = static_cast<uint32_t>(block->columns());
        (*col_name_to_block_idx_ref())[acid_name] = position;

        MutableColumnPtr empty_column = acid_type->create_column();
        block->insert(ColumnWithTypeAndName(std::move(empty_column), acid_type, acid_name));
    }
    return Status::OK();
}

// ============================================================================
// on_after_read_block: shrink ACID columns from block
//
// Counterpart of on_before_read_block: passes the column count minus the number of
// READ_PARAMS entries to Block::erase_useless_column (presumably the number of leading
// columns to keep - confirm against Block), then removes the ACID names from the
// name -> block-index map. `read_rows` is not used.
// ============================================================================
Status TransactionalHiveReader::on_after_read_block(Block* block, size_t* /*read_rows*/) {
    const auto& acid_params = TransactionalHive::READ_PARAMS;

    const auto remaining_columns = block->columns() - acid_params.size();
    Block::erase_useless_column(block, remaining_columns);

    for (const auto& param : acid_params) {
        col_name_to_block_idx_ref()->erase(param.column_lower_case);
    }
    return Status::OK();
}

} // namespace doris

Coverage Report

Created: 2026-04-21 19:40

Line	Count	Source
1		// Licensed to the Apache Software Foundation (ASF) under one
2		// or more contributor license agreements. See the NOTICE file
3		// distributed with this work for additional information
4		// regarding copyright ownership. The ASF licenses this file
5		// to you under the Apache License, Version 2.0 (the
6		// "License"); you may not use this file except in compliance
7		// with the License. You may obtain a copy of the License at
8		//
9		// http://www.apache.org/licenses/LICENSE-2.0
10		//
11		// Unless required by applicable law or agreed to in writing,
12		// software distributed under the License is distributed on an
13		// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14		// KIND, either express or implied. See the License for the
15		// specific language governing permissions and limitations
16		// under the License.
17
18		#include "format/table/transactional_hive_reader.h"
19
20		#include <re2/re2.h>
21
22		#include "core/data_type/data_type_factory.hpp"
23		#include "format/orc/vorc_reader.h"
24		#include "format/table/table_schema_change_helper.h"
25		#include "transactional_hive_common.h"
26
27		namespace doris {
28
29		namespace io {
30		struct IOContext;
31		} // namespace io
32		class VExprContext;
33		} // namespace doris
34
35		namespace doris {
36
37		TransactionalHiveReader::TransactionalHiveReader(RuntimeProfile* profile, RuntimeState* state,
38		const TFileScanRangeParams& params,
39		const TFileRangeDesc& range, size_t batch_size,
40		const std::string& ctz, io::IOContext* io_ctx,
41		FileMetaCache* meta_cache)
42	0	: OrcReader(profile, state, params, range, batch_size, ctz, io_ctx, meta_cache, false) {
43	0	static const char* transactional_hive_profile = "TransactionalHiveProfile";
44	0	ADD_TIMER(get_profile(), transactional_hive_profile);
45	0	_transactional_orc_profile.num_delete_files = ADD_CHILD_COUNTER(
46	0	get_profile(), "NumDeleteFiles", TUnit::UNIT, transactional_hive_profile);
47	0	_transactional_orc_profile.num_delete_rows = ADD_CHILD_COUNTER(
48	0	get_profile(), "NumDeleteRows", TUnit::UNIT, transactional_hive_profile);
49	0	_transactional_orc_profile.delete_files_read_time =
50	0	ADD_CHILD_TIMER(get_profile(), "DeleteFileReadTime", transactional_hive_profile);
51	0	}
52
53		// ============================================================================
54		// on_before_init_reader: ACID schema mapping
55		// ============================================================================
56	0	Status TransactionalHiveReader::on_before_init_reader(ReaderInitContext* ctx) {
57	0	_column_descs = ctx->column_descs;
58	0	_fill_col_name_to_block_idx = ctx->col_name_to_block_idx;
59	0	RETURN_IF_ERROR(
60	0	_extract_partition_values(*ctx->range, ctx->tuple_descriptor, _fill_partition_values));
61	0	for (auto& desc : *ctx->column_descs) {
62	0	if (desc.category == ColumnCategory::REGULAR ||
63	0	desc.category == ColumnCategory::GENERATED) {
64	0	_col_names.push_back(desc.name);
65	0	} else if (desc.category == ColumnCategory::SYNTHESIZED &&
66	0	desc.name.starts_with(BeConsts::GLOBAL_ROWID_COL)) {
67	0	auto topn_row_id_column_iter = _create_topn_row_id_column_iterator();
68	0	this->register_synthesized_column_handler(
69	0	desc.name,
70	0	[iter = std::move(topn_row_id_column_iter), this, &desc](
71	0	Block* block, size_t rows) -> Status {
72	0	return fill_topn_row_id(iter, desc.name, block, rows);
73	0	});
74	0	continue;
75	0	}
76	0	}
77
78	0	_is_acid = true;
79		// Add ACID column names (originalTransaction, bucket, rowId, etc.)
80	0	_col_names.insert(_col_names.end(), TransactionalHive::READ_ROW_COLUMN_NAMES_LOWER_CASE.begin(),
81	0	TransactionalHive::READ_ROW_COLUMN_NAMES_LOWER_CASE.end());
82	0	ctx->column_names = _col_names;
83
84		// Get ORC file type
85	0	const orc::Type* orc_type_ptr = nullptr;
86	0	RETURN_IF_ERROR(get_file_type(&orc_type_ptr));
87	0	const auto& orc_type = *orc_type_ptr;
88
89		// Add ACID metadata columns to table_info_node
90	0	for (auto idx = 0; idx < TransactionalHive::READ_ROW_COLUMN_NAMES_LOWER_CASE.size(); idx++) {
91	0	table_info_node_ptr->add_children(TransactionalHive::READ_ROW_COLUMN_NAMES_LOWER_CASE[idx],
92	0	TransactionalHive::READ_ROW_COLUMN_NAMES[idx],
93	0	std::make_shared<ScalarNode>());
94	0	}
95
96		// https://issues.apache.org/jira/browse/HIVE-15190
97	0	auto row_orc_type = orc_type.getSubtype(TransactionalHive::ROW_OFFSET);
98	0	std::vector<std::string> row_names;
99	0	std::map<std::string, uint64_t> row_names_map;
100	0	for (uint64_t idx = 0; idx < row_orc_type->getSubtypeCount(); idx++) {
101	0	const auto& file_column_name = row_orc_type->getFieldName(idx);
102	0	row_names.emplace_back(file_column_name);
103	0	row_names_map.emplace(file_column_name, idx);
104	0	}
105
106		// Match table columns to file columns by name
107	0	for (const auto& slot : ctx->tuple_descriptor->slots()) {
108	0	const auto& slot_name = slot->col_name();
109
110	0	if (std::count(TransactionalHive::READ_ROW_COLUMN_NAMES_LOWER_CASE.begin(),
111	0	TransactionalHive::READ_ROW_COLUMN_NAMES_LOWER_CASE.end(), slot_name) > 0) {
112	0	return Status::InternalError("Column {} conflicts with ACID metadata column",
113	0	slot_name);
114	0	}
115
116	0	if (row_names_map.contains(slot_name)) {
117	0	std::shared_ptr<Node> child_node = nullptr;
118	0	RETURN_IF_ERROR(BuildTableInfoUtil::by_orc_name(
119	0	slot->type(), row_orc_type->getSubtype(row_names_map[slot_name]), child_node));
120	0	auto file_column_name = fmt::format(
121	0	"{}.{}", TransactionalHive::ACID_COLUMN_NAMES[TransactionalHive::ROW_OFFSET],
122	0	slot_name);
123	0	table_info_node_ptr->add_children(slot_name, file_column_name, child_node);
124	0	} else {
125	0	table_info_node_ptr->add_not_exist_children(slot_name);
126	0	}
127	0	}
128	0	ctx->table_info_node = table_info_node_ptr;
129	0	return Status::OK();
130	0	}
131
132		// ============================================================================
133		// on_after_init_reader: read delete delta files
134		// ============================================================================
135	0	Status TransactionalHiveReader::on_after_init_reader(ReaderInitContext* /*ctx*/) {
136	0	std::string data_file_path = get_scan_range().path;
137		// the path in _range has the namenode prefix removed,
138		// and the file_path in delete file is full path, so we should add it back.
139	0	if (get_scan_params().__isset.hdfs_params && get_scan_params().hdfs_params.__isset.fs_name) {
140	0	std::string fs_name = get_scan_params().hdfs_params.fs_name;
141	0	if (!starts_with(data_file_path, fs_name)) {
142	0	data_file_path = fs_name + data_file_path;
143	0	}
144	0	}
145
146	0	std::vector<std::string> delete_file_col_names;
147	0	int64_t num_delete_rows = 0;
148	0	int64_t num_delete_files = 0;
149	0	std::filesystem::path file_path(data_file_path);
150
151		// bucket_xxx_attemptId => bucket_xxx
152	0	auto remove_bucket_attemptId = [](const std::string& str) {
153	0	re2::RE2 pattern("^bucket_\\d+_\\d+$");
154	0	if (re2::RE2::FullMatch(str, pattern)) {
155	0	size_t pos = str.rfind('_');
156	0	if (pos != std::string::npos) {
157	0	return str.substr(0, pos);
158	0	}
159	0	}
160	0	return str;
161	0	};
162
163	0	SCOPED_TIMER(_transactional_orc_profile.delete_files_read_time);
164	0	for (const auto& delete_delta :
165	0	get_scan_range().table_format_params.transactional_hive_params.delete_deltas) {
166	0	const std::string file_name = file_path.filename().string();
167
168	0	std::vector<std::string> delete_delta_file_names;
169	0	for (const auto& x : delete_delta.file_names) {
170	0	delete_delta_file_names.emplace_back(remove_bucket_attemptId(x));
171	0	}
172	0	auto iter = std::find(delete_delta_file_names.begin(), delete_delta_file_names.end(),
173	0	remove_bucket_attemptId(file_name));
174	0	if (iter == delete_delta_file_names.end()) {
175	0	continue;
176	0	}
177	0	auto delete_file =
178	0	fmt::format("{}/{}", delete_delta.directory_location,
179	0	delete_delta.file_names[iter - delete_delta_file_names.begin()]);
180
181	0	TFileRangeDesc delete_range;
182	0	delete_range.__set_fs_name(get_scan_range().fs_name);
183	0	delete_range.path = delete_file;
184	0	delete_range.start_offset = 0;
185	0	delete_range.size = -1;
186	0	delete_range.file_size = -1;
187
188	0	OrcReader delete_reader(get_profile(), get_state(), get_scan_params(), delete_range,
189	0	256 /*batch_size*/, get_state()->timezone(), get_io_ctx(),
190	0	_meta_cache, false);
191
192	0	auto acid_info_node = std::make_shared<StructNode>();
193	0	for (auto idx = 0; idx < TransactionalHive::DELETE_ROW_COLUMN_NAMES_LOWER_CASE.size();
194	0	idx++) {
195	0	auto const& table_column_name =
196	0	TransactionalHive::DELETE_ROW_COLUMN_NAMES_LOWER_CASE[idx];
197	0	auto const& file_column_name = TransactionalHive::DELETE_ROW_COLUMN_NAMES[idx];
198	0	acid_info_node->add_children(table_column_name, file_column_name,
199	0	std::make_shared<ScalarNode>());
200	0	}
201
202	0	OrcInitContext delete_ctx;
203	0	delete_ctx.column_names.assign(
204	0	TransactionalHive::DELETE_ROW_COLUMN_NAMES_LOWER_CASE.begin(),
205	0	TransactionalHive::DELETE_ROW_COLUMN_NAMES_LOWER_CASE.end());
206	0	delete_ctx.col_name_to_block_idx = const_cast<std::unordered_map<std::string, uint32_t>*>(
207	0	&TransactionalHive::DELETE_COL_NAME_TO_BLOCK_IDX);
208	0	delete_ctx.table_info_node = acid_info_node;
209	0	RETURN_IF_ERROR(delete_reader.init_reader(&delete_ctx));
210
211	0	bool eof = false;
212	0	while (!eof) {
213	0	Block block;
214	0	for (const auto& i : TransactionalHive::DELETE_ROW_PARAMS) {
215	0	DataTypePtr data_type = DataTypeFactory::instance().create_data_type(i.type, false);
216	0	MutableColumnPtr data_column = data_type->create_column();
217	0	block.insert(ColumnWithTypeAndName(std::move(data_column), data_type,
218	0	i.column_lower_case));
219	0	}
220	0	eof = false;
221	0	size_t read_rows = 0;
222	0	RETURN_IF_ERROR(delete_reader.get_next_block(&block, &read_rows, &eof));
223	0	if (read_rows > 0) {
224	0	static int ORIGINAL_TRANSACTION_INDEX = 0;
225	0	static int BUCKET_ID_INDEX = 1;
226	0	static int ROW_ID_INDEX = 2;
227	0	const auto& original_transaction_column = assert_cast<const ColumnInt64&>(
228	0	*block.get_by_position(ORIGINAL_TRANSACTION_INDEX).column);
229	0	const auto& bucket_id_column = assert_cast<const ColumnInt32&>(
230	0	*block.get_by_position(BUCKET_ID_INDEX).column);
231	0	const auto& row_id_column = assert_cast<const ColumnInt64&>(
232	0	*block.get_by_position(ROW_ID_INDEX).column);
233
234	0	DCHECK_EQ(original_transaction_column.size(), read_rows);
235	0	DCHECK_EQ(bucket_id_column.size(), read_rows);
236	0	DCHECK_EQ(row_id_column.size(), read_rows);
237
238	0	for (int i = 0; i < read_rows; ++i) {
239	0	Int64 original_transaction = original_transaction_column.get_int(i);
240	0	Int64 bucket_id = bucket_id_column.get_int(i);
241	0	Int64 row_id = row_id_column.get_int(i);
242	0	AcidRowID delete_row_id = {original_transaction, bucket_id, row_id};
243	0	_acid_delete_rows.insert(delete_row_id);
244	0	++num_delete_rows;
245	0	}
246	0	}
247	0	}
248	0	++num_delete_files;
249	0	}
250	0	if (num_delete_rows > 0) {
251	0	set_push_down_agg_type(TPushAggOp::NONE);
252	0	set_delete_rows(&_acid_delete_rows);
253	0	COUNTER_UPDATE(_transactional_orc_profile.num_delete_files, num_delete_files);
254	0	COUNTER_UPDATE(_transactional_orc_profile.num_delete_rows, num_delete_rows);
255	0	}
256	0	return Status::OK();
257	0	}
258
259		// ============================================================================
260		// on_before_read_block: expand ACID columns into block
261		// TODO: Consider caching ACID column templates at init time to avoid repeated
262		// create_column + map update on every batch. Requires a block template mechanism.
263		// ============================================================================
264	0	Status TransactionalHiveReader::on_before_read_block(Block* block) {
265	0	for (const auto& i : TransactionalHive::READ_PARAMS) {
266	0	DataTypePtr data_type = get_data_type_with_default_argument(
267	0	DataTypeFactory::instance().create_data_type(i.type, false));
268	0	MutableColumnPtr data_column = data_type->create_column();
269	0	(*col_name_to_block_idx_ref())[i.column_lower_case] =
270	0	static_cast<uint32_t>(block->columns());
271	0	block->insert(
272	0	ColumnWithTypeAndName(std::move(data_column), data_type, i.column_lower_case));
273	0	}
274	0	return Status::OK();
275	0	}
276
277		// ============================================================================
278		// on_after_read_block: shrink ACID columns from block
279		// ============================================================================
280	0	Status TransactionalHiveReader::on_after_read_block(Block* block, size_t* /*read_rows*/) {
281	0	Block::erase_useless_column(block, block->columns() - TransactionalHive::READ_PARAMS.size());
282	0	for (const auto& i : TransactionalHive::READ_PARAMS) {
283	0	col_name_to_block_idx_ref()->erase(i.column_lower_case);
284	0	}
285	0	return Status::OK();
286	0	}
287
288		} // namespace doris