be/src/load/group_commit/wal/wal_reader.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "wal_reader.h" |
19 | | |
20 | | #include <absl/strings/str_split.h> |
21 | | |
22 | | #include "agent/be_exec_version_manager.h" |
23 | | #include "common/logging.h" |
24 | | #include "core/block/block.h" |
25 | | #include "cpp/sync_point.h" |
26 | | #include "load/group_commit/wal/wal_manager.h" |
27 | | #include "runtime/runtime_state.h" |
28 | | |
29 | | namespace doris { |
30 | | #include "common/compile_check_begin.h" |
31 | 0 | WalReader::WalReader(RuntimeState* state) : _state(state) { |
32 | 0 | _wal_id = state->wal_id(); |
33 | 0 | } |
34 | | |
35 | 0 | Status WalReader::init_reader(const TupleDescriptor* tuple_descriptor) { |
36 | 0 | _tuple_descriptor = tuple_descriptor; |
37 | 0 | RETURN_IF_ERROR(_state->exec_env()->wal_mgr()->get_wal_path(_wal_id, _wal_path)); |
38 | 0 | _wal_reader = std::make_shared<doris::WalFileReader>(_wal_path); |
39 | 0 | RETURN_IF_ERROR(_wal_reader->init()); |
40 | 0 | return Status::OK(); |
41 | 0 | } |
42 | | |
43 | 0 | Status WalReader::get_next_block(Block* block, size_t* read_rows, bool* eof) { |
44 | | //read src block |
45 | 0 | PBlock pblock; |
46 | 0 | auto st = _wal_reader->read_block(pblock); |
47 | 0 | if (st.is<ErrorCode::END_OF_FILE>()) { |
48 | 0 | LOG(INFO) << "read eof on wal:" << _wal_path; |
49 | 0 | *read_rows = 0; |
50 | 0 | *eof = true; |
51 | 0 | return Status::OK(); |
52 | 0 | } |
53 | 0 | if (!st.ok()) { |
54 | 0 | LOG(WARNING) << "Failed to read wal on path = " << _wal_path; |
55 | 0 | return st; |
56 | 0 | } |
57 | 0 | int be_exec_version = pblock.has_be_exec_version() ? pblock.be_exec_version() : 0; |
58 | 0 | if (!BeExecVersionManager::check_be_exec_version(be_exec_version)) { |
59 | 0 | return Status::DataQualityError("check be exec version fail when reading wal file {}", |
60 | 0 | _wal_path); |
61 | 0 | } |
62 | 0 | Block src_block; |
63 | 0 | size_t uncompressed_size = 0; |
64 | 0 | int64_t uncompressed_time = 0; |
65 | 0 | RETURN_IF_ERROR(src_block.deserialize(pblock, &uncompressed_size, &uncompressed_time)); |
66 | | //convert to dst block |
67 | 0 | Block dst_block; |
68 | 0 | int index = 0; |
69 | 0 | auto output_block_columns = block->get_columns_with_type_and_name(); |
70 | 0 | size_t output_block_column_size = output_block_columns.size(); |
71 | 0 | TEST_SYNC_POINT_CALLBACK("WalReader::set_column_id_count", &_column_id_count); |
72 | 0 | TEST_SYNC_POINT_CALLBACK("WalReader::set_out_block_column_size", &output_block_column_size); |
73 | 0 | if (_column_id_count != src_block.columns() || |
74 | 0 | output_block_column_size != _tuple_descriptor->slots().size()) { |
75 | 0 | return Status::InternalError( |
76 | 0 | "not equal wal _column_id_count={} vs wal block columns size={}, " |
77 | 0 | "output block columns size={} vs tuple_descriptor size={}", |
78 | 0 | std::to_string(_column_id_count), std::to_string(src_block.columns()), |
79 | 0 | std::to_string(output_block_column_size), |
80 | 0 | std::to_string(_tuple_descriptor->slots().size())); |
81 | 0 | } |
82 | 0 | for (auto* slot_desc : _tuple_descriptor->slots()) { |
83 | 0 | auto pos = _column_pos_map[slot_desc->col_unique_id()]; |
84 | 0 | if (pos >= src_block.columns()) { |
85 | 0 | return Status::InternalError("read wal {} fail, pos {}, columns size {}", _wal_path, |
86 | 0 | pos, src_block.columns()); |
87 | 0 | } |
88 | 0 | ColumnPtr column_ptr = src_block.get_by_position(pos).column; |
89 | 0 | if (!column_ptr && slot_desc->is_nullable()) { |
90 | 0 | column_ptr = make_nullable(column_ptr); |
91 | 0 | } |
92 | 0 | dst_block.insert(index, ColumnWithTypeAndName(std::move(column_ptr), |
93 | 0 | output_block_columns[index].type, |
94 | 0 | output_block_columns[index].name)); |
95 | 0 | index++; |
96 | 0 | } |
97 | 0 | block->swap(dst_block); |
98 | 0 | *read_rows = block->rows(); |
99 | 0 | VLOG_DEBUG << "read block rows:" << *read_rows; |
100 | 0 | return Status::OK(); |
101 | 0 | } |
102 | | |
103 | | Status WalReader::get_columns(std::unordered_map<std::string, DataTypePtr>* name_to_type, |
104 | 0 | std::unordered_set<std::string>* missing_cols) { |
105 | 0 | std::string col_ids; |
106 | 0 | RETURN_IF_ERROR(_wal_reader->read_header(_version, col_ids)); |
107 | 0 | std::vector<std::string> column_id_vector = |
108 | 0 | absl::StrSplit(col_ids, ",", absl::SkipWhitespace()); |
109 | 0 | _column_id_count = column_id_vector.size(); |
110 | 0 | try { |
111 | 0 | int64_t pos = 0; |
112 | 0 | for (auto col_id_str : column_id_vector) { |
113 | 0 | auto col_id = std::strtoll(col_id_str.c_str(), nullptr, 10); |
114 | 0 | _column_pos_map.emplace(col_id, pos); |
115 | 0 | pos++; |
116 | 0 | } |
117 | 0 | } catch (const std::invalid_argument& e) { |
118 | 0 | return Status::InvalidArgument("Invalid format, {}", e.what()); |
119 | 0 | } |
120 | 0 | return Status::OK(); |
121 | 0 | } |
122 | | |
123 | | #include "common/compile_check_end.h" |
124 | | } // namespace doris |