be/src/io/fs/packed_file_trailer.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "io/fs/packed_file_trailer.h" |
19 | | |
20 | | #include <array> |
21 | | #include <fstream> |
22 | | |
23 | | #include "common/status.h" |
24 | | #include "util/coding.h" |
25 | | |
26 | | namespace doris::io { |
27 | | |
28 | | Status parse_packed_file_trailer(std::string_view data, cloud::PackedFileFooterPB* debug_pb, |
29 | 3 | uint32_t* version) { |
30 | 3 | if (debug_pb == nullptr || version == nullptr) { |
31 | 0 | return Status::InvalidArgument("Output parameters must not be null"); |
32 | 0 | } |
33 | 3 | if (data.size() < kPackedFileTrailerSuffixSize) { |
34 | 0 | return Status::InternalError("Packed file too small to contain trailer"); |
35 | 0 | } |
36 | | |
37 | 3 | const size_t suffix_offset = data.size() - kPackedFileTrailerSuffixSize; |
38 | 3 | const auto* suffix_ptr = reinterpret_cast<const uint8_t*>(data.data() + suffix_offset); |
39 | 3 | const uint32_t trailer_size = decode_fixed32_le(suffix_ptr); |
40 | 3 | const uint32_t trailer_version = decode_fixed32_le(suffix_ptr + sizeof(uint32_t)); |
41 | | |
42 | | // Preferred format: [PackedFileFooterPB][length][version] |
43 | 3 | if (trailer_size > 0 && trailer_size <= data.size() - kPackedFileTrailerSuffixSize) { |
44 | 2 | const size_t payload_offset = data.size() - kPackedFileTrailerSuffixSize - trailer_size; |
45 | 2 | std::string_view payload(data.data() + payload_offset, trailer_size); |
46 | 2 | if (payload.size() > static_cast<size_t>(std::numeric_limits<int>::max())) { |
47 | 0 | return Status::InternalError("Packed file trailer payload too large"); |
48 | 0 | } |
49 | 2 | cloud::PackedFileFooterPB parsed_pb; |
50 | 2 | if (parsed_pb.ParseFromArray(payload.data(), static_cast<int>(payload.size()))) { |
51 | 2 | debug_pb->Swap(&parsed_pb); |
52 | 2 | *version = trailer_version; |
53 | 2 | return Status::OK(); |
54 | 2 | } |
55 | 2 | } |
56 | | |
57 | | // Legacy format fallback: [PackedFileInfoPB][length] |
58 | 1 | if (data.size() < sizeof(uint32_t)) { |
59 | 0 | return Status::InternalError("Packed file trailer corrupted"); |
60 | 0 | } |
61 | 1 | const size_t legacy_suffix_offset = data.size() - sizeof(uint32_t); |
62 | 1 | const auto* legacy_ptr = reinterpret_cast<const uint8_t*>(data.data() + legacy_suffix_offset); |
63 | 1 | const uint32_t legacy_size = decode_fixed32_le(legacy_ptr); |
64 | 1 | if (legacy_size == 0 || legacy_size > data.size() - sizeof(uint32_t)) { |
65 | 0 | return Status::InternalError("Packed file trailer corrupted"); |
66 | 0 | } |
67 | 1 | const size_t legacy_payload_offset = data.size() - sizeof(uint32_t) - legacy_size; |
68 | 1 | std::string_view legacy_payload(data.data() + legacy_payload_offset, legacy_size); |
69 | 1 | cloud::PackedFileInfoPB packed_info; |
70 | 1 | if (legacy_payload.size() > static_cast<size_t>(std::numeric_limits<int>::max())) { |
71 | 0 | return Status::InternalError("Packed file legacy trailer payload too large"); |
72 | 0 | } |
73 | 1 | if (!packed_info.ParseFromArray(legacy_payload.data(), |
74 | 1 | static_cast<int>(legacy_payload.size()))) { |
75 | 0 | return Status::InternalError("Failed to parse packed file trailer"); |
76 | 0 | } |
77 | 1 | debug_pb->Clear(); |
78 | 1 | debug_pb->mutable_packed_file_info()->Swap(&packed_info); |
79 | 1 | *version = 0; |
80 | 1 | return Status::OK(); |
81 | 1 | } |
82 | | |
83 | | Status read_packed_file_trailer(const std::string& file_path, cloud::PackedFileFooterPB* debug_pb, |
84 | 2 | uint32_t* version) { |
85 | 2 | if (debug_pb == nullptr || version == nullptr) { |
86 | 0 | return Status::InvalidArgument("Output parameters must not be null"); |
87 | 0 | } |
88 | | |
89 | 2 | std::ifstream file(file_path, std::ios::binary); |
90 | 2 | if (!file.is_open()) { |
91 | 0 | return Status::IOError("Failed to open packed file {}", file_path); |
92 | 0 | } |
93 | | |
94 | 2 | file.seekg(0, std::ios::end); |
95 | 2 | const std::streamoff file_size = file.tellg(); |
96 | 2 | if (file_size < static_cast<std::streamoff>(sizeof(uint32_t))) { |
97 | 0 | return Status::InternalError("Packed file {} is too small", file_path); |
98 | 0 | } |
99 | | |
100 | 2 | auto read_tail = [&](std::streamoff count, std::string* out) -> Status { |
101 | 2 | out->assign(static_cast<size_t>(count), '\0'); |
102 | 2 | file.seekg(file_size - count); |
103 | 2 | file.read(out->data(), count); |
104 | 2 | if (!file) { |
105 | 0 | return Status::IOError("Failed to read last {} bytes from {}", count, file_path); |
106 | 0 | } |
107 | 2 | return Status::OK(); |
108 | 2 | }; |
109 | | |
110 | | // Try new format first. |
111 | 2 | if (file_size >= static_cast<std::streamoff>(kPackedFileTrailerSuffixSize)) { |
112 | 2 | std::array<char, kPackedFileTrailerSuffixSize> suffix {}; |
113 | 2 | file.seekg(file_size - static_cast<std::streamoff>(suffix.size())); |
114 | 2 | file.read(suffix.data(), suffix.size()); |
115 | 2 | if (file) { |
116 | 2 | const uint32_t trailer_size = |
117 | 2 | decode_fixed32_le(reinterpret_cast<uint8_t*>(suffix.data())); |
118 | 2 | const uint32_t trailer_version = |
119 | 2 | decode_fixed32_le(reinterpret_cast<uint8_t*>(suffix.data()) + sizeof(uint32_t)); |
120 | 2 | const std::streamoff required = |
121 | 2 | static_cast<std::streamoff>(kPackedFileTrailerSuffixSize + trailer_size); |
122 | 2 | if (trailer_size > 0 && file_size >= required) { |
123 | 1 | std::string tail; |
124 | 1 | RETURN_IF_ERROR(read_tail(required, &tail)); |
125 | 1 | Status st = parse_packed_file_trailer(tail, debug_pb, version); |
126 | 1 | if (st.ok() && *version == trailer_version) { |
127 | 1 | return st; |
128 | 1 | } |
129 | 1 | } |
130 | 2 | } |
131 | 1 | file.clear(); |
132 | 1 | } |
133 | | |
134 | | // Legacy fallback: PackedFileInfoPB + length. |
135 | 1 | std::array<char, sizeof(uint32_t)> legacy_suffix {}; |
136 | 1 | file.seekg(file_size - static_cast<std::streamoff>(legacy_suffix.size())); |
137 | 1 | file.read(legacy_suffix.data(), legacy_suffix.size()); |
138 | 1 | if (!file) { |
139 | 0 | return Status::IOError("Failed to read legacy trailer length from {}", file_path); |
140 | 0 | } |
141 | 1 | const uint32_t legacy_size = |
142 | 1 | decode_fixed32_le(reinterpret_cast<uint8_t*>(legacy_suffix.data())); |
143 | 1 | const std::streamoff required = static_cast<std::streamoff>(sizeof(uint32_t) + legacy_size); |
144 | 1 | if (legacy_size == 0 || file_size < required) { |
145 | 0 | return Status::InternalError("Packed file trailer corrupted for {}", file_path); |
146 | 0 | } |
147 | 1 | std::string tail; |
148 | 1 | RETURN_IF_ERROR(read_tail(required, &tail)); |
149 | 1 | return parse_packed_file_trailer(tail, debug_pb, version); |
150 | 1 | } |
151 | | |
152 | | } // namespace doris::io |