Coverage Report

Created: 2026-03-16 21:05

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/io/fs/packed_file_trailer.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "io/fs/packed_file_trailer.h"
19
20
#include <array>
21
#include <fstream>
22
23
#include "common/status.h"
24
#include "util/coding.h"
25
26
namespace doris::io {
27
28
Status parse_packed_file_trailer(std::string_view data, cloud::PackedFileFooterPB* debug_pb,
29
3
                                 uint32_t* version) {
30
3
    if (debug_pb == nullptr || version == nullptr) {
31
0
        return Status::InvalidArgument("Output parameters must not be null");
32
0
    }
33
3
    if (data.size() < kPackedFileTrailerSuffixSize) {
34
0
        return Status::InternalError("Packed file too small to contain trailer");
35
0
    }
36
37
3
    const size_t suffix_offset = data.size() - kPackedFileTrailerSuffixSize;
38
3
    const auto* suffix_ptr = reinterpret_cast<const uint8_t*>(data.data() + suffix_offset);
39
3
    const uint32_t trailer_size = decode_fixed32_le(suffix_ptr);
40
3
    const uint32_t trailer_version = decode_fixed32_le(suffix_ptr + sizeof(uint32_t));
41
42
    // Preferred format: [PackedFileFooterPB][length][version]
43
3
    if (trailer_size > 0 && trailer_size <= data.size() - kPackedFileTrailerSuffixSize) {
44
2
        const size_t payload_offset = data.size() - kPackedFileTrailerSuffixSize - trailer_size;
45
2
        std::string_view payload(data.data() + payload_offset, trailer_size);
46
2
        if (payload.size() > static_cast<size_t>(std::numeric_limits<int>::max())) {
47
0
            return Status::InternalError("Packed file trailer payload too large");
48
0
        }
49
2
        cloud::PackedFileFooterPB parsed_pb;
50
2
        if (parsed_pb.ParseFromArray(payload.data(), static_cast<int>(payload.size()))) {
51
2
            debug_pb->Swap(&parsed_pb);
52
2
            *version = trailer_version;
53
2
            return Status::OK();
54
2
        }
55
2
    }
56
57
    // Legacy format fallback: [PackedFileInfoPB][length]
58
1
    if (data.size() < sizeof(uint32_t)) {
59
0
        return Status::InternalError("Packed file trailer corrupted");
60
0
    }
61
1
    const size_t legacy_suffix_offset = data.size() - sizeof(uint32_t);
62
1
    const auto* legacy_ptr = reinterpret_cast<const uint8_t*>(data.data() + legacy_suffix_offset);
63
1
    const uint32_t legacy_size = decode_fixed32_le(legacy_ptr);
64
1
    if (legacy_size == 0 || legacy_size > data.size() - sizeof(uint32_t)) {
65
0
        return Status::InternalError("Packed file trailer corrupted");
66
0
    }
67
1
    const size_t legacy_payload_offset = data.size() - sizeof(uint32_t) - legacy_size;
68
1
    std::string_view legacy_payload(data.data() + legacy_payload_offset, legacy_size);
69
1
    cloud::PackedFileInfoPB packed_info;
70
1
    if (legacy_payload.size() > static_cast<size_t>(std::numeric_limits<int>::max())) {
71
0
        return Status::InternalError("Packed file legacy trailer payload too large");
72
0
    }
73
1
    if (!packed_info.ParseFromArray(legacy_payload.data(),
74
1
                                    static_cast<int>(legacy_payload.size()))) {
75
0
        return Status::InternalError("Failed to parse packed file trailer");
76
0
    }
77
1
    debug_pb->Clear();
78
1
    debug_pb->mutable_packed_file_info()->Swap(&packed_info);
79
1
    *version = 0;
80
1
    return Status::OK();
81
1
}
82
83
Status read_packed_file_trailer(const std::string& file_path, cloud::PackedFileFooterPB* debug_pb,
84
2
                                uint32_t* version) {
85
2
    if (debug_pb == nullptr || version == nullptr) {
86
0
        return Status::InvalidArgument("Output parameters must not be null");
87
0
    }
88
89
2
    std::ifstream file(file_path, std::ios::binary);
90
2
    if (!file.is_open()) {
91
0
        return Status::IOError("Failed to open packed file {}", file_path);
92
0
    }
93
94
2
    file.seekg(0, std::ios::end);
95
2
    const std::streamoff file_size = file.tellg();
96
2
    if (file_size < static_cast<std::streamoff>(sizeof(uint32_t))) {
97
0
        return Status::InternalError("Packed file {} is too small", file_path);
98
0
    }
99
100
2
    auto read_tail = [&](std::streamoff count, std::string* out) -> Status {
101
2
        out->assign(static_cast<size_t>(count), '\0');
102
2
        file.seekg(file_size - count);
103
2
        file.read(out->data(), count);
104
2
        if (!file) {
105
0
            return Status::IOError("Failed to read last {} bytes from {}", count, file_path);
106
0
        }
107
2
        return Status::OK();
108
2
    };
109
110
    // Try new format first.
111
2
    if (file_size >= static_cast<std::streamoff>(kPackedFileTrailerSuffixSize)) {
112
2
        std::array<char, kPackedFileTrailerSuffixSize> suffix {};
113
2
        file.seekg(file_size - static_cast<std::streamoff>(suffix.size()));
114
2
        file.read(suffix.data(), suffix.size());
115
2
        if (file) {
116
2
            const uint32_t trailer_size =
117
2
                    decode_fixed32_le(reinterpret_cast<uint8_t*>(suffix.data()));
118
2
            const uint32_t trailer_version =
119
2
                    decode_fixed32_le(reinterpret_cast<uint8_t*>(suffix.data()) + sizeof(uint32_t));
120
2
            const std::streamoff required =
121
2
                    static_cast<std::streamoff>(kPackedFileTrailerSuffixSize + trailer_size);
122
2
            if (trailer_size > 0 && file_size >= required) {
123
1
                std::string tail;
124
1
                RETURN_IF_ERROR(read_tail(required, &tail));
125
1
                Status st = parse_packed_file_trailer(tail, debug_pb, version);
126
1
                if (st.ok() && *version == trailer_version) {
127
1
                    return st;
128
1
                }
129
1
            }
130
2
        }
131
1
        file.clear();
132
1
    }
133
134
    // Legacy fallback: PackedFileInfoPB + length.
135
1
    std::array<char, sizeof(uint32_t)> legacy_suffix {};
136
1
    file.seekg(file_size - static_cast<std::streamoff>(legacy_suffix.size()));
137
1
    file.read(legacy_suffix.data(), legacy_suffix.size());
138
1
    if (!file) {
139
0
        return Status::IOError("Failed to read legacy trailer length from {}", file_path);
140
0
    }
141
1
    const uint32_t legacy_size =
142
1
            decode_fixed32_le(reinterpret_cast<uint8_t*>(legacy_suffix.data()));
143
1
    const std::streamoff required = static_cast<std::streamoff>(sizeof(uint32_t) + legacy_size);
144
1
    if (legacy_size == 0 || file_size < required) {
145
0
        return Status::InternalError("Packed file trailer corrupted for {}", file_path);
146
0
    }
147
1
    std::string tail;
148
1
    RETURN_IF_ERROR(read_tail(required, &tail));
149
1
    return parse_packed_file_trailer(tail, debug_pb, version);
150
1
}
151
152
} // namespace doris::io