Coverage Report

Created: 2026-04-02 18:01

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/io/fs/http_file_reader.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <atomic>
21
#include <map>
22
#include <memory>
23
#include <string>
24
25
#include "common/status.h"
26
#include "core/pod_array.h"
27
#include "io/fs/file_handle_cache.h"
28
#include "io/fs/file_reader.h"
29
#include "io/fs/file_system.h"
30
#include "runtime/runtime_profile.h"
31
#include "service/http/http_client.h"
32
#include "util/slice.h"
33
34
namespace doris::io {
35
typedef struct OpenFileInfo {
36
    Path path;
37
    std::map<std::string, std::string> extend_info;
38
} OpenFileInfo;
39
class HttpFileReader final : public FileReader {
40
public:
41
    static Result<FileReaderSPtr> create(const std::string& url,
42
                                         const std::map<std::string, std::string>& props,
43
                                         const FileReaderOptions& opts, RuntimeProfile* profile);
44
45
    explicit HttpFileReader(const OpenFileInfo& fileInfo, std::string url, int64_t mtime);
46
    ~HttpFileReader() override;
47
48
    Status open(const FileReaderOptions& opts);
49
    Status read_at_impl(size_t offset, Slice result, size_t* bytes_read,
50
                        const IOContext* io_ctx = nullptr) override;
51
    Status close() override;
52
0
    const Path& path() const override { return _path; }
53
0
    bool closed() const override { return _closed.load(std::memory_order_acquire); }
54
0
    size_t size() const override { return _file_size; }
55
56
0
    int64_t mtime() const override { return _mtime; }
57
58
private:
59
    // Prepare and initialize the HTTP client for a new request
60
    Status prepare_client(bool set_fail_on_error = true);
61
62
    // Detect if the HTTP server supports Range requests
63
    // Returns OK on success with _range_supported set appropriately
64
    Status detect_range_support();
65
66
    // Start the CDC client process
67
    // Called at the start of open() when enable_cdc_client=true.
68
    Status setup_cdc_client();
69
70
    PODArray<char> _read_buffer;
71
    static constexpr size_t READ_BUFFER_SIZE = 1 << 20; // 1MB
72
    // Default maximum file size for servers that don't support Range requests
73
    static constexpr size_t DEFAULT_MAX_REQUEST_SIZE = 100 << 20; // 100MB
74
75
    size_t _buffer_start = 0;
76
    size_t _buffer_end = 0;
77
    bool _size_known = false;
78
    bool _range_supported = true;
79
    std::string _etag;
80
    bool _initialized = false;
81
    std::map<std::string, std::string> _extend_kv;
82
    size_t _file_size = static_cast<size_t>(-1);
83
    Path _path;
84
    std::string _url;
85
    int64_t _last_modified = 0;
86
    std::atomic<bool> _closed = false;
87
    std::unique_ptr<HttpClient> _client;
88
    int64_t _mtime;
89
90
    // Configuration for non-Range request handling
91
    bool _enable_range_request = true;                         // Whether Range request is required
92
    size_t _max_request_size_bytes = DEFAULT_MAX_REQUEST_SIZE; // Max size for non-Range downloads
93
94
    // Full file cache for non-Range mode to avoid repeated downloads
95
    std::string _full_file_cache;   // Cache complete file content
96
    bool _full_file_cached = false; // Whether full file has been cached
97
98
    bool _enable_chunk_response = false; // Whether server returns chunk streaming response
99
};
100
101
} // namespace doris::io