Coverage Report

Created: 2026-03-13 09:37

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/io/fs/stream_load_pipe.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <gen_cpp/internal_service.pb.h>
21
22
#include <condition_variable>
23
#include <cstddef>
24
#include <cstdint>
25
#include <deque>
26
#include <memory>
27
#include <mutex>
28
#include <string>
29
30
#include "common/status.h"
31
#include "core/custom_allocator.h"
32
#include "io/fs/file_reader.h"
33
#include "io/fs/path.h"
34
#include "load/message_body_sink.h"
35
#include "util/byte_buffer.h"
36
#include "util/slice.h"
37
38
namespace doris::io {
39
struct IOContext;
40
41
// Default upper bound, in bytes (4 MiB), used as the default value of the
// `max_buffered_bytes` constructor argument of StreamLoadPipe.
// Declared `inline constexpr` (without `static`) so that every translation
// unit including this header refers to one and the same entity.
inline constexpr size_t kMaxPipeBufferedBytes = 4 * 1024 * 1024;
42
43
class StreamLoadPipe : public MessageBodySink, public FileReader {
44
public:
45
    StreamLoadPipe(size_t max_buffered_bytes = kMaxPipeBufferedBytes,
46
                   size_t min_chunk_size = 64 * 1024, int64_t total_length = -1,
47
                   bool use_proto = false);
48
    ~StreamLoadPipe() override;
49
50
    Status append_and_flush(const char* data, size_t size, size_t proto_byte_size = 0);
51
52
    Status append(std::unique_ptr<PDataRow>&& row);
53
    Status append(const char* data, size_t size) override;
54
    Status append(const ByteBufferPtr& buf) override;
55
56
0
    // Returns a reference to the `_path` member. That member is
    // default-initialised to "" in this class, so the result carries no real
    // location information.
    const Path& path() const override {
        return this->_path;
    }
57
58
1.96k
    // Always reports a size of 0, regardless of how much data is buffered.
    size_t size() const override {
        return 0;
    }
59
60
0
    // Always reports a modification time of 0.
    int64_t mtime() const override {
        return 0;
    }
61
62
    // called when consumer finished
63
1.85k
    // Consumer-side shutdown. If the pipe has been neither finished nor
    // cancelled so far, it is cancelled with the reason "closed".
    // Returns Status::OK() unconditionally.
    Status close() override {
        const bool already_terminated = _finished || _cancelled;
        if (!already_terminated) {
            cancel("closed");
        }
        return Status::OK();
    }
69
70
0
    bool closed() const override { return _cancelled; }
71
72
    // called when producer finished
73
    // Overrides the base-class hook; only declared here, the body is not part
    // of this class definition. `override` already implies `virtual`.
    Status finish() override;
74
75
    // called when producer/consumer failed
76
    // Overrides the base-class hook; only declared here, the body is not part
    // of this class definition. `override` already implies `virtual`.
    // `reason` is a caller-supplied description (close() passes "closed").
    void cancel(const std::string& reason) override;
77
78
    Status read_one_message(DorisUniqueBufferPtr<uint8_t>* data, size_t* length);
79
80
    size_t get_queue_size() { return _buf_queue.size(); }
81
82
    // Used for pipeline load, which uses TUniqueId(lo: query_id.lo + fragment_id, hi: query_id.hi) as the pipe_id.
83
    static TUniqueId calculate_pipe_id(const UniqueId& query_id, int32_t fragment_id);
84
85
15
    size_t max_capacity() const { return _max_buffered_bytes; }
86
87
    size_t current_capacity();
88
89
244
    bool is_chunked_transfer() const { return _is_chunked_transfer; }
90
91
313
    void set_is_chunked_transfer(bool is_chunked_transfer) {
92
313
        _is_chunked_transfer = is_chunked_transfer;
93
313
    }
94
95
protected:
96
    Status read_at_impl(size_t offset, Slice result, size_t* bytes_read,
97
                        const IOContext* io_ctx) override;
98
99
private:
100
    // read the next buffer from _buf_queue
101
    Status _read_next_buffer(DorisUniqueBufferPtr<uint8_t>* data, size_t* length);
102
103
    Status _append(const ByteBufferPtr& buf, size_t proto_byte_size = 0);
104
105
    // Blocking queue
106
    std::mutex _lock;
107
    size_t _buffered_bytes;
108
    size_t _proto_buffered_bytes;
109
    size_t _max_buffered_bytes;
110
    size_t _min_chunk_size;
111
    // The total amount of data expected to be read.
112
    // In some scenarios, such as loading json format data through stream load,
113
    // the data needs to be completely read before it can be parsed,
114
    // so the total size of the data needs to be known.
115
    // The default is -1, which means that the data arrives in a stream
116
    // and the length is unknown.
117
    // size_t is unsigned, so use int64_t
118
    int64_t _total_length = -1;
119
    bool _use_proto = false;
120
    std::deque<ByteBufferPtr> _buf_queue;
121
    std::deque<std::unique_ptr<PDataRow>> _data_row_ptrs;
122
    std::condition_variable _put_cond;
123
    std::condition_variable _get_cond;
124
125
    ByteBufferPtr _write_buf;
126
127
    // Unused; present only for compatibility with the `Path` interface.
128
    Path _path = "";
129
130
    // When importing JSON data and using chunked transfer encoding,
131
    // the data needs to be completely read before it can be parsed.
132
    bool _is_chunked_transfer = false;
133
};
134
} // namespace doris::io