Coverage Report

Created: 2026-05-15 00:52

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/io/fs/stream_load_pipe.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <gen_cpp/internal_service.pb.h>
21
22
#include <condition_variable>
23
#include <cstddef>
24
#include <cstdint>
25
#include <deque>
26
#include <memory>
27
#include <mutex>
28
#include <string>
29
30
#include "common/status.h"
31
#include "core/custom_allocator.h"
32
#include "io/fs/file_reader.h"
33
#include "io/fs/path.h"
34
#include "load/message_body_sink.h"
35
#include "util/byte_buffer.h"
36
#include "util/slice.h"
37
38
namespace doris::io {
39
struct IOContext;
40
41
// Default value of StreamLoadPipe's `max_buffered_bytes` constructor
// argument: 4 MiB, expressed in bytes.
static inline constexpr size_t kMaxPipeBufferedBytes = static_cast<size_t>(4) << 20;
42
43
class StreamLoadPipe : public MessageBodySink, public FileReader {
44
public:
45
    // Constructs a pipe; every argument is optional.
    //   max_buffered_bytes: defaults to kMaxPipeBufferedBytes (4 MiB).
    //   min_chunk_size:     defaults to 64 KiB.
    //   total_length:       defaults to -1, i.e. the length is unknown and the
    //                       data arrives as a stream (see `_total_length`).
    //   use_proto:          defaults to false.
    // The definition is not in this header; presumably each argument is stored
    // in the same-named private member -- confirm against the .cpp.
    // NOTE(review): callable with a single argument and not `explicit`, so a
    // size_t converts implicitly to a StreamLoadPipe.
    StreamLoadPipe(size_t max_buffered_bytes = kMaxPipeBufferedBytes,
46
                   size_t min_chunk_size = 64 * 1024, int64_t total_length = -1,
47
                   bool use_proto = false);
48
    // Destructor; overrides a virtual base-class destructor. Declared here,
    // defined outside this header.
    ~StreamLoadPipe() override;
49
50
    // Takes `size` bytes starting at `data`; `proto_byte_size` defaults to 0.
    // Declaration only -- the definition is outside this header. Within this
    // header it is called by append_json() with the default `proto_byte_size`.
    Status append_and_flush(const char* data, size_t size, size_t proto_byte_size = 0);
51
52
    // Three append overloads, all defined outside this header:
    //  - a PDataRow passed as an rvalue reference to its owning unique_ptr;
    //  - a raw byte range (`data`, `size`);
    //  - an existing ByteBufferPtr.
    // The last two are marked `override`, so they implement virtuals of a base
    // class (presumably MessageBodySink -- confirm).
    Status append(std::unique_ptr<PDataRow>&& row);
53
    Status append(const char* data, size_t size) override;
54
    Status append(const ByteBufferPtr& buf) override;
55
56
0
    // Appends `size` bytes starting at `data`, then a single "\n" byte.
    // If the first append fails, RETURN_IF_ERROR is expected to return that
    // status before the delimiter is appended (the macro is defined outside
    // this header). Otherwise the status of the delimiter append is returned.
    virtual Status append_with_line_delimiter(const char* data, size_t size) {
57
0
        RETURN_IF_ERROR(append(data, size));
58
0
        return append("\n", 1);
59
0
    }
60
61
8
    // Appends a JSON payload by forwarding to append_and_flush(data, size);
    // `proto_byte_size` is left at its default of 0. Returns whatever
    // append_and_flush() returns. Virtual, so subclasses may override it.
    virtual Status append_json(const char* data, size_t size) {
62
8
        return append_and_flush(data, size);
63
8
    }
64
65
0
    // Returns a reference to `_path`, which is initialised to "" and, per the
    // comment on that member, exists only for interface compatibility.
    const Path& path() const override { return _path; }
66
67
0
    // Always returns 0; this override does not report a real size.
    size_t size() const override { return 0; }
68
69
0
    // Always returns 0; this override does not report a modification time.
    int64_t mtime() const override { return 0; }
70
71
    // Called when the consumer has finished.
    // If neither `_finished` nor `_cancelled` is set, cancels the pipe with the
    // reason "closed". Always returns Status::OK().
    // `_finished` and `_cancelled` are not declared in this class; presumably
    // they are inherited from a base class -- confirm.
72
0
    Status close() override {
73
0
        if (!(_finished || _cancelled)) {
74
0
            cancel("closed");
75
0
        }
76
0
        return Status::OK();
77
0
    }
78
79
0
    // Returns `_cancelled`. `_finished` is not consulted here, unlike in
    // close().
    bool closed() const override { return _cancelled; }
80
81
    // Called when the producer has finished. Overrides a base-class virtual;
    // defined outside this header.
    // NOTE(review): `virtual` is redundant next to `override`.
82
    virtual Status finish() override;
83
84
    // Called when the producer or the consumer has failed; `reason` is a
    // caller-supplied string (close() in this header passes "closed").
    // Overrides a base-class virtual; defined outside this header.
    // NOTE(review): `virtual` is redundant next to `override`.
85
    virtual void cancel(const std::string& reason) override;
86
87
    // Declaration only -- the definition is outside this header.
    // `data` and `length` are pointer parameters, presumably outputs receiving
    // the message buffer and its byte length -- confirm against the .cpp.
    Status read_one_message(DorisUniqueBufferPtr<uint8_t>* data, size_t* length);
88
89
6
    // Returns the number of buffers currently held in `_buf_queue`.
    // NOTE(review): reads `_buf_queue` without acquiring `_lock`; confirm that
    // callers tolerate a value that may race with concurrent appends/reads.
    size_t get_queue_size() { return _buf_queue.size(); }
90
91
    // Used for pipeline load, which uses
    // TUniqueId(lo: query_id.lo + fragment_id, hi: query_id.hi) as the pipe id.
    // Static; defined outside this header.
92
    static TUniqueId calculate_pipe_id(const UniqueId& query_id, int32_t fragment_id);
93
94
6
    // Returns `_max_buffered_bytes`.
    size_t max_capacity() const { return _max_buffered_bytes; }
95
96
    // Declaration only -- the definition is outside this header. Non-const,
    // unlike max_capacity(). Presumably reports the bytes currently buffered
    // -- confirm against the .cpp.
    size_t current_capacity();
97
98
0
    // Returns the `_is_chunked_transfer` flag (false unless changed through
    // set_is_chunked_transfer()).
    bool is_chunked_transfer() const { return _is_chunked_transfer; }
99
100
0
    // Stores `is_chunked_transfer` into the `_is_chunked_transfer` flag.
    void set_is_chunked_transfer(bool is_chunked_transfer) {
101
0
        _is_chunked_transfer = is_chunked_transfer;
102
0
    }
103
104
protected:
105
    // Protected override of a base-class virtual (presumably the FileReader
    // read hook -- confirm). Declaration only; defined outside this header.
    // `bytes_read` is a pointer parameter, presumably an output.
    Status read_at_impl(size_t offset, Slice result, size_t* bytes_read,
106
                        const IOContext* io_ctx) override;
107
108
private:
109
    // Reads the next buffer from `_buf_queue`.
    // `data` and `length` are pointer parameters, presumably outputs -- confirm
    // against the .cpp, where this is defined.
110
    Status _read_next_buffer(DorisUniqueBufferPtr<uint8_t>* data, size_t* length);
111
112
    // Private append helper taking a buffer and an optional `proto_byte_size`
    // (default 0). Declaration only -- the definition is outside this header.
    Status _append(const ByteBufferPtr& buf, size_t proto_byte_size = 0);
113
114
    // Blocking queue
    // NOTE(review): `_lock` is presumably the mutex guarding the state below;
    // the inline get_queue_size() in this header reads `_buf_queue` without it.
115
    std::mutex _lock;
    // Byte counters and limits. None of these four has a default member
    // initializer, so they are presumably set by the constructor (its
    // definition is not in this header).
116
    size_t _buffered_bytes;
117
    size_t _proto_buffered_bytes;
    // Returned by max_capacity().
118
    size_t _max_buffered_bytes;
119
    size_t _min_chunk_size;
120
    // The total amount of data expected to be read.
121
    // In some scenarios, such as loading json format data through stream load,
122
    // the data needs to be completely read before it can be parsed,
123
    // so the total size of the data needs to be known.
124
    // The default is -1, which means that the data arrives in a stream
125
    // and the length is unknown.
126
    // Declared as int64_t rather than size_t because size_t is unsigned and
    // could not represent the -1 sentinel.
127
    int64_t _total_length = -1;
128
    bool _use_proto = false;
    // Queue of byte buffers; get_queue_size() reports its element count.
129
    std::deque<ByteBufferPtr> _buf_queue;
    // Owned PDataRow objects (presumably those handed to
    // append(std::unique_ptr<PDataRow>&&) -- confirm in the .cpp).
130
    std::deque<std::unique_ptr<PDataRow>> _data_row_ptrs;
    // Condition variables; judging by the names, one for producers (put) and
    // one for consumers (get) -- confirm in the .cpp.
131
    std::condition_variable _put_cond;
132
    std::condition_variable _get_cond;
133
134
    ByteBufferPtr _write_buf;
135
136
    // Not used for anything else; exists only so that path() has a `Path` to
    // return.
137
    Path _path = "";
138
139
    // When importing JSON data and using chunked transfer encoding,
140
    // the data needs to be completely read before it can be parsed.
    // Read and written through is_chunked_transfer() / set_is_chunked_transfer().
141
    bool _is_chunked_transfer = false;
142
};
143
} // namespace doris::io