be/src/io/fs/stream_load_pipe.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <gen_cpp/internal_service.pb.h> |
21 | | |
22 | | #include <condition_variable> |
23 | | #include <cstddef> |
24 | | #include <cstdint> |
25 | | #include <deque> |
26 | | #include <memory> |
27 | | #include <mutex> |
28 | | #include <string> |
29 | | |
30 | | #include "common/status.h" |
31 | | #include "core/custom_allocator.h" |
32 | | #include "io/fs/file_reader.h" |
33 | | #include "io/fs/path.h" |
34 | | #include "load/message_body_sink.h" |
35 | | #include "util/byte_buffer.h" |
36 | | #include "util/slice.h" |
37 | | |
38 | | namespace doris::io { |
// Forward declaration: this header only refers to IOContext through a pointer
// (see StreamLoadPipe::read_at_impl).
struct IOContext;

// Default value (4 MiB) of StreamLoadPipe's `max_buffered_bytes` constructor
// argument.
// Declared `inline constexpr` without `static` so that every translation unit
// including this header shares a single definition instead of getting its own
// internal-linkage copy.
inline constexpr size_t kMaxPipeBufferedBytes = 4 * 1024 * 1024;
42 | | |
43 | | class StreamLoadPipe : public MessageBodySink, public FileReader { |
44 | | public: |
45 | | StreamLoadPipe(size_t max_buffered_bytes = kMaxPipeBufferedBytes, |
46 | | size_t min_chunk_size = 64 * 1024, int64_t total_length = -1, |
47 | | bool use_proto = false); |
48 | | ~StreamLoadPipe() override; |
49 | | |
50 | | Status append_and_flush(const char* data, size_t size, size_t proto_byte_size = 0); |
51 | | |
52 | | Status append(std::unique_ptr<PDataRow>&& row); |
53 | | Status append(const char* data, size_t size) override; |
54 | | Status append(const ByteBufferPtr& buf) override; |
55 | | |
56 | 0 | const Path& path() const override { return _path; } |
57 | | |
58 | 1.96k | size_t size() const override { return 0; } |
59 | | |
60 | 0 | int64_t mtime() const override { return 0; } |
61 | | |
62 | | // called when consumer finished |
63 | 1.85k | Status close() override { |
64 | 1.85k | if (!(_finished || _cancelled)) { |
65 | 0 | cancel("closed"); |
66 | 0 | } |
67 | 1.85k | return Status::OK(); |
68 | 1.85k | } |
69 | | |
70 | 0 | bool closed() const override { return _cancelled; } |
71 | | |
72 | | // called when producer finished |
73 | | virtual Status finish() override; |
74 | | |
75 | | // called when producer/consumer failed |
76 | | virtual void cancel(const std::string& reason) override; |
77 | | |
78 | | Status read_one_message(DorisUniqueBufferPtr<uint8_t>* data, size_t* length); |
79 | | |
80 | | size_t get_queue_size() { return _buf_queue.size(); } |
81 | | |
82 | | // used for pipeline load, which use TUniqueId(lo: query_id.lo + fragment_id, hi: query_id.hi) as pipe_id |
83 | | static TUniqueId calculate_pipe_id(const UniqueId& query_id, int32_t fragment_id); |
84 | | |
85 | 15 | size_t max_capacity() const { return _max_buffered_bytes; } |
86 | | |
87 | | size_t current_capacity(); |
88 | | |
89 | 244 | bool is_chunked_transfer() const { return _is_chunked_transfer; } |
90 | | |
91 | 313 | void set_is_chunked_transfer(bool is_chunked_transfer) { |
92 | 313 | _is_chunked_transfer = is_chunked_transfer; |
93 | 313 | } |
94 | | |
95 | | protected: |
96 | | Status read_at_impl(size_t offset, Slice result, size_t* bytes_read, |
97 | | const IOContext* io_ctx) override; |
98 | | |
99 | | private: |
100 | | // read the next buffer from _buf_queue |
101 | | Status _read_next_buffer(DorisUniqueBufferPtr<uint8_t>* data, size_t* length); |
102 | | |
103 | | Status _append(const ByteBufferPtr& buf, size_t proto_byte_size = 0); |
104 | | |
105 | | // Blocking queue |
106 | | std::mutex _lock; |
107 | | size_t _buffered_bytes; |
108 | | size_t _proto_buffered_bytes; |
109 | | size_t _max_buffered_bytes; |
110 | | size_t _min_chunk_size; |
111 | | // The total amount of data expected to be read. |
112 | | // In some scenarios, such as loading json format data through stream load, |
113 | | // the data needs to be completely read before it can be parsed, |
114 | | // so the total size of the data needs to be known. |
115 | | // The default is -1, which means that the data arrives in a stream |
116 | | // and the length is unknown. |
117 | | // size_t is unsigned, so use int64_t |
118 | | int64_t _total_length = -1; |
119 | | bool _use_proto = false; |
120 | | std::deque<ByteBufferPtr> _buf_queue; |
121 | | std::deque<std::unique_ptr<PDataRow>> _data_row_ptrs; |
122 | | std::condition_variable _put_cond; |
123 | | std::condition_variable _get_cond; |
124 | | |
125 | | ByteBufferPtr _write_buf; |
126 | | |
127 | | // no use, only for compatibility with the `Path` interface |
128 | | Path _path = ""; |
129 | | |
130 | | // When importing JSON data and using chunked transfer encoding, |
131 | | // the data needs to be completely read before it can be parsed. |
132 | | bool _is_chunked_transfer = false; |
133 | | }; |
134 | | } // namespace doris::io |