/root/doris/be/src/pipeline/pipeline.h
| Line | Count | Source (jump to first uncovered line) | 
| 1 |  | // Licensed to the Apache Software Foundation (ASF) under one | 
| 2 |  | // or more contributor license agreements.  See the NOTICE file | 
| 3 |  | // distributed with this work for additional information | 
| 4 |  | // regarding copyright ownership.  The ASF licenses this file | 
| 5 |  | // to you under the Apache License, Version 2.0 (the | 
| 6 |  | // "License"); you may not use this file except in compliance | 
| 7 |  | // with the License.  You may obtain a copy of the License at | 
| 8 |  | // | 
| 9 |  | //   http://www.apache.org/licenses/LICENSE-2.0 | 
| 10 |  | // | 
| 11 |  | // Unless required by applicable law or agreed to in writing, | 
| 12 |  | // software distributed under the License is distributed on an | 
| 13 |  | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | 
| 14 |  | // KIND, either express or implied.  See the License for the | 
| 15 |  | // specific language governing permissions and limitations | 
| 16 |  | // under the License. | 
| 17 |  |  | 
| 18 |  | #pragma once | 
| 19 |  |  | 
| 20 |  | #include <glog/logging.h> | 
| 21 |  |  | 
| 22 |  | #include <cstdint> | 
| 23 |  | #include <memory> | 
| 24 |  | #include <string_view> | 
| 25 |  | #include <utility> | 
| 26 |  | #include <vector> | 
| 27 |  |  | 
| 28 |  | #include "common/status.h" | 
| 29 |  | #include "pipeline/exec/operator.h" | 
| 30 |  | #include "util/runtime_profile.h" | 
| 31 |  |  | 
| 32 |  | namespace doris::pipeline { | 
| 33 |  |  | 
| 34 |  | class PipelineFragmentContext; | 
| 35 |  | class Pipeline; | 
| 36 |  |  | 
| 37 |  | using PipelinePtr = std::shared_ptr<Pipeline>; | 
| 38 |  | using Pipelines = std::vector<PipelinePtr>; | 
| 39 |  | using PipelineId = uint32_t; | 
| 40 |  |  | 
| 41 |  | class Pipeline : public std::enable_shared_from_this<Pipeline> { | 
| 42 |  |     friend class PipelineTask; | 
| 43 |  |     friend class PipelineFragmentContext; | 
| 44 |  |  | 
| 45 |  | public: | 
| 46 |  |     explicit Pipeline(PipelineId pipeline_id, int num_tasks, | 
| 47 |  |                       std::weak_ptr<PipelineFragmentContext> context, int num_tasks_of_parent) | 
| 48 |  |             : _pipeline_id(pipeline_id), | 
| 49 |  |               _num_tasks(num_tasks), | 
| 50 | 801k |               _num_tasks_of_parent(num_tasks_of_parent) { | 
| 51 | 801k |         _init_profile(); | 
| 52 | 801k |         _tasks.resize(_num_tasks, nullptr); | 
| 53 | 801k |     } | 
| 54 |  |  | 
| 55 |  |     // Add operators for pipelineX | 
| 56 |  |     Status add_operator(OperatorPtr& op, const int parallelism); | 
| 57 |  |     // prepare operators for pipelineX | 
| 58 |  |     Status prepare(RuntimeState* state); | 
| 59 |  |  | 
| 60 |  |     Status set_sink(DataSinkOperatorPtr& sink_operator); | 
| 61 |  |  | 
| 62 | 5.27M |     DataSinkOperatorXBase* sink() { return _sink.get(); } | 
| 63 | 8.12M |     Operators& operators() { return _operators; } | 
| 64 | 2.06M |     DataSinkOperatorPtr sink_shared_pointer() { return _sink; } | 
| 65 |  |  | 
| 66 | 544k |     [[nodiscard]] const RowDescriptor& output_row_desc() const { | 
| 67 | 544k |         return _operators.back()->row_desc(); | 
| 68 | 544k |     } | 
| 69 |  |  | 
| 70 | 19.2M |     [[nodiscard]] PipelineId id() const { return _pipeline_id; } | 
| 71 |  |  | 
| 72 | 832k |     static bool is_hash_exchange(ExchangeType idx) { | 
| 73 | 832k |         return idx == ExchangeType::HASH_SHUFFLE || idx == ExchangeType::BUCKET_HASH_SHUFFLE; | 
| 74 | 832k |     } | 
| 75 |  |  | 
| 76 |  |     // For HASH_SHUFFLE, BUCKET_HASH_SHUFFLE, and ADAPTIVE_PASSTHROUGH, | 
| 77 |  |     // data is processed and shuffled on the sink. | 
| 78 |  |     // Compared to PASSTHROUGH, this is a relatively heavy operation. | 
| 79 | 79.5k |     static bool heavy_operations_on_the_sink(ExchangeType idx) { | 
| 80 | 79.5k |         return idx == ExchangeType::HASH_SHUFFLE || idx == ExchangeType::BUCKET_HASH_SHUFFLE || | 
| 81 | 79.5k |                idx == ExchangeType::ADAPTIVE_PASSTHROUGH; | 
| 82 | 79.5k |     } | 
| 83 |  |  | 
| 84 |  |     bool need_to_local_exchange(const DataDistribution target_data_distribution, | 
| 85 |  |                                 const int idx) const; | 
| 86 | 650k |     void init_data_distribution() { | 
| 87 | 650k |         set_data_distribution(_operators.front()->required_data_distribution()); | 
| 88 | 650k |     } | 
| 89 | 892k |     void set_data_distribution(const DataDistribution& data_distribution) { | 
| 90 | 892k |         _data_distribution = data_distribution; | 
| 91 | 892k |     } | 
| 92 | 93.6k |     const DataDistribution& data_distribution() const { return _data_distribution; } | 
| 93 |  |  | 
| 94 | 1.88M |     std::vector<std::shared_ptr<Pipeline>>& children() { return _children; } | 
| 95 | 292k |     void set_children(std::shared_ptr<Pipeline> child) { _children.push_back(child); } | 
| 96 | 148k |     void set_children(std::vector<std::shared_ptr<Pipeline>> children) { _children = children; } | 
| 97 |  |  | 
| 98 | 2.06M |     void incr_created_tasks(int i, PipelineTask* task) { | 
| 99 | 2.06M |         _num_tasks_created++; | 
| 100 | 2.06M |         _num_tasks_running++; | 
| 101 | 2.06M |         DCHECK_LT(i, _tasks.size()); | 
| 102 | 2.06M |         _tasks[i] = task; | 
| 103 | 2.06M |     } | 
| 104 |  |  | 
| 105 |  |     void make_all_runnable(PipelineId wake_by); | 
| 106 |  |  | 
| 107 | 388k |     void set_num_tasks(int num_tasks) { | 
| 108 | 388k |         _num_tasks = num_tasks; | 
| 109 | 388k |         _tasks.resize(_num_tasks, nullptr); | 
| 110 | 433k |         for (auto& op : _operators) { | 
| 111 | 433k |             op->set_parallel_tasks(_num_tasks); | 
| 112 | 433k |         } | 
| 113 |  |  | 
| 114 | 388k | #ifndef NDEBUG | 
| 115 | 388k |         if (num_tasks > 1 && | 
| 116 | 388k |             std::any_of(_operators.begin(), _operators.end(), | 
| 117 | 264k |                         [&](OperatorPtr op) -> bool { return op->is_serial_operator(); })) { | 
| 118 | 0 |             DCHECK(false) << debug_string(); | 
| 119 | 0 |         } | 
| 120 | 388k | #endif | 
| 121 | 388k |     } | 
| 122 | 7.21M |     int num_tasks() const { return _num_tasks; } | 
| 123 | 2.06M |     bool close_task() { return _num_tasks_running.fetch_sub(1) == 1; } | 
| 124 |  |  | 
| 125 | 0 |     std::string debug_string() const { | 
| 126 | 0 |         fmt::memory_buffer debug_string_buffer; | 
| 127 | 0 |         fmt::format_to(debug_string_buffer, | 
| 128 | 0 |                        "Pipeline [id: {}, _num_tasks: {}, _num_tasks_created: {}]", _pipeline_id, | 
| 129 | 0 |                        _num_tasks, _num_tasks_created); | 
| 130 | 0 |         for (size_t i = 0; i < _operators.size(); i++) { | 
| 131 | 0 |             fmt::format_to(debug_string_buffer, "\n{}", _operators[i]->debug_string(i)); | 
| 132 | 0 |         } | 
| 133 | 0 |         fmt::format_to(debug_string_buffer, "\n{}", _sink->debug_string(_operators.size())); | 
| 134 | 0 |         return fmt::to_string(debug_string_buffer); | 
| 135 | 0 |     } | 
| 136 |  |  | 
| 137 | 197k |     int num_tasks_of_parent() const { return _num_tasks_of_parent; } | 
| 138 | 2.06M |     std::string& name() { return _name; } | 
| 139 |  |  | 
| 140 |  | private: | 
| 141 |  |     void _init_profile(); | 
| 142 |  |  | 
| 143 |  |     std::vector<std::pair<int, std::weak_ptr<Pipeline>>> _parents; | 
| 144 |  |     std::vector<std::pair<int, std::shared_ptr<Pipeline>>> _dependencies; | 
| 145 |  |  | 
| 146 |  |     std::vector<std::shared_ptr<Pipeline>> _children; | 
| 147 |  |  | 
| 148 |  |     PipelineId _pipeline_id; | 
| 149 |  |     int _previous_schedule_id = -1; | 
| 150 |  |  | 
| 151 |  |     // pipeline id + operator names. init when: | 
| 152 |  |     //  build_operators(), if pipeline; | 
| 153 |  |     //  _build_pipelines() and _create_tree_helper(), if pipelineX. | 
| 154 |  |     std::string _name; | 
| 155 |  |  | 
| 156 |  |     std::unique_ptr<RuntimeProfile> _pipeline_profile; | 
| 157 |  |  | 
| 158 |  |     // Operators for pipelineX. All pipeline tasks share operators from this. | 
| 159 |  |     // [SourceOperator -> ... -> SinkOperator] | 
| 160 |  |     Operators _operators; | 
| 161 |  |     DataSinkOperatorPtr _sink = nullptr; | 
| 162 |  |  | 
| 163 |  |     std::shared_ptr<ObjectPool> _obj_pool; | 
| 164 |  |  | 
| 165 |  |     // Input data distribution of this pipeline. We do local exchange when input data distribution | 
| 166 |  |     // does not match the target data distribution. | 
| 167 |  |     DataDistribution _data_distribution {ExchangeType::NOOP}; | 
| 168 |  |  | 
| 169 |  |     // How many tasks should be created? | 
| 170 |  |     int _num_tasks = 1; | 
| 171 |  |     // How many tasks are already created? | 
| 172 |  |     std::atomic<int> _num_tasks_created = 0; | 
| 173 |  |     // How many tasks are already created and not finished? | 
| 174 |  |     std::atomic<int> _num_tasks_running = 0; | 
| 175 |  |     // Tasks in this pipeline. | 
| 176 |  |     std::vector<PipelineTask*> _tasks; | 
| 177 |  |     // Parallelism of parent pipeline. | 
| 178 |  |     const int _num_tasks_of_parent; | 
| 179 |  | }; | 
| 180 |  |  | 
| 181 |  | } // namespace doris::pipeline |