/root/doris/be/src/runtime/snapshot_loader.h
| Line | Count | Source | 
| 1 |  | // Licensed to the Apache Software Foundation (ASF) under one | 
| 2 |  | // or more contributor license agreements.  See the NOTICE file | 
| 3 |  | // distributed with this work for additional information | 
| 4 |  | // regarding copyright ownership.  The ASF licenses this file | 
| 5 |  | // to you under the Apache License, Version 2.0 (the | 
| 6 |  | // "License"); you may not use this file except in compliance | 
| 7 |  | // with the License.  You may obtain a copy of the License at | 
| 8 |  | // | 
| 9 |  | //   http://www.apache.org/licenses/LICENSE-2.0 | 
| 10 |  | // | 
| 11 |  | // Unless required by applicable law or agreed to in writing, | 
| 12 |  | // software distributed under the License is distributed on an | 
| 13 |  | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | 
| 14 |  | // KIND, either express or implied.  See the License for the | 
| 15 |  | // specific language governing permissions and limitations | 
| 16 |  | // under the License. | 
| 17 |  |  | 
| 18 |  | #pragma once | 
| 19 |  |  | 
| 20 |  | #include <gen_cpp/Types_types.h> | 
| 21 |  |  | 
| 22 |  | #include <cstdint> | 
| 23 |  | #include <map> | 
| 24 |  | #include <memory> | 
| 25 |  | #include <string> | 
| 26 |  | #include <vector> | 
| 27 |  |  | 
| 28 |  | #include "common/status.h" | 
| 29 |  | #include "olap/tablet_fwd.h" | 
| 30 |  | #include "runtime/workload_management/resource_context.h" | 
| 31 |  |  | 
| 32 |  | namespace doris { | 
| 33 |  | namespace io { | 
| 34 |  | class RemoteFileSystem; | 
| 35 |  | } // namespace io | 
| 36 |  |  | 
| 37 |  | class DataDir; | 
| 38 |  | class TRemoteTabletSnapshot; | 
| 39 |  | class StorageEngine; | 
| 40 |  |  | 
// Metadata describing a single file: its name, its MD5 checksum and its size.
// It is the value type of the map filled by
// BaseSnapshotLoader::_list_with_checksum().
struct FileStat {
    std::string name;
    std::string md5;
    // Defaults to 0 so that a default-initialized FileStat never exposes an
    // indeterminate value.
    // NOTE(review): presumably the size is in bytes -- confirm against the
    // code that fills this struct.
    int64_t size = 0;
};
| 46 |  | class ExecEnv; | 
| 47 |  |  | 
| 48 |  | class BaseSnapshotLoader { | 
| 49 |  | public: | 
| 50 |  |     BaseSnapshotLoader(ExecEnv* env, int64_t job_id, int64_t task_id, | 
| 51 |  |                        const TNetworkAddress& broker_addr = {}, | 
| 52 |  |                        const std::map<std::string, std::string>& broker_prop = {}); | 
| 53 |  |  | 
| 54 | 4 |     virtual ~BaseSnapshotLoader() = default; | 
| 55 |  |  | 
| 56 |  |     Status init(TStorageBackendType::type type, const std::string& location); | 
| 57 |  |  | 
| 58 |  |     virtual Status upload(const std::map<std::string, std::string>& src_to_dest_path, | 
| 59 |  |                           std::map<int64_t, std::vector<std::string>>* tablet_files) = 0; | 
| 60 |  |  | 
| 61 |  |     virtual Status download(const std::map<std::string, std::string>& src_to_dest_path, | 
| 62 |  |                             std::vector<int64_t>* downloaded_tablet_ids) = 0; | 
| 63 |  |  | 
| 64 | 0 |     std::shared_ptr<ResourceContext> resource_ctx() { return _resource_ctx; } | 
| 65 |  |  | 
| 66 |  | protected: | 
| 67 |  |     Status _get_tablet_id_from_remote_path(const std::string& remote_path, int64_t* tablet_id); | 
| 68 |  |  | 
| 69 |  |     Status _report_every(int report_threshold, int* counter, int finished_num, int total_num, | 
| 70 |  |                          TTaskType::type type); | 
| 71 |  |  | 
| 72 |  |     Status _list_with_checksum(const std::string& dir, std::map<std::string, FileStat>* md5_files); | 
| 73 |  |  | 
| 74 |  | protected: | 
| 75 |  |     ExecEnv* _env = nullptr; | 
| 76 |  |     int64_t _job_id; | 
| 77 |  |     int64_t _task_id; | 
| 78 |  |     const TNetworkAddress _broker_addr; | 
| 79 |  |     const std::map<std::string, std::string> _prop; | 
| 80 |  |     std::shared_ptr<io::RemoteFileSystem> _remote_fs; | 
| 81 |  |     std::shared_ptr<ResourceContext> _resource_ctx; | 
| 82 |  | }; | 
| 83 |  |  | 
| 84 |  | /* | 
| 85 |  |  * Upload: | 
| 86 |  |  * upload() will upload the specified snapshot | 
| 87 |  |  * to remote storage via broker. | 
| 88 |  |  * Each call of upload() is responsible for several tablet snapshots. | 
| 89 |  |  * | 
| 90 |  |  * It will try to get the existing files in remote storage, | 
| 91 |  |  * and only upload the incremental part of files. | 
| 92 |  |  * | 
| 93 |  |  * Download: | 
| 94 |  |  * download() will download the remote tablet snapshot files | 
| 95 |  |  * to local snapshot dir via broker. | 
| 96 |  |  * It will also only download files which does not exist in local dir. | 
| 97 |  |  * | 
| 98 |  |  * Move: | 
| 99 |  |  * move() is the final step of restore process. it will replace the | 
| 100 |  |  * old tablet data dir with the newly downloaded snapshot dir. | 
| 101 |  |  * and reload the tablet header to take this tablet on line. | 
| 102 |  |  * | 
| 103 |  |  */ | 
| 104 |  | class SnapshotLoader : public BaseSnapshotLoader { | 
| 105 |  |     friend class SnapshotHttpDownloader; | 
| 106 |  |  | 
| 107 |  | public: | 
| 108 |  |     SnapshotLoader(StorageEngine& engine, ExecEnv* env, int64_t job_id, int64_t task_id, | 
| 109 |  |                    const TNetworkAddress& broker_addr = {}, | 
| 110 |  |                    const std::map<std::string, std::string>& broker_prop = {}); | 
| 111 | 4 |     ~SnapshotLoader() override {}; | 
| 112 |  |  | 
| 113 |  |     Status upload(const std::map<std::string, std::string>& src_to_dest_path, | 
| 114 |  |                   std::map<int64_t, std::vector<std::string>>* tablet_files) override; | 
| 115 |  |  | 
| 116 |  |     Status download(const std::map<std::string, std::string>& src_to_dest_path, | 
| 117 |  |                     std::vector<int64_t>* downloaded_tablet_ids) override; | 
| 118 |  |  | 
| 119 |  |     Status remote_http_download(const std::vector<TRemoteTabletSnapshot>& remote_tablets, | 
| 120 |  |                                 std::vector<int64_t>* downloaded_tablet_ids); | 
| 121 |  |  | 
| 122 |  |     Status move(const std::string& snapshot_path, TabletSharedPtr tablet, bool overwrite); | 
| 123 |  |  | 
| 124 | 1 |     int64_t get_http_download_files_num() const { return _http_download_files_num; } | 
| 125 |  |  | 
| 126 |  | private: | 
| 127 |  |     Status _replace_tablet_id(const std::string& file_name, int64_t tablet_id, | 
| 128 |  |                               std::string* new_file_name); | 
| 129 |  |  | 
| 130 |  |     Status _check_local_snapshot_paths(const std::map<std::string, std::string>& src_to_dest_path, | 
| 131 |  |                                        bool check_src); | 
| 132 |  |  | 
| 133 |  |     Status _get_tablet_id_and_schema_hash_from_file_path(const std::string& src_path, | 
| 134 |  |                                                          int64_t* tablet_id, int32_t* schema_hash); | 
| 135 |  |  | 
| 136 |  |     Status _get_existing_files_from_local(const std::string& local_path, | 
| 137 |  |                                           std::vector<std::string>* local_files); | 
| 138 |  |  | 
| 139 | 1 |     void _set_http_download_files_num(int64_t num) { _http_download_files_num = num; } | 
| 140 |  |  | 
| 141 |  | private: | 
| 142 |  |     StorageEngine& _engine; | 
| 143 |  |     // for test remote_http_download | 
| 144 |  |     size_t _http_download_files_num; | 
| 145 |  | }; | 
| 146 |  |  | 
| 147 |  | } // end namespace doris |