/root/doris/be/src/util/timezone_utils.cpp
| Line | Count | Source | 
| 1 |  | // Licensed to the Apache Software Foundation (ASF) under one | 
| 2 |  | // or more contributor license agreements.  See the NOTICE file | 
| 3 |  | // distributed with this work for additional information | 
| 4 |  | // regarding copyright ownership.  The ASF licenses this file | 
| 5 |  | // to you under the Apache License, Version 2.0 (the | 
| 6 |  | // "License"); you may not use this file except in compliance | 
| 7 |  | // with the License.  You may obtain a copy of the License at | 
| 8 |  | // | 
| 9 |  | //   http://www.apache.org/licenses/LICENSE-2.0 | 
| 10 |  | // | 
| 11 |  | // Unless required by applicable law or agreed to in writing, | 
| 12 |  | // software distributed under the License is distributed on an | 
| 13 |  | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | 
| 14 |  | // KIND, either express or implied.  See the License for the | 
| 15 |  | // specific language governing permissions and limitations | 
| 16 |  | // under the License. | 
| 17 |  |  | 
| 18 |  | #include "util/timezone_utils.h" | 
| 19 |  |  | 
| 20 |  | #include <cctz/civil_time.h> | 
| 21 |  | #include <cctz/time_zone.h> | 
| 22 |  | #include <fcntl.h> | 
| 23 |  | #include <glog/logging.h> | 
| 24 |  | #include <re2/re2.h> | 
| 25 |  | #include <re2/stringpiece.h> | 
| 26 |  | #include <sys/mman.h> | 
| 27 |  | #include <sys/stat.h> | 
| 28 |  | #include <sys/types.h> | 
| 29 |  | #include <unistd.h> | 
| 30 |  |  | 
| 31 |  | #include <boost/algorithm/string.hpp> | 
| 32 |  | #include <boost/algorithm/string/case_conv.hpp> | 
| 33 |  | #include <cstdlib> | 
| 34 |  | #include <filesystem> | 
| 35 |  | #include <memory> | 
| 36 |  | #include <string> | 
| 37 |  |  | 
| 38 |  | #include "common/exception.h" | 
| 39 |  | #include "common/logging.h" | 
| 40 |  | #include "common/status.h" | 
| 41 |  |  | 
| 42 |  | using boost::algorithm::to_lower_copy; | 
| 43 |  |  | 
| 44 |  | namespace fs = std::filesystem; | 
| 45 |  | #include "common/compile_check_begin.h" | 
| 46 |  |  | 
| 47 |  | namespace doris { | 
| 48 |  |  | 
| 49 |  | namespace vectorized { | 
| 50 |  | using ZoneList = std::unordered_map<std::string, cctz::time_zone>; | 
| 51 |  | } | 
| 52 |  |  | 
| 53 |  | RE2 time_zone_offset_format_reg(R"(^[+-]{1}\d{2}\:\d{2}$)"); // visiting is thread-safe | 
| 54 |  |  | 
| 55 |  | // for ut, make it never nullptr. | 
| 56 |  | std::unique_ptr<vectorized::ZoneList> lower_zone_cache_ = std::make_unique<vectorized::ZoneList>(); | 
| 57 |  |  | 
| 58 |  | const std::string TimezoneUtils::default_time_zone = "+08:00"; | 
| 59 |  | static const char* tzdir = "/usr/share/zoneinfo"; // default value, may change by TZDIR env var | 
| 60 |  |  | 
| 61 | 6 | void TimezoneUtils::clear_timezone_caches() { | 
| 62 | 6 |     lower_zone_cache_->clear(); | 
| 63 | 6 | } | 
| 64 | 2 | size_t TimezoneUtils::cache_size() { | 
| 65 | 2 |     return lower_zone_cache_->size(); | 
| 66 | 2 | } | 
| 67 |  |  | 
| 68 | 135k | static bool parse_save_name_tz(const std::string& tz_name) { | 
| 69 | 135k |     cctz::time_zone tz; | 
| 70 | 135k |     PROPAGATE_FALSE(cctz::load_time_zone(tz_name, &tz)); | 
| 71 | 134k |     lower_zone_cache_->emplace(to_lower_copy(tz_name), tz); | 
| 72 | 134k |     return true; | 
| 73 | 135k | } | 
| 74 |  |  | 
| 75 | 227 | void TimezoneUtils::load_timezones_to_cache() { | 
| 76 | 227 |     std::string base_str; | 
| 77 |  |     // try get from system | 
| 78 | 227 |     char* tzdir_env = std::getenv("TZDIR"); | 
| 79 | 227 |     if (tzdir_env && *tzdir_env) { | 
| 80 | 0 |         tzdir = tzdir_env; | 
| 81 | 0 |     } | 
| 82 |  |  | 
| 83 | 227 |     base_str = tzdir; | 
| 84 | 227 |     base_str += '/'; | 
| 85 |  |  | 
| 86 | 227 |     const auto root_path = fs::path {base_str}; | 
| 87 | 227 |     if (!exists(root_path)) { | 
| 88 | 0 |         throw Exception(Status::FatalError("Cannot find system tzfile. Doris exiting!")); | 
| 89 | 0 |     } | 
| 90 |  |  | 
| 91 | 227 |     std::set<std::string> ignore_paths = {"posix", "right"}; // duplications. ignore them. | 
| 92 |  |  | 
| 93 | 141k |     for (fs::recursive_directory_iterator it {base_str}; it != end(it); it++) { | 
| 94 | 140k |         const auto& dir_entry = *it; | 
| 95 | 140k |         try { | 
| 96 | 140k |             if (dir_entry.is_regular_file() || | 
| 97 | 140k |                 (dir_entry.is_symlink() && is_regular_file(read_symlink(dir_entry)))) { | 
| 98 | 135k |                 auto tz_name = dir_entry.path().string().substr(base_str.length()); | 
| 99 | 135k |                 if (!parse_save_name_tz(tz_name)) { | 
| 100 | 1.13k |                     LOG(WARNING) << "Meet illegal tzdata file: " << tz_name << ". skipped"; | 
| 101 | 1.13k |                 } | 
| 102 | 135k |             } else if (dir_entry.is_directory() && | 
| 103 | 4.99k |                        ignore_paths.contains(dir_entry.path().filename())) { | 
| 104 | 454 |                 it.disable_recursion_pending(); | 
| 105 | 454 |             } | 
| 106 | 140k |         } catch (const fs::filesystem_error& e) { | 
| 107 |  |             // maybe symlink loop or to nowhere... | 
| 108 | 0 |             LOG(WARNING) << "filesystem error when loading timezone file from " << dir_entry.path() | 
| 109 | 0 |                          << ": " << e.what(); | 
| 110 | 0 |         } | 
| 111 | 140k |     } | 
| 112 |  |     // some special cases. Z = Zulu. CST = Asia/Shanghai | 
| 113 | 227 |     if (auto it = lower_zone_cache_->find("zulu"); it != lower_zone_cache_->end()) { | 
| 114 | 227 |         lower_zone_cache_->emplace("z", it->second); | 
| 115 | 227 |     } | 
| 116 | 227 |     if (auto it = lower_zone_cache_->find("asia/shanghai"); it != lower_zone_cache_->end()) { | 
| 117 | 227 |         lower_zone_cache_->emplace("cst", it->second); | 
| 118 | 227 |     } | 
| 119 |  |  | 
| 120 | 227 |     lower_zone_cache_->erase("lmt"); // local mean time for every timezone | 
| 121 |  |  | 
| 122 | 227 |     load_offsets_to_cache(); | 
| 123 | 227 |     LOG(INFO) << "Preloaded" << lower_zone_cache_->size() << " timezones."; | 
| 124 | 227 | } | 
| 125 |  |  | 
// Formats an hour value as text, zero-padding single-digit magnitudes to two digits.
//   -9..-1  -> "-09".."-01"
//    0..9   -> "00".."09"
//   others  -> plain decimal form (e.g. "-12", "14")
// No '+' is ever emitted; the caller adds one where needed.
static std::string to_hour_string(int arg) {
    const bool single_digit = arg > -10 && arg < 10;
    if (!single_digit) {
        return std::to_string(arg);
    }

    const bool negative = arg < 0;
    std::string padded = negative ? "-0" : "0";
    padded += std::to_string(negative ? -arg : arg);
    return padded;
}
| 134 |  |  | 
| 135 | 228 | void TimezoneUtils::load_offsets_to_cache() { | 
| 136 | 6.38k |     for (int hour = -12; hour <= +14; hour++) { | 
| 137 | 18.4k |         for (int minute = 0; minute <= 30; minute += 30) { | 
| 138 | 12.3k |             std::string offset_str = (hour >= 0 ? "+" : "") + to_hour_string(hour) + ':' + | 
| 139 | 12.3k |                                      (minute == 0 ? "00" : "30"); | 
| 140 | 12.3k |             cctz::time_zone result; | 
| 141 | 12.3k |             parse_tz_offset_string(offset_str, result); | 
| 142 | 12.3k |             lower_zone_cache_->emplace(offset_str, result); | 
| 143 | 12.3k |         } | 
| 144 | 6.15k |     } | 
| 145 |  |     // -00 for hour is also valid | 
| 146 | 228 |     std::string offset_str = "-00:00"; | 
| 147 | 228 |     cctz::time_zone result; | 
| 148 | 228 |     parse_tz_offset_string(offset_str, result); | 
| 149 | 228 |     lower_zone_cache_->emplace(offset_str, result); | 
| 150 | 228 |     offset_str = "-00:30"; | 
| 151 | 228 |     parse_tz_offset_string(offset_str, result); | 
| 152 | 228 |     lower_zone_cache_->emplace(offset_str, result); | 
| 153 | 228 | } | 
| 154 |  |  | 
| 155 | 182k | bool TimezoneUtils::find_cctz_time_zone(const std::string& timezone, cctz::time_zone& ctz) { | 
| 156 | 182k |     if (auto it = lower_zone_cache_->find(to_lower_copy(timezone)); it != lower_zone_cache_->end()) | 
| 157 | 101k |             [[likely]] { | 
| 158 | 101k |         ctz = it->second; | 
| 159 | 101k |         return true; | 
| 160 | 101k |     } | 
| 161 | 80.9k |     return false; | 
| 162 | 182k | } | 
| 163 |  |  | 
| 164 | 12.7k | bool TimezoneUtils::parse_tz_offset_string(const std::string& timezone, cctz::time_zone& ctz) { | 
| 165 |  |     // like +08:00, which not in timezone_names_map_ | 
| 166 | 12.7k |     re2::StringPiece value; | 
| 167 | 12.7k |     if (time_zone_offset_format_reg.Match(timezone, 0, timezone.size(), RE2::UNANCHORED, &value, 1)) | 
| 168 | 12.7k |             [[likely]] { | 
| 169 | 12.7k |         bool positive = value[0] != '-'; | 
| 170 |  |  | 
| 171 |  |         //Regular expression guarantees hour and minute must be int | 
| 172 | 12.7k |         int hour = std::stoi(value.substr(1, 2).as_string()); | 
| 173 | 12.7k |         int minute = std::stoi(value.substr(4, 2).as_string()); | 
| 174 |  |  | 
| 175 |  |         // timezone offsets around the world extended from -12:00 to +14:00 | 
| 176 | 12.7k |         if (!positive && hour > 12) { | 
| 177 | 1 |             return false; | 
| 178 | 12.7k |         } else if (positive && hour > 14) { | 
| 179 | 1 |             return false; | 
| 180 | 1 |         } | 
| 181 | 12.7k |         int offset = hour * 60 * 60 + minute * 60; | 
| 182 | 12.7k |         offset *= positive ? 1 : -1; | 
| 183 | 12.7k |         ctz = cctz::fixed_time_zone(cctz::seconds(offset)); | 
| 184 | 12.7k |         return true; | 
| 185 | 12.7k |     } | 
| 186 | 1 |     return false; | 
| 187 | 12.7k | } | 
| 188 |  |  | 
| 189 |  | #include "common/compile_check_end.h" | 
| 190 |  | } // namespace doris |