/root/doris/be/src/util/timezone_utils.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "util/timezone_utils.h" |
19 | | |
20 | | #include <cctz/civil_time.h> |
21 | | #include <cctz/time_zone.h> |
22 | | #include <fcntl.h> |
23 | | #include <glog/logging.h> |
24 | | #include <re2/re2.h> |
25 | | #include <re2/stringpiece.h> |
26 | | #include <sys/mman.h> |
27 | | #include <sys/stat.h> |
28 | | #include <sys/types.h> |
29 | | #include <unistd.h> |
30 | | |
31 | | #include <boost/algorithm/string.hpp> |
32 | | #include <boost/algorithm/string/case_conv.hpp> |
33 | | #include <cstdlib> |
34 | | #include <filesystem> |
35 | | #include <memory> |
36 | | #include <string> |
37 | | |
38 | | #include "common/exception.h" |
39 | | #include "common/logging.h" |
40 | | #include "common/status.h" |
41 | | |
42 | | using boost::algorithm::to_lower_copy; |
43 | | |
44 | | namespace fs = std::filesystem; |
45 | | #include "common/compile_check_begin.h" |
46 | | |
47 | | namespace doris { |
48 | | |
namespace vectorized {
// Associates a time zone name (a string key) with its cctz::time_zone object.
using ZoneList = std::unordered_map<std::string, cctz::time_zone>;
} // namespace vectorized

// Matches a whole string of the form "+HH:MM" or "-HH:MM": one sign character,
// two digits, a colon, two digits.
RE2 time_zone_offset_format_reg(R"(^[+-]{1}\d{2}\:\d{2}$)"); // visiting is thread-safe

// Cache consulted by find_cctz_time_zone(). Keys inserted from tzdata file names are
// lower-cased before insertion, and lookups lower-case the requested name.
// for ut, make it never nullptr.
std::unique_ptr<vectorized::ZoneList> lower_zone_cache_ = std::make_unique<vectorized::ZoneList>();

const std::string TimezoneUtils::default_time_zone = "+08:00";
// Directory scanned for tzdata files by load_timezones_to_cache().
static const char* tzdir = "/usr/share/zoneinfo"; // default value, may change by TZDIR env var
60 | | |
61 | 6 | void TimezoneUtils::clear_timezone_caches() { |
62 | 6 | lower_zone_cache_->clear(); |
63 | 6 | } |
64 | 2 | size_t TimezoneUtils::cache_size() { |
65 | 2 | return lower_zone_cache_->size(); |
66 | 2 | } |
67 | | |
68 | 137k | static bool parse_save_name_tz(const std::string& tz_name) { |
69 | 137k | cctz::time_zone tz; |
70 | 137k | PROPAGATE_FALSE(cctz::load_time_zone(tz_name, &tz)); |
71 | 136k | lower_zone_cache_->emplace(to_lower_copy(tz_name), tz); |
72 | 136k | return true; |
73 | 137k | } |
74 | | |
75 | 230 | void TimezoneUtils::load_timezones_to_cache() { |
76 | 230 | std::string base_str; |
77 | | // try get from system |
78 | 230 | char* tzdir_env = std::getenv("TZDIR"); |
79 | 230 | if (tzdir_env && *tzdir_env) { |
80 | 0 | tzdir = tzdir_env; |
81 | 0 | } |
82 | | |
83 | 230 | base_str = tzdir; |
84 | 230 | base_str += '/'; |
85 | | |
86 | 230 | const auto root_path = fs::path {base_str}; |
87 | 230 | if (!exists(root_path)) { |
88 | 0 | throw Exception(Status::FatalError("Cannot find system tzfile. Doris exiting!")); |
89 | 0 | } |
90 | | |
91 | 230 | std::set<std::string> ignore_paths = {"posix", "right"}; // duplications. ignore them. |
92 | | |
93 | 143k | for (fs::recursive_directory_iterator it {base_str}; it != end(it); it++) { |
94 | 142k | const auto& dir_entry = *it; |
95 | 142k | try { |
96 | 142k | if (dir_entry.is_regular_file() || |
97 | 142k | (dir_entry.is_symlink() && is_regular_file(read_symlink(dir_entry)))) { |
98 | 137k | auto tz_name = dir_entry.path().string().substr(base_str.length()); |
99 | 137k | if (!parse_save_name_tz(tz_name)) { |
100 | 1.15k | LOG(WARNING) << "Meet illegal tzdata file: " << tz_name << ". skipped"; |
101 | 1.15k | } |
102 | 137k | } else if (dir_entry.is_directory() && |
103 | 5.06k | ignore_paths.contains(dir_entry.path().filename())) { |
104 | 460 | it.disable_recursion_pending(); |
105 | 460 | } |
106 | 142k | } catch (const fs::filesystem_error& e) { |
107 | | // maybe symlink loop or to nowhere... |
108 | 0 | LOG(WARNING) << "filesystem error when loading timezone file from " << dir_entry.path() |
109 | 0 | << ": " << e.what(); |
110 | 0 | } |
111 | 142k | } |
112 | | // some special cases. Z = Zulu. CST = Asia/Shanghai |
113 | 230 | if (auto it = lower_zone_cache_->find("zulu"); it != lower_zone_cache_->end()) { |
114 | 230 | lower_zone_cache_->emplace("z", it->second); |
115 | 230 | } |
116 | 230 | if (auto it = lower_zone_cache_->find("asia/shanghai"); it != lower_zone_cache_->end()) { |
117 | 230 | lower_zone_cache_->emplace("cst", it->second); |
118 | 230 | } |
119 | | |
120 | 230 | lower_zone_cache_->erase("lmt"); // local mean time for every timezone |
121 | | |
122 | 230 | load_offsets_to_cache(); |
123 | 230 | LOG(INFO) << "Preloaded" << lower_zone_cache_->size() << " timezones."; |
124 | 230 | } |
125 | | |
// Formats an hour value as text with at least two digits: values in [-9, 9] are
// zero-padded ("-09", "00", "09"); anything else is printed as-is ("-12", "14").
static std::string to_hour_string(int arg) {
    const bool needs_padding = arg > -10 && arg < 10;
    if (!needs_padding) {
        return std::to_string(arg);
    }
    const char* padded_prefix = (arg < 0) ? "-0" : "0";
    return padded_prefix + std::to_string(std::abs(arg));
}
134 | | |
135 | 231 | void TimezoneUtils::load_offsets_to_cache() { |
136 | 6.46k | for (int hour = -12; hour <= +14; hour++) { |
137 | 18.7k | for (int minute = 0; minute <= 30; minute += 30) { |
138 | 12.4k | std::string offset_str = (hour >= 0 ? "+" : "") + to_hour_string(hour) + ':' + |
139 | 12.4k | (minute == 0 ? "00" : "30"); |
140 | 12.4k | cctz::time_zone result; |
141 | 12.4k | parse_tz_offset_string(offset_str, result); |
142 | 12.4k | lower_zone_cache_->emplace(offset_str, result); |
143 | 12.4k | } |
144 | 6.23k | } |
145 | | // -00 for hour is also valid |
146 | 231 | std::string offset_str = "-00:00"; |
147 | 231 | cctz::time_zone result; |
148 | 231 | parse_tz_offset_string(offset_str, result); |
149 | 231 | lower_zone_cache_->emplace(offset_str, result); |
150 | 231 | offset_str = "-00:30"; |
151 | 231 | parse_tz_offset_string(offset_str, result); |
152 | 231 | lower_zone_cache_->emplace(offset_str, result); |
153 | 231 | } |
154 | | |
155 | 182k | bool TimezoneUtils::find_cctz_time_zone(const std::string& timezone, cctz::time_zone& ctz) { |
156 | 182k | if (auto it = lower_zone_cache_->find(to_lower_copy(timezone)); it != lower_zone_cache_->end()) |
157 | 101k | [[likely]] { |
158 | 101k | ctz = it->second; |
159 | 101k | return true; |
160 | 101k | } |
161 | 80.9k | return false; |
162 | 182k | } |
163 | | |
164 | 12.9k | bool TimezoneUtils::parse_tz_offset_string(const std::string& timezone, cctz::time_zone& ctz) { |
165 | | // like +08:00, which not in timezone_names_map_ |
166 | 12.9k | re2::StringPiece value; |
167 | 12.9k | if (time_zone_offset_format_reg.Match(timezone, 0, timezone.size(), RE2::UNANCHORED, &value, 1)) |
168 | 12.9k | [[likely]] { |
169 | 12.9k | bool positive = value[0] != '-'; |
170 | | |
171 | | //Regular expression guarantees hour and minute must be int |
172 | 12.9k | int hour = std::stoi(value.substr(1, 2).as_string()); |
173 | 12.9k | int minute = std::stoi(value.substr(4, 2).as_string()); |
174 | | |
175 | | // timezone offsets around the world extended from -12:00 to +14:00 |
176 | 12.9k | if (!positive && hour > 12) { |
177 | 1 | return false; |
178 | 12.9k | } else if (positive && hour > 14) { |
179 | 1 | return false; |
180 | 1 | } |
181 | 12.9k | int offset = hour * 60 * 60 + minute * 60; |
182 | 12.9k | offset *= positive ? 1 : -1; |
183 | 12.9k | ctz = cctz::fixed_time_zone(cctz::seconds(offset)); |
184 | 12.9k | return true; |
185 | 12.9k | } |
186 | 1 | return false; |
187 | 12.9k | } |
188 | | |
189 | | #include "common/compile_check_end.h" |
190 | | } // namespace doris |