/root/doris/be/src/util/timezone_utils.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "util/timezone_utils.h" |
19 | | |
20 | | #include <cctz/civil_time.h> |
21 | | #include <cctz/time_zone.h> |
22 | | #include <fcntl.h> |
23 | | #include <glog/logging.h> |
24 | | #include <re2/re2.h> |
25 | | #include <re2/stringpiece.h> |
26 | | #include <sys/mman.h> |
27 | | #include <sys/stat.h> |
28 | | #include <sys/types.h> |
29 | | #include <unistd.h> |
30 | | |
31 | | #include <boost/algorithm/string.hpp> |
32 | | #include <boost/algorithm/string/case_conv.hpp> |
33 | | #include <cstdlib> |
34 | | #include <filesystem> |
35 | | #include <memory> |
36 | | #include <string> |
37 | | |
38 | | #include "common/logging.h" |
39 | | #include "common/status.h" |
40 | | |
41 | | using boost::algorithm::to_lower_copy; |
42 | | |
43 | | namespace fs = std::filesystem; |
44 | | |
45 | | namespace doris { |
46 | | |
47 | | namespace vectorized { |
48 | | using ZoneList = std::unordered_map<std::string, cctz::time_zone>; |
49 | | } |
50 | | |
51 | | RE2 time_zone_offset_format_reg(R"(^[+-]{1}\d{2}\:\d{2}$)"); // visiting is thread-safe |
52 | | |
53 | | // for ut, make it never nullptr. |
54 | | std::unique_ptr<vectorized::ZoneList> lower_zone_cache_ = std::make_unique<vectorized::ZoneList>(); |
55 | | |
56 | | const std::string TimezoneUtils::default_time_zone = "+08:00"; |
57 | | static const char* tzdir = "/usr/share/zoneinfo"; // default value, may change by TZDIR env var |
58 | | |
59 | 2 | void TimezoneUtils::clear_timezone_caches() { |
60 | 2 | lower_zone_cache_->clear(); |
61 | 2 | } |
62 | 2 | int TimezoneUtils::cache_size() { |
63 | 2 | return lower_zone_cache_->size(); |
64 | 2 | } |
65 | | |
66 | 38.3k | static bool parse_save_name_tz(const std::string& tz_name) { |
67 | 38.3k | cctz::time_zone tz; |
68 | 38.3k | PROPAGATE_FALSE(cctz::load_time_zone(tz_name, &tz)); |
69 | 38.0k | lower_zone_cache_->emplace(to_lower_copy(tz_name), tz); |
70 | 38.0k | return true; |
71 | 38.3k | } |
72 | | |
73 | 64 | void TimezoneUtils::load_timezones_to_cache() { |
74 | 64 | std::string base_str; |
75 | | // try get from system |
76 | 64 | char* tzdir_env = std::getenv("TZDIR"); |
77 | 64 | if (tzdir_env && *tzdir_env) { |
78 | 0 | tzdir = tzdir_env; |
79 | 0 | } |
80 | | |
81 | 64 | base_str = tzdir; |
82 | 64 | base_str += '/'; |
83 | | |
84 | 64 | const auto root_path = fs::path {base_str}; |
85 | 64 | if (!exists(root_path)) { |
86 | 0 | LOG(FATAL) << "Cannot find system tzfile. Doris exiting!"; |
87 | 0 | __builtin_unreachable(); |
88 | 0 | } |
89 | | |
90 | 64 | std::set<std::string> ignore_paths = {"posix", "right"}; // duplications. ignore them. |
91 | | |
92 | 39.8k | for (fs::recursive_directory_iterator it {base_str}; it != end(it); it++) { |
93 | 39.7k | const auto& dir_entry = *it; |
94 | 39.7k | if (dir_entry.is_regular_file() || |
95 | 39.7k | (dir_entry.is_symlink() && is_regular_file(read_symlink(dir_entry)))) { |
96 | 38.3k | auto tz_name = dir_entry.path().string().substr(base_str.length()); |
97 | 38.3k | if (!parse_save_name_tz(tz_name)) { |
98 | 320 | LOG(WARNING) << "Meet illegal tzdata file: " << tz_name << ". skipped"; |
99 | 320 | } |
100 | 38.3k | } else if (dir_entry.is_directory() && ignore_paths.contains(dir_entry.path().filename())) { |
101 | 128 | it.disable_recursion_pending(); |
102 | 128 | } |
103 | 39.7k | } |
104 | | // some special cases. Z = Zulu. CST = Asia/Shanghai |
105 | 64 | if (auto it = lower_zone_cache_->find("zulu"); it != lower_zone_cache_->end()) { |
106 | 64 | lower_zone_cache_->emplace("z", it->second); |
107 | 64 | } |
108 | 64 | if (auto it = lower_zone_cache_->find("asia/shanghai"); it != lower_zone_cache_->end()) { |
109 | 64 | lower_zone_cache_->emplace("cst", it->second); |
110 | 64 | } |
111 | | |
112 | 64 | lower_zone_cache_->erase("lmt"); // local mean time for every timezone |
113 | | |
114 | 64 | load_offsets_to_cache(); |
115 | 64 | LOG(INFO) << "Preloaded" << lower_zone_cache_->size() << " timezones."; |
116 | 64 | } |
117 | | |
/// Formats an hour value so that its magnitude has at least two digits.
///
/// Values in [-9, -1] become "-0N", values in [0, 9] become "0N", and every
/// other value is returned exactly as std::to_string prints it.
///
/// @param arg hour value to format
/// @return the zero-padded decimal string
static std::string to_hour_string(int arg) {
    // Two or more digits already: nothing to pad.
    if (arg <= -10 || arg >= 10) {
        return std::to_string(arg);
    }
    const char* const prefix = (arg < 0) ? "-0" : "0";
    std::string padded {prefix};
    padded += std::to_string(std::abs(arg));
    return padded;
}
126 | | |
127 | 65 | void TimezoneUtils::load_offsets_to_cache() { |
128 | 1.82k | for (int hour = -12; hour <= +14; hour++) { |
129 | 5.26k | for (int minute = 0; minute <= 30; minute += 30) { |
130 | 3.51k | std::string offset_str = (hour >= 0 ? "+" : "") + to_hour_string(hour) + ':' + |
131 | 3.51k | (minute == 0 ? "00" : "30"); |
132 | 3.51k | cctz::time_zone result; |
133 | 3.51k | parse_tz_offset_string(offset_str, result); |
134 | 3.51k | lower_zone_cache_->emplace(offset_str, result); |
135 | 3.51k | } |
136 | 1.75k | } |
137 | | // -00 for hour is also valid |
138 | 65 | std::string offset_str = "-00:00"; |
139 | 65 | cctz::time_zone result; |
140 | 65 | parse_tz_offset_string(offset_str, result); |
141 | 65 | lower_zone_cache_->emplace(offset_str, result); |
142 | 65 | offset_str = "-00:30"; |
143 | 65 | parse_tz_offset_string(offset_str, result); |
144 | 65 | lower_zone_cache_->emplace(offset_str, result); |
145 | 65 | } |
146 | | |
147 | 11.9k | bool TimezoneUtils::find_cctz_time_zone(const std::string& timezone, cctz::time_zone& ctz) { |
148 | 11.9k | if (auto it = lower_zone_cache_->find(to_lower_copy(timezone)); it != lower_zone_cache_->end()) |
149 | 11.9k | [[likely]] { |
150 | 11.9k | ctz = it->second; |
151 | 11.9k | return true; |
152 | 11.9k | } |
153 | 3 | return false; |
154 | 11.9k | } |
155 | | |
156 | 3.65k | bool TimezoneUtils::parse_tz_offset_string(const std::string& timezone, cctz::time_zone& ctz) { |
157 | | // like +08:00, which not in timezone_names_map_ |
158 | 3.65k | re2::StringPiece value; |
159 | 3.65k | if (time_zone_offset_format_reg.Match(timezone, 0, timezone.size(), RE2::UNANCHORED, &value, 1)) |
160 | 3.65k | [[likely]] { |
161 | 3.65k | bool positive = value[0] != '-'; |
162 | | |
163 | | //Regular expression guarantees hour and minute must be int |
164 | 3.65k | int hour = std::stoi(value.substr(1, 2).as_string()); |
165 | 3.65k | int minute = std::stoi(value.substr(4, 2).as_string()); |
166 | | |
167 | | // timezone offsets around the world extended from -12:00 to +14:00 |
168 | 3.65k | if (!positive && hour > 12) { |
169 | 1 | return false; |
170 | 3.65k | } else if (positive && hour > 14) { |
171 | 1 | return false; |
172 | 1 | } |
173 | 3.65k | int offset = hour * 60 * 60 + minute * 60; |
174 | 3.65k | offset *= positive ? 1 : -1; |
175 | 3.65k | ctz = cctz::fixed_time_zone(cctz::seconds(offset)); |
176 | 3.65k | return true; |
177 | 3.65k | } |
178 | 1 | return false; |
179 | 3.65k | } |
180 | | |
181 | | } // namespace doris |