Coverage Report

Created: 2026-04-27 08:40

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/util/timezone_utils.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "util/timezone_utils.h"
19
20
#include <cctz/civil_time.h>
21
#include <cctz/time_zone.h>
22
#include <fcntl.h>
23
#include <glog/logging.h>
24
#include <re2/re2.h>
25
#include <re2/stringpiece.h>
26
#include <sys/mman.h>
27
#include <sys/stat.h>
28
#include <sys/types.h>
29
#include <unistd.h>
30
31
#include <algorithm>
32
#include <boost/algorithm/string.hpp>
33
#include <boost/algorithm/string/case_conv.hpp>
34
#include <cctype>
35
#include <chrono>
36
#include <cstdlib>
37
#include <filesystem>
38
#include <memory>
39
#include <string>
40
#include <string_view>
41
42
#include "common/exception.h"
43
#include "common/logging.h"
44
#include "common/status.h"
45
46
using boost::algorithm::to_lower_copy;
47
48
namespace fs = std::filesystem;
49
50
namespace doris {
51
52
// Container type of the timezone cache: maps a name string to its cctz::time_zone.
using ZoneList = std::unordered_map<std::string, cctz::time_zone>;
53
54
// Pattern for an offset written as a sign, two hour digits, ':' and two minute digits,
// e.g. "+08:00". Used by parse_tz_offset_string() on already-normalized input.
RE2 time_zone_offset_format_reg(R"(^[+-]{1}\d{2}\:\d{2}$)"); // visiting is thread-safe
55
56
// The cache itself. Named zones are inserted under their lowercased name and lookups
// lowercase the requested name first.
// for ut, make it never nullptr.
57
std::unique_ptr<ZoneList> lower_zone_cache_ = std::make_unique<ZoneList>();
58
59
// Value of TimezoneUtils::default_time_zone; its consumers are not visible in this file.
const std::string TimezoneUtils::default_time_zone = "+08:00";
60
// Root directory scanned by load_timezones_to_cache().
static const char* tzdir = "/usr/share/zoneinfo"; // default value, may change by TZDIR env var
61
62
6
void TimezoneUtils::clear_timezone_caches() {
63
6
    lower_zone_cache_->clear();
64
6
}
65
2
size_t TimezoneUtils::cache_size() {
66
2
    return lower_zone_cache_->size();
67
2
}
68
69
148k
static bool parse_save_name_tz(const std::string& tz_name) {
70
148k
    cctz::time_zone tz;
71
148k
    PROPAGATE_FALSE(cctz::load_time_zone(tz_name, &tz));
72
146k
    lower_zone_cache_->emplace(to_lower_copy(tz_name), tz);
73
146k
    return true;
74
148k
}
75
76
247
void TimezoneUtils::load_timezones_to_cache() {
77
247
    std::string base_str;
78
    // try get from system
79
247
    char* tzdir_env = std::getenv("TZDIR");
80
247
    if (tzdir_env && *tzdir_env) {
81
0
        tzdir = tzdir_env;
82
0
    }
83
84
247
    base_str = tzdir;
85
247
    base_str += '/';
86
87
247
    const auto root_path = fs::path {base_str};
88
247
    if (!exists(root_path)) {
89
0
        throw Exception(Status::FatalError("Cannot find system tzfile. Doris exiting!"));
90
0
    }
91
92
247
    std::set<std::string> ignore_paths = {"posix", "right"}; // duplications. ignore them.
93
94
153k
    for (fs::recursive_directory_iterator it {base_str}; it != end(it); it++) {
95
153k
        const auto& dir_entry = *it;
96
153k
        try {
97
153k
            if (dir_entry.is_regular_file() ||
98
153k
                (dir_entry.is_symlink() && is_regular_file(read_symlink(dir_entry)))) {
99
148k
                auto tz_name = dir_entry.path().string().substr(base_str.length());
100
148k
                if (!parse_save_name_tz(tz_name)) {
101
1.24k
                    LOG(WARNING) << "Meet illegal tzdata file: " << tz_name << ". skipped";
102
1.24k
                }
103
148k
            } else if (dir_entry.is_directory() &&
104
5.44k
                       ignore_paths.contains(dir_entry.path().filename())) {
105
494
                it.disable_recursion_pending();
106
494
            }
107
153k
        } catch (const fs::filesystem_error& e) {
108
            // maybe symlink loop or to nowhere...
109
0
            LOG(WARNING) << "filesystem error when loading timezone file from " << dir_entry.path()
110
0
                         << ": " << e.what();
111
0
        }
112
153k
    }
113
    // some special cases. Z = Zulu. CST = Asia/Shanghai
114
247
    if (auto it = lower_zone_cache_->find("zulu"); it != lower_zone_cache_->end()) {
115
247
        lower_zone_cache_->emplace("z", it->second);
116
247
    }
117
247
    if (auto it = lower_zone_cache_->find("asia/shanghai"); it != lower_zone_cache_->end()) {
118
247
        lower_zone_cache_->emplace("cst", it->second);
119
247
    }
120
121
247
    lower_zone_cache_->erase("lmt"); // local mean time for every timezone
122
123
247
    load_offsets_to_cache();
124
247
    LOG(INFO) << "Preloaded" << lower_zone_cache_->size() << " timezones.";
125
247
}
126
127
21.3k
// Formats an hour value with at least two digits, keeping a leading '-' for negatives:
// -9..-1 become "-09".."-01", 0..9 become "00".."09", anything else is printed as is.
static std::string to_hour_string(int arg) {
    const bool needs_padding = arg > -10 && arg < 10;
    if (!needs_padding) {
        return std::to_string(arg);
    }

    std::string text = arg < 0 ? "-0" : "0";
    text += std::to_string(std::abs(arg));
    return text;
}
135
136
264
void TimezoneUtils::load_offsets_to_cache() {
137
264
    static constexpr int supported_minutes[] = {0, 30, 45};
138
7.39k
    for (int hour = -12; hour <= +14; hour++) {
139
21.3k
        for (int minute : supported_minutes) {
140
21.3k
            char min_str[3];
141
21.3k
            snprintf(min_str, sizeof(min_str), "%02d", minute);
142
21.3k
            std::string offset_str = (hour >= 0 ? "+" : "") + to_hour_string(hour) + ':' + min_str;
143
21.3k
            cctz::time_zone result;
144
21.3k
            parse_tz_offset_string(offset_str, result);
145
21.3k
            lower_zone_cache_->emplace(offset_str, result);
146
21.3k
        }
147
7.12k
    }
148
    // -00 for hour is also valid
149
264
    std::string offset_str = "-00:00";
150
264
    cctz::time_zone result;
151
264
    parse_tz_offset_string(offset_str, result);
152
264
    lower_zone_cache_->emplace(offset_str, result);
153
264
    offset_str = "-00:30";
154
264
    parse_tz_offset_string(offset_str, result);
155
264
    lower_zone_cache_->emplace(offset_str, result);
156
264
    offset_str = "-00:45";
157
264
    parse_tz_offset_string(offset_str, result);
158
264
    lower_zone_cache_->emplace(offset_str, result);
159
264
}
160
161
2.77M
bool TimezoneUtils::find_cctz_time_zone(const std::string& timezone, cctz::time_zone& ctz) {
162
2.77M
    if (auto it = lower_zone_cache_->find(to_lower_copy(timezone)); it != lower_zone_cache_->end())
163
2.72M
            [[likely]] {
164
2.72M
        ctz = it->second;
165
2.72M
        return true;
166
2.72M
    }
167
168
47.3k
    std::string normalized;
169
47.3k
    if (!normalize_timezone_name(timezone, &normalized)) {
170
315
        return false;
171
315
    }
172
47.0k
    if (auto it = lower_zone_cache_->find(to_lower_copy(normalized));
173
47.0k
        it != lower_zone_cache_->end()) [[likely]] {
174
4
        ctz = it->second;
175
4
        return true;
176
4
    }
177
46.9k
    return parse_tz_offset_string(normalized, ctz);
178
47.0k
}
179
180
// Reports the UTC offset of `timezone` when its name identifies it as a fixed-offset zone.
//
// Returns true and writes the offset in seconds to `*offset_seconds` for "UTC", "Etc/UTC",
// "Etc/GMT", and for any name starting with "Fixed/" or "Etc/GMT". Returns false, leaving
// `*offset_seconds` untouched, for every other name.
bool TimezoneUtils::try_get_fixed_offset_seconds(const cctz::time_zone& timezone,
                                                 int32_t* offset_seconds) {
    const auto& zone_name = timezone.name();

    const bool is_plain_utc =
            zone_name == "UTC" || zone_name == "Etc/UTC" || zone_name == "Etc/GMT";
    if (is_plain_utc) {
        *offset_seconds = 0;
        return true;
    }

    // cctz names fixed_time_zone() instances with the "Fixed/" prefix. TZDB's Etc/GMT*
    // zones are fixed offsets too; cctz handles their POSIX-style reversed sign in lookup_offset().
    // If this naming convention changes, falling through to the generic path remains correct.
    static const auto epoch = std::chrono::time_point_cast<cctz::sys_seconds>(
            std::chrono::system_clock::from_time_t(0));

    const bool has_fixed_prefix =
            zone_name.rfind("Fixed/", 0) == 0 || zone_name.rfind("Etc/GMT", 0) == 0;
    if (!has_fixed_prefix) {
        return false;
    }

    // The offset is the same at any instant for these zones, so the epoch is as good as any.
    *offset_seconds = timezone.lookup_offset(epoch).offset;
    return true;
}
199
200
// Rewrites a signed offset into the canonical "+HH:MM" / "-HH:MM" form.
//
// Accepted inputs start with '+' or '-' followed by either "H:MM" / "HH:MM", or - only when
// `allow_hour_only` is true - a bare "H" / "HH" (minutes then default to 00). Hours may be at
// most 14 for '+' and 12 for '-'; minutes must be below 60.
//
// On success the canonical text is written to `*normalized` and true is returned. On any
// rejection false is returned and `*normalized` is left untouched.
static bool normalize_offset_string(const std::string& timezone, bool allow_hour_only,
                                    std::string* normalized) {
    if (timezone.size() < 2) {
        return false;
    }
    const char sign = timezone[0];
    if (sign != '+' && sign != '-') {
        return false;
    }

    const auto all_digits = [](std::string_view text) -> bool {
        return std::all_of(text.begin(), text.end(), [](char c) {
            return std::isdigit(static_cast<unsigned char>(c)) != 0;
        });
    };
    // Decimal value of a digit-only string; callers guarantee at most two characters.
    const auto to_number = [](std::string_view digits) -> int {
        int value = 0;
        for (const char c : digits) {
            value = value * 10 + (c - '0');
        }
        return value;
    };

    const std::string_view body = std::string_view(timezone).substr(1);
    std::string_view hour_text;
    std::string_view minute_text; // stays empty (== 0 minutes) for the hour-only form

    const auto colon_pos = body.find(':');
    if (colon_pos == std::string_view::npos) {
        if (!allow_hour_only) {
            return false;
        }
        hour_text = body;
    } else {
        hour_text = body.substr(0, colon_pos);
        minute_text = body.substr(colon_pos + 1);
        // Minutes must be written with exactly two digits.
        if (minute_text.size() != 2 || !all_digits(minute_text)) {
            return false;
        }
    }

    // Hours may be written with one or two digits.
    if (hour_text.empty() || hour_text.size() > 2 || !all_digits(hour_text)) {
        return false;
    }

    const int hour = to_number(hour_text);
    const int minute = to_number(minute_text);
    const int max_hour = sign == '+' ? 14 : 12;
    if (hour > max_hour || minute >= 60) {
        return false;
    }

    std::string canonical(1, sign);
    if (hour < 10) {
        canonical += '0';
    }
    canonical += std::to_string(hour);
    canonical += ':';
    if (minute < 10) {
        canonical += '0';
    }
    canonical += std::to_string(minute);

    *normalized = canonical;
    return true;
}
247
248
120k
bool TimezoneUtils::normalize_timezone_name(const std::string& timezone, std::string* normalized) {
249
120k
    const std::string lower = to_lower_copy(timezone);
250
120k
    if (lower == "utc" || lower == "etc/utc" || lower == "zulu") {
251
285
        *normalized = "UTC";
252
285
        return true;
253
285
    }
254
255
119k
    if (lower.rfind("utc", 0) == 0 || lower.rfind("gmt", 0) == 0) {
256
6
        if (timezone.size() <= 3) {
257
0
            return false;
258
0
        }
259
6
        return normalize_offset_string(timezone.substr(3), true, normalized);
260
6
    }
261
262
119k
    if (!timezone.empty() && (timezone[0] == '+' || timezone[0] == '-')) {
263
119k
        return normalize_offset_string(timezone, false, normalized);
264
119k
    }
265
266
310
    return false;
267
119k
}
268
269
70.9k
bool TimezoneUtils::parse_tz_offset_string(const std::string& timezone, cctz::time_zone& ctz) {
270
70.9k
    std::string normalized;
271
70.9k
    if (!normalize_timezone_name(timezone, &normalized)) {
272
5
        return false;
273
5
    }
274
70.9k
    if (normalized == "UTC") {
275
143
        ctz = cctz::utc_time_zone();
276
143
        return true;
277
143
    }
278
279
70.8k
    re2::StringPiece value;
280
70.8k
    if (time_zone_offset_format_reg.Match(normalized, 0, normalized.size(), RE2::UNANCHORED, &value,
281
70.8k
                                          1)) [[likely]] {
282
70.8k
        const bool positive = value[0] != '-';
283
70.8k
        const int hour = std::stoi(value.substr(1, 2).as_string());
284
70.8k
        const int minute = std::stoi(value.substr(4, 2).as_string());
285
70.8k
        int offset = hour * 60 * 60 + minute * 60;
286
70.8k
        offset *= positive ? 1 : -1;
287
70.8k
        ctz = cctz::fixed_time_zone(cctz::seconds(offset));
288
70.8k
        return true;
289
70.8k
    }
290
0
    return false;
291
70.8k
}
292
293
} // namespace doris