be/src/exprs/function/function_convert_tz.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include <cctz/time_zone.h> |
19 | | |
20 | | #include <cstddef> |
21 | | #include <cstdint> |
22 | | #include <memory> |
23 | | #include <string> |
24 | | #include <utility> |
25 | | |
26 | | #include "common/status.h" |
27 | | #include "core/assert_cast.h" |
28 | | #include "core/binary_cast.hpp" |
29 | | #include "core/block/block.h" |
30 | | #include "core/block/column_numbers.h" |
31 | | #include "core/block/column_with_type_and_name.h" |
32 | | #include "core/column/column.h" |
33 | | #include "core/column/column_const.h" |
34 | | #include "core/column/column_nullable.h" |
35 | | #include "core/column/column_string.h" |
36 | | #include "core/column/column_vector.h" |
37 | | #include "core/data_type/data_type.h" |
38 | | #include "core/data_type/data_type_date.h" |
39 | | #include "core/data_type/data_type_date_or_datetime_v2.h" |
40 | | #include "core/data_type/data_type_date_time.h" |
41 | | #include "core/data_type/data_type_nullable.h" |
42 | | #include "core/data_type/data_type_string.h" |
43 | | #include "core/data_type/define_primitive_type.h" |
44 | | #include "core/data_type/primitive_type.h" |
45 | | #include "core/string_ref.h" |
46 | | #include "core/types.h" |
47 | | #include "core/value/vdatetime_value.h" |
48 | | #include "exec/common/util.hpp" |
49 | | #include "exprs/aggregate/aggregate_function.h" |
50 | | #include "exprs/function/datetime_errors.h" |
51 | | #include "exprs/function/function.h" |
52 | | #include "exprs/function/function_helpers.h" |
53 | | #include "exprs/function/simple_function_factory.h" |
54 | | #include "exprs/function_context.h" |
55 | | #include "util/timezone_utils.h" |
56 | | |
57 | | namespace doris { |
58 | | |
59 | | struct ConvertTzState { |
60 | | bool use_state = false; |
61 | | bool is_valid = false; |
62 | | cctz::time_zone from_tz; |
63 | | cctz::time_zone to_tz; |
64 | | }; |
65 | | |
66 | | class FunctionConvertTZ : public IFunction { |
67 | | constexpr static PrimitiveType PType = PrimitiveType::TYPE_DATETIMEV2; |
68 | | using DateValueType = PrimitiveTypeTraits<PType>::CppType; |
69 | | using ColumnType = PrimitiveTypeTraits<PType>::ColumnType; |
70 | | |
71 | | public: |
72 | | static constexpr auto name = "convert_tz"; |
73 | | |
74 | 80 | static FunctionPtr create() { return std::make_shared<FunctionConvertTZ>(); } |
75 | | |
76 | 1 | String get_name() const override { return name; } |
77 | | |
78 | 71 | size_t get_number_of_arguments() const override { return 3; } |
79 | | |
80 | 71 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
81 | 71 | return have_nullable(arguments) ? make_nullable(std::make_shared<DataTypeDateTimeV2>()) |
82 | 71 | : std::make_shared<DataTypeDateTimeV2>(); |
83 | 71 | } |
84 | | |
85 | | // default value of timezone is invalid, should skip to avoid wrong exception |
86 | 159 | bool use_default_implementation_for_nulls() const override { return false; } |
87 | | |
88 | 391 | Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { |
89 | 391 | if (scope == FunctionContext::THREAD_LOCAL) { |
90 | 320 | return Status::OK(); |
91 | 320 | } |
92 | 71 | std::shared_ptr<ConvertTzState> state = std::make_shared<ConvertTzState>(); |
93 | | |
94 | 71 | context->set_function_state(scope, state); |
95 | 71 | DCHECK_EQ(context->get_num_args(), 3); |
96 | 71 | const auto* const_from_tz = context->get_constant_col(1); |
97 | 71 | const auto* const_to_tz = context->get_constant_col(2); |
98 | | |
99 | | // ConvertTzState is used only when both the second and third parameters are constants |
100 | 71 | if (const_from_tz != nullptr && const_to_tz != nullptr) { |
101 | 47 | state->use_state = true; |
102 | 47 | init_convert_tz_state(state, const_from_tz, const_to_tz); |
103 | 47 | } else { |
104 | 24 | state->use_state = false; |
105 | 24 | } |
106 | | |
107 | 71 | return IFunction::open(context, scope); |
108 | 391 | } |
109 | | |
110 | | void init_convert_tz_state(std::shared_ptr<ConvertTzState> state, |
111 | | const ColumnPtrWrapper* const_from_tz, |
112 | 47 | const ColumnPtrWrapper* const_to_tz) { |
113 | 47 | auto const_data_from_tz = const_from_tz->column_ptr->get_data_at(0); |
114 | 47 | auto const_data_to_tz = const_to_tz->column_ptr->get_data_at(0); |
115 | | |
116 | | // from_tz and to_tz must both be non-null. |
117 | 47 | if (const_data_from_tz.data == nullptr || const_data_to_tz.data == nullptr) { |
118 | 2 | state->is_valid = false; |
119 | 2 | return; |
120 | 2 | } |
121 | | |
122 | 45 | auto from_tz_name = const_data_from_tz.to_string(); |
123 | 45 | auto to_tz_name = const_data_to_tz.to_string(); |
124 | | |
125 | 45 | if (!TimezoneUtils::find_cctz_time_zone(from_tz_name, state->from_tz)) [[unlikely]] { |
126 | 2 | throw Exception(ErrorCode::INVALID_ARGUMENT, "Operation {} invalid timezone: {}", name, |
127 | 2 | from_tz_name); |
128 | 2 | } |
129 | 43 | if (!TimezoneUtils::find_cctz_time_zone(to_tz_name, state->to_tz)) [[unlikely]] { |
130 | 2 | throw Exception(ErrorCode::INVALID_ARGUMENT, "Operation {} invalid timezone: {}", name, |
131 | 2 | to_tz_name); |
132 | 2 | } |
133 | 41 | state->is_valid = true; |
134 | 41 | } |
135 | | |
136 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
137 | 88 | uint32_t result, size_t input_rows_count) const override { |
138 | 88 | auto* convert_tz_state = reinterpret_cast<ConvertTzState*>( |
139 | 88 | context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); |
140 | 88 | if (!convert_tz_state) { |
141 | 0 | return Status::RuntimeError( |
142 | 0 | "funciton context for function '{}' must have ConvertTzState;", get_name()); |
143 | 0 | } |
144 | | |
145 | 88 | auto result_null_map_column = ColumnUInt8::create(input_rows_count, 0); |
146 | 88 | NullMap& result_null_map = assert_cast<ColumnUInt8&>(*result_null_map_column).get_data(); |
147 | | |
148 | 88 | ColumnPtr argument_columns[3]; |
149 | 88 | bool col_const[3]; |
150 | | |
151 | | // calculate result null map and col_const |
152 | 352 | for (int i = 0; i < 3; ++i) { |
153 | 264 | ColumnPtr& col = block.get_by_position(arguments[i]).column; |
154 | 264 | col_const[i] = is_column_const(*col); |
155 | 264 | const NullMap* null_map = VectorizedUtils::get_null_map(col); |
156 | 264 | if (null_map) { |
157 | 122 | VectorizedUtils::update_null_map(result_null_map, *null_map, col_const[i]); |
158 | 122 | } |
159 | 264 | } |
160 | | |
161 | | // Extract nested columns from const(nullable) wrappers |
162 | 88 | argument_columns[0] = col_const[0] ? static_cast<const ColumnConst&>( |
163 | 1 | *block.get_by_position(arguments[0]).column) |
164 | 1 | .convert_to_full_column() |
165 | 88 | : block.get_by_position(arguments[0]).column; |
166 | 88 | argument_columns[0] = remove_nullable(argument_columns[0]); |
167 | 88 | default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block, arguments); |
168 | 88 | argument_columns[1] = remove_nullable(argument_columns[1]); |
169 | 88 | argument_columns[2] = remove_nullable(argument_columns[2]); |
170 | | |
171 | 88 | auto result_column = ColumnType::create(); |
172 | 88 | if (convert_tz_state->use_state) { |
173 | | // ignore argument columns, use cached timezone input in state |
174 | 55 | execute_tz_const_with_state(convert_tz_state, |
175 | 55 | assert_cast<const ColumnType*>(argument_columns[0].get()), |
176 | 55 | assert_cast<ColumnType*>(result_column.get()), |
177 | 55 | result_null_map, input_rows_count); |
178 | 55 | } else if (col_const[1] && col_const[2]) { |
179 | | // arguments are const |
180 | 0 | execute_tz_const(context, assert_cast<const ColumnType*>(argument_columns[0].get()), |
181 | 0 | assert_cast<const ColumnString*>(argument_columns[1].get()), |
182 | 0 | assert_cast<const ColumnString*>(argument_columns[2].get()), |
183 | 0 | assert_cast<ColumnType*>(result_column.get()), result_null_map, |
184 | 0 | input_rows_count); |
185 | 33 | } else { |
186 | 33 | _execute(context, assert_cast<const ColumnType*>(argument_columns[0].get()), |
187 | 33 | assert_cast<const ColumnString*>(argument_columns[1].get()), |
188 | 33 | assert_cast<const ColumnString*>(argument_columns[2].get()), |
189 | 33 | assert_cast<ColumnType*>(result_column.get()), result_null_map, |
190 | 33 | input_rows_count); |
191 | 33 | } //if const |
192 | | |
193 | 88 | if (block.get_data_type(result)->is_nullable()) { |
194 | 77 | block.get_by_position(result).column = ColumnNullable::create( |
195 | 77 | std::move(result_column), std::move(result_null_map_column)); |
196 | 77 | } else { |
197 | 11 | block.get_by_position(result).column = std::move(result_column); |
198 | 11 | } |
199 | 88 | return Status::OK(); |
200 | 88 | } |
201 | | |
202 | | private: |
203 | | static void _execute(FunctionContext* context, const ColumnType* date_column, |
204 | | const ColumnString* from_tz_column, const ColumnString* to_tz_column, |
205 | | ColumnType* result_column, NullMap& result_null_map, |
206 | 33 | size_t input_rows_count) { |
207 | 132 | for (size_t i = 0; i < input_rows_count; i++) { |
208 | 99 | if (result_null_map[i]) { |
209 | 0 | result_column->insert_default(); |
210 | 0 | continue; |
211 | 0 | } |
212 | 99 | auto from_tz = from_tz_column->get_data_at(i).to_string(); |
213 | 99 | auto to_tz = to_tz_column->get_data_at(i).to_string(); |
214 | 99 | execute_inner_loop(date_column, from_tz, to_tz, result_column, result_null_map, i); |
215 | 99 | } |
216 | 33 | } |
217 | | |
218 | | static void execute_tz_const_with_state(ConvertTzState* convert_tz_state, |
219 | | const ColumnType* date_column, |
220 | | ColumnType* result_column, NullMap& result_null_map, |
221 | 55 | size_t input_rows_count) { |
222 | 55 | cctz::time_zone& from_tz = convert_tz_state->from_tz; |
223 | 55 | cctz::time_zone& to_tz = convert_tz_state->to_tz; |
224 | 55 | auto push_null = [&](size_t row) { |
225 | 2 | result_null_map[row] = true; |
226 | 2 | result_column->insert_default(); |
227 | 2 | }; |
228 | | // state isn't valid means there's NULL in timezone input. so return null rather than exception |
229 | 55 | if (!convert_tz_state->is_valid) [[unlikely]] { |
230 | | // If an invalid timezone is present, return null |
231 | 4 | for (size_t i = 0; i < input_rows_count; i++) { |
232 | 2 | push_null(i); |
233 | 2 | } |
234 | 2 | return; |
235 | 2 | } |
236 | 159 | for (size_t i = 0; i < input_rows_count; i++) { |
237 | 106 | if (result_null_map[i]) { |
238 | 3 | result_column->insert_default(); |
239 | 3 | continue; |
240 | 3 | } |
241 | | |
242 | 103 | DateValueType ts_value = date_column->get_element(i); |
243 | 103 | DateValueType ts_value2; |
244 | | |
245 | 103 | std::pair<int64_t, int64_t> timestamp; |
246 | 103 | ts_value.unix_timestamp(×tamp, from_tz); |
247 | 103 | ts_value2.from_unixtime(timestamp, to_tz); |
248 | | |
249 | 103 | if (!ts_value2.is_valid_date()) [[unlikely]] { |
250 | 1 | throw_out_of_bound_convert_tz<DateValueType>(date_column->get_element(i), |
251 | 1 | from_tz.name(), to_tz.name()); |
252 | 1 | } |
253 | | |
254 | 103 | result_column->insert(Field::create_field<TYPE_DATETIMEV2>(ts_value2)); |
255 | 103 | } |
256 | 53 | } |
257 | | |
258 | | static void execute_tz_const(FunctionContext* context, const ColumnType* date_column, |
259 | | const ColumnString* from_tz_column, |
260 | | const ColumnString* to_tz_column, ColumnType* result_column, |
261 | 0 | NullMap& result_null_map, size_t input_rows_count) { |
262 | 0 | auto from_tz = from_tz_column->get_data_at(0).to_string(); |
263 | 0 | auto to_tz = to_tz_column->get_data_at(0).to_string(); |
264 | 0 | cctz::time_zone from_zone, to_zone; |
265 | 0 | if (!TimezoneUtils::find_cctz_time_zone(from_tz, from_zone)) [[unlikely]] { |
266 | 0 | throw Exception(ErrorCode::INVALID_ARGUMENT, "Operation {} invalid timezone: {}", name, |
267 | 0 | from_tz); |
268 | 0 | } |
269 | 0 | if (!TimezoneUtils::find_cctz_time_zone(to_tz, to_zone)) [[unlikely]] { |
270 | 0 | throw Exception(ErrorCode::INVALID_ARGUMENT, "Operation {} invalid timezone: {}", name, |
271 | 0 | to_tz); |
272 | 0 | } |
273 | 0 | for (size_t i = 0; i < input_rows_count; i++) { |
274 | 0 | if (result_null_map[i]) { |
275 | 0 | result_column->insert_default(); |
276 | 0 | continue; |
277 | 0 | } |
278 | 0 | execute_inner_loop(date_column, from_tz, to_tz, result_column, result_null_map, i); |
279 | 0 | } |
280 | 0 | } |
281 | | |
282 | | static void execute_inner_loop(const ColumnType* date_column, const std::string& from_tz_name, |
283 | | const std::string& to_tz_name, ColumnType* result_column, |
284 | 99 | NullMap& result_null_map, const size_t index_now) { |
285 | 99 | DateValueType ts_value = date_column->get_element(index_now); |
286 | 99 | cctz::time_zone from_tz {}, to_tz {}; |
287 | 99 | DateValueType ts_value2; |
288 | | |
289 | 99 | if (!TimezoneUtils::find_cctz_time_zone(from_tz_name, from_tz)) [[unlikely]] { |
290 | 0 | throw Exception(ErrorCode::INVALID_ARGUMENT, "Operation {} invalid timezone: {}", name, |
291 | 0 | from_tz_name); |
292 | 0 | } |
293 | 99 | if (!TimezoneUtils::find_cctz_time_zone(to_tz_name, to_tz)) [[unlikely]] { |
294 | 0 | throw Exception(ErrorCode::INVALID_ARGUMENT, "Operation {} invalid timezone: {}", name, |
295 | 0 | to_tz_name); |
296 | 0 | } |
297 | | |
298 | 99 | std::pair<int64_t, int64_t> timestamp; |
299 | 99 | ts_value.unix_timestamp(×tamp, from_tz); |
300 | 99 | ts_value2.from_unixtime(timestamp, to_tz); |
301 | | |
302 | 99 | if (!ts_value2.is_valid_date()) [[unlikely]] { |
303 | 0 | throw_out_of_bound_convert_tz<DateValueType>(date_column->get_element(index_now), |
304 | 0 | from_tz.name(), to_tz.name()); |
305 | 0 | } |
306 | | |
307 | 99 | result_column->insert(Field::create_field<TYPE_DATETIMEV2>(ts_value2)); |
308 | 99 | } |
309 | | }; |
310 | | |
311 | 8 | void register_function_convert_tz(SimpleFunctionFactory& factory) { |
312 | 8 | factory.register_function<FunctionConvertTZ>(); |
313 | 8 | } |
314 | | |
315 | | } // namespace doris |