be/src/exprs/function/function_convert_tz.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include <cctz/time_zone.h> |
19 | | |
20 | | #include <cstddef> |
21 | | #include <cstdint> |
22 | | #include <memory> |
23 | | #include <string> |
24 | | #include <utility> |
25 | | |
26 | | #include "common/status.h" |
27 | | #include "core/assert_cast.h" |
28 | | #include "core/binary_cast.hpp" |
29 | | #include "core/block/block.h" |
30 | | #include "core/block/column_numbers.h" |
31 | | #include "core/block/column_with_type_and_name.h" |
32 | | #include "core/column/column.h" |
33 | | #include "core/column/column_const.h" |
34 | | #include "core/column/column_nullable.h" |
35 | | #include "core/column/column_string.h" |
36 | | #include "core/column/column_vector.h" |
37 | | #include "core/data_type/data_type.h" |
38 | | #include "core/data_type/data_type_date.h" |
39 | | #include "core/data_type/data_type_date_or_datetime_v2.h" |
40 | | #include "core/data_type/data_type_date_time.h" |
41 | | #include "core/data_type/data_type_nullable.h" |
42 | | #include "core/data_type/data_type_string.h" |
43 | | #include "core/data_type/define_primitive_type.h" |
44 | | #include "core/data_type/primitive_type.h" |
45 | | #include "core/string_ref.h" |
46 | | #include "core/types.h" |
47 | | #include "core/value/vdatetime_value.h" |
48 | | #include "exec/common/util.hpp" |
49 | | #include "exprs/aggregate/aggregate_function.h" |
50 | | #include "exprs/function/datetime_errors.h" |
51 | | #include "exprs/function/function.h" |
52 | | #include "exprs/function/function_helpers.h" |
53 | | #include "exprs/function/simple_function_factory.h" |
54 | | #include "exprs/function_context.h" |
55 | | #include "util/timezone_utils.h" |
56 | | |
57 | | namespace doris { |
58 | | #include "common/compile_check_begin.h" |
59 | | |
60 | | struct ConvertTzState { |
61 | | bool use_state = false; |
62 | | bool is_valid = false; |
63 | | cctz::time_zone from_tz; |
64 | | cctz::time_zone to_tz; |
65 | | }; |
66 | | |
67 | | class FunctionConvertTZ : public IFunction { |
68 | | constexpr static PrimitiveType PType = PrimitiveType::TYPE_DATETIMEV2; |
69 | | using DateValueType = PrimitiveTypeTraits<PType>::CppType; |
70 | | using ColumnType = PrimitiveTypeTraits<PType>::ColumnType; |
71 | | |
72 | | public: |
73 | | static constexpr auto name = "convert_tz"; |
74 | | |
75 | 87 | static FunctionPtr create() { return std::make_shared<FunctionConvertTZ>(); } |
76 | | |
77 | 1 | String get_name() const override { return name; } |
78 | | |
79 | 78 | size_t get_number_of_arguments() const override { return 3; } |
80 | | |
81 | 78 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
82 | 78 | return have_nullable(arguments) ? make_nullable(std::make_shared<DataTypeDateTimeV2>()) |
83 | 78 | : std::make_shared<DataTypeDateTimeV2>(); |
84 | 78 | } |
85 | | |
86 | | // default value of timezone is invalid, should skip to avoid wrong exception |
87 | 173 | bool use_default_implementation_for_nulls() const override { return false; } |
88 | | |
89 | 363 | Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { |
90 | 363 | if (scope == FunctionContext::THREAD_LOCAL) { |
91 | 285 | return Status::OK(); |
92 | 285 | } |
93 | 78 | std::shared_ptr<ConvertTzState> state = std::make_shared<ConvertTzState>(); |
94 | | |
95 | 78 | context->set_function_state(scope, state); |
96 | 78 | DCHECK_EQ(context->get_num_args(), 3); |
97 | 78 | const auto* const_from_tz = context->get_constant_col(1); |
98 | 78 | const auto* const_to_tz = context->get_constant_col(2); |
99 | | |
100 | | // ConvertTzState is used only when both the second and third parameters are constants |
101 | 78 | if (const_from_tz != nullptr && const_to_tz != nullptr) { |
102 | 44 | state->use_state = true; |
103 | 44 | init_convert_tz_state(state, const_from_tz, const_to_tz); |
104 | 44 | } else { |
105 | 34 | state->use_state = false; |
106 | 34 | } |
107 | | |
108 | 78 | return IFunction::open(context, scope); |
109 | 363 | } |
110 | | |
111 | | void init_convert_tz_state(std::shared_ptr<ConvertTzState> state, |
112 | | const ColumnPtrWrapper* const_from_tz, |
113 | 44 | const ColumnPtrWrapper* const_to_tz) { |
114 | 44 | auto const_data_from_tz = const_from_tz->column_ptr->get_data_at(0); |
115 | 44 | auto const_data_to_tz = const_to_tz->column_ptr->get_data_at(0); |
116 | | |
117 | | // from_tz and to_tz must both be non-null. |
118 | 44 | if (const_data_from_tz.data == nullptr || const_data_to_tz.data == nullptr) { |
119 | 0 | state->is_valid = false; |
120 | 0 | return; |
121 | 0 | } |
122 | | |
123 | 44 | auto from_tz_name = const_data_from_tz.to_string(); |
124 | 44 | auto to_tz_name = const_data_to_tz.to_string(); |
125 | | |
126 | 44 | if (!TimezoneUtils::find_cctz_time_zone(from_tz_name, state->from_tz)) [[unlikely]] { |
127 | 2 | throw Exception(ErrorCode::INVALID_ARGUMENT, "Operation {} invalid timezone: {}", name, |
128 | 2 | from_tz_name); |
129 | 2 | } |
130 | 42 | if (!TimezoneUtils::find_cctz_time_zone(to_tz_name, state->to_tz)) [[unlikely]] { |
131 | 2 | throw Exception(ErrorCode::INVALID_ARGUMENT, "Operation {} invalid timezone: {}", name, |
132 | 2 | to_tz_name); |
133 | 2 | } |
134 | 40 | state->is_valid = true; |
135 | 40 | } |
136 | | |
137 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
138 | 95 | uint32_t result, size_t input_rows_count) const override { |
139 | 95 | auto* convert_tz_state = reinterpret_cast<ConvertTzState*>( |
140 | 95 | context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); |
141 | 95 | if (!convert_tz_state) { |
142 | 0 | return Status::RuntimeError( |
143 | 0 | "funciton context for function '{}' must have ConvertTzState;", get_name()); |
144 | 0 | } |
145 | | |
146 | 95 | auto result_null_map_column = ColumnUInt8::create(input_rows_count, 0); |
147 | 95 | NullMap& result_null_map = assert_cast<ColumnUInt8&>(*result_null_map_column).get_data(); |
148 | | |
149 | 95 | ColumnPtr argument_columns[3]; |
150 | 95 | bool col_const[3]; |
151 | | |
152 | | // calculate result null map and col_const |
153 | 380 | for (int i = 0; i < 3; ++i) { |
154 | 285 | ColumnPtr& col = block.get_by_position(arguments[i]).column; |
155 | 285 | col_const[i] = is_column_const(*col); |
156 | 285 | const NullMap* null_map = VectorizedUtils::get_null_map(col); |
157 | 285 | if (null_map) { |
158 | 141 | VectorizedUtils::update_null_map(result_null_map, *null_map, col_const[i]); |
159 | 141 | } |
160 | 285 | } |
161 | | |
162 | | // Extract nested columns from const(nullable) wrappers |
163 | 95 | argument_columns[0] = col_const[0] ? static_cast<const ColumnConst&>( |
164 | 1 | *block.get_by_position(arguments[0]).column) |
165 | 1 | .convert_to_full_column() |
166 | 95 | : block.get_by_position(arguments[0]).column; |
167 | 95 | argument_columns[0] = remove_nullable(argument_columns[0]); |
168 | 95 | default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block, arguments); |
169 | 95 | argument_columns[1] = remove_nullable(argument_columns[1]); |
170 | 95 | argument_columns[2] = remove_nullable(argument_columns[2]); |
171 | | |
172 | 95 | auto result_column = ColumnType::create(); |
173 | 95 | if (convert_tz_state->use_state) { |
174 | | // ignore argument columns, use cached timezone input in state |
175 | 52 | execute_tz_const_with_state(convert_tz_state, |
176 | 52 | assert_cast<const ColumnType*>(argument_columns[0].get()), |
177 | 52 | assert_cast<ColumnType*>(result_column.get()), |
178 | 52 | result_null_map, input_rows_count); |
179 | 52 | } else if (col_const[1] && col_const[2]) { |
180 | | // arguments are const |
181 | 0 | execute_tz_const(context, assert_cast<const ColumnType*>(argument_columns[0].get()), |
182 | 0 | assert_cast<const ColumnString*>(argument_columns[1].get()), |
183 | 0 | assert_cast<const ColumnString*>(argument_columns[2].get()), |
184 | 0 | assert_cast<ColumnType*>(result_column.get()), result_null_map, |
185 | 0 | input_rows_count); |
186 | 43 | } else { |
187 | 43 | _execute(context, assert_cast<const ColumnType*>(argument_columns[0].get()), |
188 | 43 | assert_cast<const ColumnString*>(argument_columns[1].get()), |
189 | 43 | assert_cast<const ColumnString*>(argument_columns[2].get()), |
190 | 43 | assert_cast<ColumnType*>(result_column.get()), result_null_map, |
191 | 43 | input_rows_count); |
192 | 43 | } //if const |
193 | | |
194 | 95 | if (block.get_data_type(result)->is_nullable()) { |
195 | 84 | block.get_by_position(result).column = ColumnNullable::create( |
196 | 84 | std::move(result_column), std::move(result_null_map_column)); |
197 | 84 | } else { |
198 | 11 | block.get_by_position(result).column = std::move(result_column); |
199 | 11 | } |
200 | 95 | return Status::OK(); |
201 | 95 | } |
202 | | |
203 | | private: |
204 | | static void _execute(FunctionContext* context, const ColumnType* date_column, |
205 | | const ColumnString* from_tz_column, const ColumnString* to_tz_column, |
206 | | ColumnType* result_column, NullMap& result_null_map, |
207 | 43 | size_t input_rows_count) { |
208 | 169 | for (size_t i = 0; i < input_rows_count; i++) { |
209 | 126 | if (result_null_map[i]) { |
210 | 0 | result_column->insert_default(); |
211 | 0 | continue; |
212 | 0 | } |
213 | 126 | auto from_tz = from_tz_column->get_data_at(i).to_string(); |
214 | 126 | auto to_tz = to_tz_column->get_data_at(i).to_string(); |
215 | 126 | execute_inner_loop(date_column, from_tz, to_tz, result_column, result_null_map, i); |
216 | 126 | } |
217 | 43 | } |
218 | | |
219 | | static void execute_tz_const_with_state(ConvertTzState* convert_tz_state, |
220 | | const ColumnType* date_column, |
221 | | ColumnType* result_column, NullMap& result_null_map, |
222 | 52 | size_t input_rows_count) { |
223 | 52 | cctz::time_zone& from_tz = convert_tz_state->from_tz; |
224 | 52 | cctz::time_zone& to_tz = convert_tz_state->to_tz; |
225 | 52 | auto push_null = [&](size_t row) { |
226 | 0 | result_null_map[row] = true; |
227 | 0 | result_column->insert_default(); |
228 | 0 | }; |
229 | | // state isn't valid means there's NULL in timezone input. so return null rather than exception |
230 | 52 | if (!convert_tz_state->is_valid) [[unlikely]] { |
231 | | // If an invalid timezone is present, return null |
232 | 0 | for (size_t i = 0; i < input_rows_count; i++) { |
233 | 0 | push_null(i); |
234 | 0 | } |
235 | 0 | return; |
236 | 0 | } |
237 | 157 | for (size_t i = 0; i < input_rows_count; i++) { |
238 | 105 | if (result_null_map[i]) { |
239 | 2 | result_column->insert_default(); |
240 | 2 | continue; |
241 | 2 | } |
242 | | |
243 | 103 | DateValueType ts_value = date_column->get_element(i); |
244 | 103 | DateValueType ts_value2; |
245 | | |
246 | 103 | std::pair<int64_t, int64_t> timestamp; |
247 | 103 | ts_value.unix_timestamp(×tamp, from_tz); |
248 | 103 | ts_value2.from_unixtime(timestamp, to_tz); |
249 | | |
250 | 103 | if (!ts_value2.is_valid_date()) [[unlikely]] { |
251 | 1 | throw_out_of_bound_convert_tz<DateValueType>(date_column->get_element(i), |
252 | 1 | from_tz.name(), to_tz.name()); |
253 | 1 | } |
254 | | |
255 | 103 | result_column->insert(Field::create_field<TYPE_DATETIMEV2>(ts_value2)); |
256 | 103 | } |
257 | 52 | } |
258 | | |
259 | | static void execute_tz_const(FunctionContext* context, const ColumnType* date_column, |
260 | | const ColumnString* from_tz_column, |
261 | | const ColumnString* to_tz_column, ColumnType* result_column, |
262 | 0 | NullMap& result_null_map, size_t input_rows_count) { |
263 | 0 | auto from_tz = from_tz_column->get_data_at(0).to_string(); |
264 | 0 | auto to_tz = to_tz_column->get_data_at(0).to_string(); |
265 | 0 | cctz::time_zone from_zone, to_zone; |
266 | 0 | if (!TimezoneUtils::find_cctz_time_zone(from_tz, from_zone)) [[unlikely]] { |
267 | 0 | throw Exception(ErrorCode::INVALID_ARGUMENT, "Operation {} invalid timezone: {}", name, |
268 | 0 | from_tz); |
269 | 0 | } |
270 | 0 | if (!TimezoneUtils::find_cctz_time_zone(to_tz, to_zone)) [[unlikely]] { |
271 | 0 | throw Exception(ErrorCode::INVALID_ARGUMENT, "Operation {} invalid timezone: {}", name, |
272 | 0 | to_tz); |
273 | 0 | } |
274 | 0 | for (size_t i = 0; i < input_rows_count; i++) { |
275 | 0 | if (result_null_map[i]) { |
276 | 0 | result_column->insert_default(); |
277 | 0 | continue; |
278 | 0 | } |
279 | 0 | execute_inner_loop(date_column, from_tz, to_tz, result_column, result_null_map, i); |
280 | 0 | } |
281 | 0 | } |
282 | | |
283 | | static void execute_inner_loop(const ColumnType* date_column, const std::string& from_tz_name, |
284 | | const std::string& to_tz_name, ColumnType* result_column, |
285 | 126 | NullMap& result_null_map, const size_t index_now) { |
286 | 126 | DateValueType ts_value = date_column->get_element(index_now); |
287 | 126 | cctz::time_zone from_tz {}, to_tz {}; |
288 | 126 | DateValueType ts_value2; |
289 | | |
290 | 126 | if (!TimezoneUtils::find_cctz_time_zone(from_tz_name, from_tz)) [[unlikely]] { |
291 | 0 | throw Exception(ErrorCode::INVALID_ARGUMENT, "Operation {} invalid timezone: {}", name, |
292 | 0 | from_tz_name); |
293 | 0 | } |
294 | 126 | if (!TimezoneUtils::find_cctz_time_zone(to_tz_name, to_tz)) [[unlikely]] { |
295 | 0 | throw Exception(ErrorCode::INVALID_ARGUMENT, "Operation {} invalid timezone: {}", name, |
296 | 0 | to_tz_name); |
297 | 0 | } |
298 | | |
299 | 126 | std::pair<int64_t, int64_t> timestamp; |
300 | 126 | ts_value.unix_timestamp(×tamp, from_tz); |
301 | 126 | ts_value2.from_unixtime(timestamp, to_tz); |
302 | | |
303 | 126 | if (!ts_value2.is_valid_date()) [[unlikely]] { |
304 | 0 | throw_out_of_bound_convert_tz<DateValueType>(date_column->get_element(index_now), |
305 | 0 | from_tz.name(), to_tz.name()); |
306 | 0 | } |
307 | | |
308 | 126 | result_column->insert(Field::create_field<TYPE_DATETIMEV2>(ts_value2)); |
309 | 126 | } |
310 | | }; |
311 | | |
312 | 8 | void register_function_convert_tz(SimpleFunctionFactory& factory) { |
313 | 8 | factory.register_function<FunctionConvertTZ>(); |
314 | 8 | } |
315 | | |
316 | | } // namespace doris |
317 | | |
318 | | #include "common/compile_check_end.h" |