be/src/exprs/function/function_format.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include <glog/logging.h> |
19 | | |
20 | | #include <cstdio> |
21 | | #include <regex> |
22 | | #include <vector> |
23 | | |
24 | | #include "common/status.h" |
25 | | #include "core/assert_cast.h" |
26 | | #include "core/column/column.h" |
27 | | #include "core/column/column_vector.h" |
28 | | #include "core/data_type/data_type_number.h" |
29 | | #include "core/data_type/define_primitive_type.h" |
30 | | #include "core/types.h" |
31 | | #include "exprs/function/cast_type_to_either.h" |
32 | | #include "exprs/function/simple_function_factory.h" |
33 | | |
34 | | namespace doris { |
35 | | |
36 | | class FunctionFormatNumber : public IFunction { |
37 | | public: |
38 | | static constexpr auto name = "format_number"; |
39 | | |
40 | | static constexpr const char* UNITS[6] = {"", "K", "M", "B", "T", "Q"}; |
41 | | |
42 | 2 | static FunctionPtr create() { return std::make_shared<FunctionFormatNumber>(); } |
43 | | |
44 | 1 | String get_name() const override { return name; } |
45 | | |
46 | 0 | size_t get_number_of_arguments() const override { return 1; } |
47 | | |
48 | 1 | bool is_variadic() const override { return false; } |
49 | | |
50 | 0 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
51 | 0 | return std::make_shared<DataTypeString>(); |
52 | 0 | } |
53 | | |
54 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
55 | 0 | uint32_t result, size_t input_rows_count) const override { |
56 | 0 | auto column = block.get_by_position(arguments[0]).column; |
57 | 0 | const auto& column_data = assert_cast<const ColumnFloat64*>(column.get())->get_data(); |
58 | 0 | auto col_res = ColumnString::create(); |
59 | 0 | fmt::memory_buffer buffer; |
60 | |
|
61 | 0 | for (auto i = 0; i < input_rows_count; ++i) { |
62 | 0 | auto res_data = format_number(buffer, column_data[i]); |
63 | 0 | col_res->insert_data(res_data.data(), res_data.length()); |
64 | 0 | } |
65 | 0 | block.replace_by_position(result, std::move(col_res)); |
66 | 0 | return Status::OK(); |
67 | 0 | } |
68 | | |
69 | 0 | std::string format_number(fmt::memory_buffer& buffer, double number) const { |
70 | 0 | buffer.clear(); |
71 | 0 | double abs_number = std::abs(number); |
72 | 0 | int unit_index = 0; |
73 | 0 | while (abs_number >= 1000 && unit_index < 5) { |
74 | 0 | abs_number /= 1000; |
75 | 0 | ++unit_index; |
76 | 0 | } |
77 | 0 | if (number < 0) { |
78 | 0 | fmt::format_to(buffer, "-"); |
79 | 0 | } |
80 | 0 | if (abs_number == 1) { |
81 | | //eg: 1000 ---> 1K |
82 | 0 | fmt::format_to(buffer, "{}", abs_number); |
83 | 0 | } else if (abs_number < 10) { |
84 | | //eg: 1239 ---> 1.24K only want to show 2 decimal |
85 | 0 | fmt::format_to(buffer, "{:.2f}", abs_number); |
86 | 0 | } else if (abs_number < 100) { |
87 | | //eg: 12399999 ---> 12.4M only want to show 1 decimal |
88 | 0 | fmt::format_to(buffer, "{:.1f}", abs_number); |
89 | 0 | } else { |
90 | | // eg: 999999999999999 ---> 1000T only want to show 0 decimal |
91 | 0 | fmt::format_to(buffer, "{:.0f}", abs_number); |
92 | 0 | } |
93 | 0 | fmt::format_to(buffer, UNITS[unit_index]); |
94 | 0 | return fmt::to_string(buffer); |
95 | 0 | } |
96 | | }; |
97 | | |
98 | | class FunctionFormat : public IFunction { |
99 | | public: |
100 | | static constexpr auto name = "format"; |
101 | | |
102 | 2 | static FunctionPtr create() { return std::make_shared<FunctionFormat>(); } |
103 | | |
104 | 0 | String get_name() const override { return name; } |
105 | | |
106 | 0 | size_t get_number_of_arguments() const override { return 0; } |
107 | | |
108 | 1 | bool is_variadic() const override { return true; } |
109 | | |
110 | 0 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
111 | 0 | return std::make_shared<DataTypeString>(); |
112 | 0 | } |
113 | | |
114 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
115 | 0 | uint32_t result, size_t input_rows_count) const override { |
116 | 0 | DCHECK_GE(arguments.size(), 2); |
117 | 0 | bool valid = |
118 | 0 | cast_type(block.get_by_position(arguments[1]).type.get(), [&](const auto& type) { |
119 | 0 | using DataType = std::decay_t<decltype(type)>; |
120 | 0 | using ColVecData = |
121 | 0 | std::conditional_t<is_number(DataType::PType), |
122 | 0 | ColumnVector<DataType::PType>, ColumnString>; |
123 | 0 | if (auto col = check_and_get_column<ColVecData>( |
124 | 0 | block.get_by_position(arguments[1]).column.get()) || |
125 | 0 | is_column_const(*block.get_by_position(arguments[1]).column)) { |
126 | 0 | execute_inner<ColVecData, DataType::PType>(block, arguments, result, |
127 | 0 | input_rows_count); |
128 | 0 | return true; |
129 | 0 | } |
130 | 0 | return false; |
131 | 0 | }); Unexecuted instantiation: _ZZNK5doris14FunctionFormat12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlRKT_E_clINS_14DataTypeNumberILNS_13PrimitiveTypeE3EEEEEDaSC_ Unexecuted instantiation: _ZZNK5doris14FunctionFormat12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlRKT_E_clINS_14DataTypeNumberILNS_13PrimitiveTypeE4EEEEEDaSC_ Unexecuted instantiation: _ZZNK5doris14FunctionFormat12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlRKT_E_clINS_14DataTypeNumberILNS_13PrimitiveTypeE5EEEEEDaSC_ Unexecuted instantiation: _ZZNK5doris14FunctionFormat12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlRKT_E_clINS_14DataTypeNumberILNS_13PrimitiveTypeE6EEEEEDaSC_ Unexecuted instantiation: _ZZNK5doris14FunctionFormat12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlRKT_E_clINS_14DataTypeNumberILNS_13PrimitiveTypeE7EEEEEDaSC_ Unexecuted instantiation: _ZZNK5doris14FunctionFormat12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlRKT_E_clINS_14DataTypeNumberILNS_13PrimitiveTypeE8EEEEEDaSC_ Unexecuted instantiation: _ZZNK5doris14FunctionFormat12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlRKT_E_clINS_14DataTypeNumberILNS_13PrimitiveTypeE9EEEEEDaSC_ Unexecuted instantiation: _ZZNK5doris14FunctionFormat12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlRKT_E_clINS_14DataTypeStringEEEDaSC_ |
132 | 0 | if (!valid) { |
133 | 0 | return Status::RuntimeError( |
134 | 0 | "{}'s argument does not match the expected data type, type: {}, column: {}", |
135 | 0 | get_name(), block.get_by_position(arguments[1]).type->get_name(), |
136 | 0 | block.get_by_position(arguments[1]).column->dump_structure()); |
137 | 0 | } |
138 | 0 | return Status::OK(); |
139 | 0 | } |
140 | | |
141 | | template <typename F> |
142 | 0 | static bool cast_type(const IDataType* type, F&& f) { |
143 | 0 | return cast_type_to_either<DataTypeInt8, DataTypeInt16, DataTypeInt32, DataTypeInt64, |
144 | 0 | DataTypeInt128, DataTypeFloat32, DataTypeFloat64, |
145 | 0 | DataTypeString>(type, std::forward<F>(f)); |
146 | 0 | } |
147 | | |
148 | | template <typename ColVecData, PrimitiveType T> |
149 | | void execute_inner(Block& block, const ColumnNumbers& arguments, uint32_t result, |
150 | 0 | size_t input_rows_count) const { |
151 | 0 | size_t argument_size = arguments.size(); |
152 | 0 | std::vector<ColumnPtr> argument_columns(argument_size); |
153 | 0 | auto result_column = ColumnString::create(); |
154 | | |
155 | | // maybe most user is format(const, column), so only handle this case const column |
156 | 0 | if (argument_size == 2) { |
157 | 0 | std::vector<uint8_t> is_consts(argument_size); |
158 | 0 | std::tie(argument_columns[0], is_consts[0]) = |
159 | 0 | unpack_if_const(block.get_by_position(arguments[0]).column); |
160 | 0 | std::tie(argument_columns[1], is_consts[1]) = |
161 | 0 | unpack_if_const(block.get_by_position(arguments[1]).column); |
162 | 0 | execute_for_two_argument<ColVecData, T>(argument_columns, is_consts, |
163 | 0 | assert_cast<ColumnString*>(result_column.get()), |
164 | 0 | input_rows_count); |
165 | 0 | } else { |
166 | 0 | for (size_t i = 0; i < argument_size; ++i) { |
167 | 0 | argument_columns[i] = block.get_by_position(arguments[i]) |
168 | 0 | .column->convert_to_full_column_if_const(); |
169 | 0 | } |
170 | 0 | execute_for_others_arg<ColVecData, T>(argument_columns, |
171 | 0 | assert_cast<ColumnString*>(result_column.get()), |
172 | 0 | argument_size, input_rows_count); |
173 | 0 | } |
174 | |
|
175 | 0 | block.replace_by_position(result, std::move(result_column)); |
176 | 0 | } Unexecuted instantiation: _ZNK5doris14FunctionFormat13execute_innerINS_12ColumnVectorILNS_13PrimitiveTypeE3EEELS3_3EEEvRNS_5BlockERKSt6vectorIjSaIjEEjm Unexecuted instantiation: _ZNK5doris14FunctionFormat13execute_innerINS_12ColumnVectorILNS_13PrimitiveTypeE4EEELS3_4EEEvRNS_5BlockERKSt6vectorIjSaIjEEjm Unexecuted instantiation: _ZNK5doris14FunctionFormat13execute_innerINS_12ColumnVectorILNS_13PrimitiveTypeE5EEELS3_5EEEvRNS_5BlockERKSt6vectorIjSaIjEEjm Unexecuted instantiation: _ZNK5doris14FunctionFormat13execute_innerINS_12ColumnVectorILNS_13PrimitiveTypeE6EEELS3_6EEEvRNS_5BlockERKSt6vectorIjSaIjEEjm Unexecuted instantiation: _ZNK5doris14FunctionFormat13execute_innerINS_12ColumnVectorILNS_13PrimitiveTypeE7EEELS3_7EEEvRNS_5BlockERKSt6vectorIjSaIjEEjm Unexecuted instantiation: _ZNK5doris14FunctionFormat13execute_innerINS_12ColumnVectorILNS_13PrimitiveTypeE8EEELS3_8EEEvRNS_5BlockERKSt6vectorIjSaIjEEjm Unexecuted instantiation: _ZNK5doris14FunctionFormat13execute_innerINS_12ColumnVectorILNS_13PrimitiveTypeE9EEELS3_9EEEvRNS_5BlockERKSt6vectorIjSaIjEEjm Unexecuted instantiation: _ZNK5doris14FunctionFormat13execute_innerINS_9ColumnStrIjEELNS_13PrimitiveTypeE23EEEvRNS_5BlockERKSt6vectorIjSaIjEEjm |
177 | | |
178 | | template <typename ColVecData, PrimitiveType T> |
179 | | void execute_for_two_argument(std::vector<ColumnPtr>& argument_columns, |
180 | | std::vector<uint8_t>& is_consts, ColumnString* result_data_column, |
181 | 0 | size_t input_rows_count) const { |
182 | 0 | const auto& format_column = assert_cast<const ColumnString&>(*argument_columns[0].get()); |
183 | 0 | const auto& value_column = assert_cast<const ColVecData&>(*argument_columns[1].get()); |
184 | 0 | for (int i = 0; i < input_rows_count; ++i) { |
185 | 0 | auto format = |
186 | 0 | format_column.get_data_at(index_check_const(i, is_consts[0])).to_string_view(); |
187 | 0 | std::string res; |
188 | 0 | try { |
189 | 0 | if constexpr (is_string_type(T)) { |
190 | 0 | auto value = value_column.get_data_at(index_check_const(i, is_consts[1])); |
191 | 0 | res = fmt::format(format, value); |
192 | 0 | } else { |
193 | 0 | auto value = value_column.get_data()[index_check_const(i, is_consts[1])]; |
194 | 0 | res = fmt::format(format, value); |
195 | 0 | } |
196 | 0 | } catch (const std::exception& e) { |
197 | 0 | throw doris::Exception( |
198 | 0 | ErrorCode::INVALID_ARGUMENT, |
199 | 0 | "Invalid Input argument \"{}\" of function format, error: {}", format, |
200 | 0 | e.what()); |
201 | 0 | } |
202 | 0 | result_data_column->insert_data(res.data(), res.length()); |
203 | 0 | } |
204 | 0 | } Unexecuted instantiation: _ZNK5doris14FunctionFormat24execute_for_two_argumentINS_12ColumnVectorILNS_13PrimitiveTypeE3EEELS3_3EEEvRSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrIS7_EESaISA_EERS5_IhSaIhEEPNS_9ColumnStrIjEEm Unexecuted instantiation: _ZNK5doris14FunctionFormat24execute_for_two_argumentINS_12ColumnVectorILNS_13PrimitiveTypeE4EEELS3_4EEEvRSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrIS7_EESaISA_EERS5_IhSaIhEEPNS_9ColumnStrIjEEm Unexecuted instantiation: _ZNK5doris14FunctionFormat24execute_for_two_argumentINS_12ColumnVectorILNS_13PrimitiveTypeE5EEELS3_5EEEvRSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrIS7_EESaISA_EERS5_IhSaIhEEPNS_9ColumnStrIjEEm Unexecuted instantiation: _ZNK5doris14FunctionFormat24execute_for_two_argumentINS_12ColumnVectorILNS_13PrimitiveTypeE6EEELS3_6EEEvRSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrIS7_EESaISA_EERS5_IhSaIhEEPNS_9ColumnStrIjEEm Unexecuted instantiation: _ZNK5doris14FunctionFormat24execute_for_two_argumentINS_12ColumnVectorILNS_13PrimitiveTypeE7EEELS3_7EEEvRSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrIS7_EESaISA_EERS5_IhSaIhEEPNS_9ColumnStrIjEEm Unexecuted instantiation: _ZNK5doris14FunctionFormat24execute_for_two_argumentINS_12ColumnVectorILNS_13PrimitiveTypeE8EEELS3_8EEEvRSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrIS7_EESaISA_EERS5_IhSaIhEEPNS_9ColumnStrIjEEm Unexecuted instantiation: _ZNK5doris14FunctionFormat24execute_for_two_argumentINS_12ColumnVectorILNS_13PrimitiveTypeE9EEELS3_9EEEvRSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrIS7_EESaISA_EERS5_IhSaIhEEPNS_9ColumnStrIjEEm Unexecuted instantiation: _ZNK5doris14FunctionFormat24execute_for_two_argumentINS_9ColumnStrIjEELNS_13PrimitiveTypeE23EEEvRSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrIS7_EESaISA_EERS5_IhSaIhEEPS3_m |
205 | | |
206 | | template <typename ColVecData, PrimitiveType T> |
207 | | void execute_for_others_arg(std::vector<ColumnPtr>& argument_columns, |
208 | | ColumnString* result_data_column, size_t argument_size, |
209 | 0 | size_t input_rows_count) const { |
210 | 0 | const auto& format_column = assert_cast<const ColumnString&>(*argument_columns[0].get()); |
211 | 0 | for (int i = 0; i < input_rows_count; ++i) { |
212 | 0 | auto format = format_column.get_data_at(i).to_string_view(); |
213 | 0 | std::string res; |
214 | 0 | fmt::dynamic_format_arg_store<fmt::format_context> args; |
215 | 0 | if constexpr (is_string_type(T)) { |
216 | 0 | for (int col = 1; col < argument_size; ++col) { |
217 | 0 | const auto& arg_column_data = |
218 | 0 | assert_cast<const ColVecData&>(*argument_columns[col].get()); |
219 | 0 | args.push_back(arg_column_data.get_data_at(i).to_string()); |
220 | 0 | } |
221 | 0 | } else { |
222 | 0 | for (int col = 1; col < argument_size; ++col) { |
223 | 0 | const auto& arg_column_data = |
224 | 0 | assert_cast<const ColVecData&>(*argument_columns[col].get()).get_data(); |
225 | 0 | args.push_back(arg_column_data[i]); |
226 | 0 | } |
227 | 0 | } |
228 | 0 | try { |
229 | 0 | res = fmt::vformat(format, args); |
230 | 0 | } catch (const std::exception& e) { |
231 | 0 | throw doris::Exception( |
232 | 0 | ErrorCode::INVALID_ARGUMENT, |
233 | 0 | "Invalid Input argument \"{}\" of function format, error: {}", format, |
234 | 0 | e.what()); |
235 | 0 | } |
236 | 0 | result_data_column->insert_data(res.data(), res.length()); |
237 | 0 | } |
238 | 0 | } Unexecuted instantiation: _ZNK5doris14FunctionFormat22execute_for_others_argINS_12ColumnVectorILNS_13PrimitiveTypeE3EEELS3_3EEEvRSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrIS7_EESaISA_EEPNS_9ColumnStrIjEEmm Unexecuted instantiation: _ZNK5doris14FunctionFormat22execute_for_others_argINS_12ColumnVectorILNS_13PrimitiveTypeE4EEELS3_4EEEvRSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrIS7_EESaISA_EEPNS_9ColumnStrIjEEmm Unexecuted instantiation: _ZNK5doris14FunctionFormat22execute_for_others_argINS_12ColumnVectorILNS_13PrimitiveTypeE5EEELS3_5EEEvRSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrIS7_EESaISA_EEPNS_9ColumnStrIjEEmm Unexecuted instantiation: _ZNK5doris14FunctionFormat22execute_for_others_argINS_12ColumnVectorILNS_13PrimitiveTypeE6EEELS3_6EEEvRSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrIS7_EESaISA_EEPNS_9ColumnStrIjEEmm Unexecuted instantiation: _ZNK5doris14FunctionFormat22execute_for_others_argINS_12ColumnVectorILNS_13PrimitiveTypeE7EEELS3_7EEEvRSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrIS7_EESaISA_EEPNS_9ColumnStrIjEEmm Unexecuted instantiation: _ZNK5doris14FunctionFormat22execute_for_others_argINS_12ColumnVectorILNS_13PrimitiveTypeE8EEELS3_8EEEvRSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrIS7_EESaISA_EEPNS_9ColumnStrIjEEmm Unexecuted instantiation: _ZNK5doris14FunctionFormat22execute_for_others_argINS_12ColumnVectorILNS_13PrimitiveTypeE9EEELS3_9EEEvRSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrIS7_EESaISA_EEPNS_9ColumnStrIjEEmm Unexecuted instantiation: _ZNK5doris14FunctionFormat22execute_for_others_argINS_9ColumnStrIjEELNS_13PrimitiveTypeE23EEEvRSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrIS7_EESaISA_EEPS3_mm |
239 | | }; |
240 | | |
241 | 1 | void register_function_format(SimpleFunctionFactory& factory) { |
242 | 1 | factory.register_function<FunctionFormatNumber>(); |
243 | 1 | factory.register_function<FunctionFormat>(); |
244 | 1 | } |
245 | | |
246 | | } // namespace doris |