be/src/exprs/function/function_decode_varchar.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include <fmt/core.h> |
19 | | |
20 | | #include <cstddef> |
21 | | #include <limits> |
22 | | #include <type_traits> |
23 | | |
24 | | #include "common/exception.h" |
25 | | #include "common/status.h" |
26 | | #include "core/column/column_const.h" |
27 | | #include "core/column/column_vector.h" |
28 | | #include "core/data_type/data_type.h" |
29 | | #include "core/data_type/data_type_number.h" |
30 | | #include "core/data_type/data_type_string.h" |
31 | | #include "core/data_type/primitive_type.h" |
32 | | #include "core/types.h" |
33 | | #include "exprs/function/function.h" |
34 | | #include "exprs/function/function_helpers.h" |
35 | | #include "exprs/function/simple_function_factory.h" |
36 | | #include "util/simd/reverse_copy_bytes.h" |
37 | | |
38 | | namespace doris { |
39 | | |
40 | | template <PrimitiveType IntegerPType> |
41 | | class FunctionDecodeAsVarchar : public IFunction { |
42 | | public: |
43 | | using IntegerType = typename PrimitiveTypeTraits<IntegerPType>::CppType; |
44 | | static constexpr auto name = "decode_as_varchar"; |
45 | 134 | static FunctionPtr create() { return std::make_shared<FunctionDecodeAsVarchar>(); }_ZN5doris23FunctionDecodeAsVarcharILNS_13PrimitiveTypeE4EE6createEv Line | Count | Source | 45 | 18 | static FunctionPtr create() { return std::make_shared<FunctionDecodeAsVarchar>(); } |
_ZN5doris23FunctionDecodeAsVarcharILNS_13PrimitiveTypeE5EE6createEv Line | Count | Source | 45 | 20 | static FunctionPtr create() { return std::make_shared<FunctionDecodeAsVarchar>(); } |
_ZN5doris23FunctionDecodeAsVarcharILNS_13PrimitiveTypeE6EE6createEv Line | Count | Source | 45 | 39 | static FunctionPtr create() { return std::make_shared<FunctionDecodeAsVarchar>(); } |
_ZN5doris23FunctionDecodeAsVarcharILNS_13PrimitiveTypeE7EE6createEv Line | Count | Source | 45 | 57 | static FunctionPtr create() { return std::make_shared<FunctionDecodeAsVarchar>(); } |
|
46 | | |
47 | 0 | String get_name() const override { return name; }Unexecuted instantiation: _ZNK5doris23FunctionDecodeAsVarcharILNS_13PrimitiveTypeE4EE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris23FunctionDecodeAsVarcharILNS_13PrimitiveTypeE5EE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris23FunctionDecodeAsVarcharILNS_13PrimitiveTypeE6EE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris23FunctionDecodeAsVarcharILNS_13PrimitiveTypeE7EE8get_nameB5cxx11Ev |
48 | | |
49 | 0 | size_t get_number_of_arguments() const override { return 1; }Unexecuted instantiation: _ZNK5doris23FunctionDecodeAsVarcharILNS_13PrimitiveTypeE4EE23get_number_of_argumentsEv Unexecuted instantiation: _ZNK5doris23FunctionDecodeAsVarcharILNS_13PrimitiveTypeE5EE23get_number_of_argumentsEv Unexecuted instantiation: _ZNK5doris23FunctionDecodeAsVarcharILNS_13PrimitiveTypeE6EE23get_number_of_argumentsEv Unexecuted instantiation: _ZNK5doris23FunctionDecodeAsVarcharILNS_13PrimitiveTypeE7EE23get_number_of_argumentsEv |
50 | | |
51 | 102 | bool is_variadic() const override { return true; }_ZNK5doris23FunctionDecodeAsVarcharILNS_13PrimitiveTypeE4EE11is_variadicEv Line | Count | Source | 51 | 10 | bool is_variadic() const override { return true; } |
_ZNK5doris23FunctionDecodeAsVarcharILNS_13PrimitiveTypeE5EE11is_variadicEv Line | Count | Source | 51 | 12 | bool is_variadic() const override { return true; } |
_ZNK5doris23FunctionDecodeAsVarcharILNS_13PrimitiveTypeE6EE11is_variadicEv Line | Count | Source | 51 | 31 | bool is_variadic() const override { return true; } |
_ZNK5doris23FunctionDecodeAsVarcharILNS_13PrimitiveTypeE7EE11is_variadicEv Line | Count | Source | 51 | 49 | bool is_variadic() const override { return true; } |
|
52 | | |
53 | 32 | DataTypes get_variadic_argument_types_impl() const override { |
54 | 32 | return {std::make_shared<typename PrimitiveTypeTraits<IntegerPType>::DataType>()}; |
55 | 32 | } _ZNK5doris23FunctionDecodeAsVarcharILNS_13PrimitiveTypeE4EE32get_variadic_argument_types_implEv Line | Count | Source | 53 | 8 | DataTypes get_variadic_argument_types_impl() const override { | 54 | 8 | return {std::make_shared<typename PrimitiveTypeTraits<IntegerPType>::DataType>()}; | 55 | 8 | } |
_ZNK5doris23FunctionDecodeAsVarcharILNS_13PrimitiveTypeE5EE32get_variadic_argument_types_implEv Line | Count | Source | 53 | 8 | DataTypes get_variadic_argument_types_impl() const override { | 54 | 8 | return {std::make_shared<typename PrimitiveTypeTraits<IntegerPType>::DataType>()}; | 55 | 8 | } |
_ZNK5doris23FunctionDecodeAsVarcharILNS_13PrimitiveTypeE6EE32get_variadic_argument_types_implEv Line | Count | Source | 53 | 8 | DataTypes get_variadic_argument_types_impl() const override { | 54 | 8 | return {std::make_shared<typename PrimitiveTypeTraits<IntegerPType>::DataType>()}; | 55 | 8 | } |
_ZNK5doris23FunctionDecodeAsVarcharILNS_13PrimitiveTypeE7EE32get_variadic_argument_types_implEv Line | Count | Source | 53 | 8 | DataTypes get_variadic_argument_types_impl() const override { | 54 | 8 | return {std::make_shared<typename PrimitiveTypeTraits<IntegerPType>::DataType>()}; | 55 | 8 | } |
|
56 | | |
57 | 98 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
58 | 98 | if (arguments.size() != 1) { |
59 | 0 | throw doris::Exception(ErrorCode::INVALID_ARGUMENT, |
60 | 0 | "Function {} requires 1 arguments, got {}", name, |
61 | 0 | arguments.size()); |
62 | 0 | } |
63 | | |
64 | 98 | return std::make_shared<DataTypeString>(); |
65 | 98 | } _ZNK5doris23FunctionDecodeAsVarcharILNS_13PrimitiveTypeE4EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 57 | 9 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 58 | 9 | if (arguments.size() != 1) { | 59 | 0 | throw doris::Exception(ErrorCode::INVALID_ARGUMENT, | 60 | 0 | "Function {} requires 1 arguments, got {}", name, | 61 | 0 | arguments.size()); | 62 | 0 | } | 63 | | | 64 | 9 | return std::make_shared<DataTypeString>(); | 65 | 9 | } |
_ZNK5doris23FunctionDecodeAsVarcharILNS_13PrimitiveTypeE5EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 57 | 11 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 58 | 11 | if (arguments.size() != 1) { | 59 | 0 | throw doris::Exception(ErrorCode::INVALID_ARGUMENT, | 60 | 0 | "Function {} requires 1 arguments, got {}", name, | 61 | 0 | arguments.size()); | 62 | 0 | } | 63 | | | 64 | 11 | return std::make_shared<DataTypeString>(); | 65 | 11 | } |
_ZNK5doris23FunctionDecodeAsVarcharILNS_13PrimitiveTypeE6EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 57 | 30 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 58 | 30 | if (arguments.size() != 1) { | 59 | 0 | throw doris::Exception(ErrorCode::INVALID_ARGUMENT, | 60 | 0 | "Function {} requires 1 arguments, got {}", name, | 61 | 0 | arguments.size()); | 62 | 0 | } | 63 | | | 64 | 30 | return std::make_shared<DataTypeString>(); | 65 | 30 | } |
_ZNK5doris23FunctionDecodeAsVarcharILNS_13PrimitiveTypeE7EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 57 | 48 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 58 | 48 | if (arguments.size() != 1) { | 59 | 0 | throw doris::Exception(ErrorCode::INVALID_ARGUMENT, | 60 | 0 | "Function {} requires 1 arguments, got {}", name, | 61 | 0 | arguments.size()); | 62 | 0 | } | 63 | | | 64 | 48 | return std::make_shared<DataTypeString>(); | 65 | 48 | } |
|
66 | | |
67 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
68 | 119 | uint32_t result, size_t input_rows_count) const override { |
69 | 119 | const auto* col_source = |
70 | 119 | assert_cast<const typename PrimitiveTypeTraits<IntegerPType>::ColumnType*>( |
71 | 119 | block.get_by_position(arguments[0]).column.get()); |
72 | | |
73 | 119 | auto col_res = ColumnString::create(); |
74 | | |
75 | 119 | ColumnString::Chars& col_res_data = col_res->get_chars(); |
76 | 119 | ColumnString::Offsets& col_res_offset = col_res->get_offsets(); |
77 | 119 | col_res_data.resize(input_rows_count * sizeof(IntegerType)); |
78 | 119 | col_res_offset.resize(input_rows_count); |
79 | | |
80 | 245k | for (Int32 i = 0; i < input_rows_count; ++i) { |
81 | 245k | IntegerType value = col_source->get_element(i); |
82 | 245k | const auto* const __restrict ui8_ptr = reinterpret_cast<const UInt8*>(&value); |
83 | 245k | UInt32 str_size = static_cast<UInt32>(*ui8_ptr) & 0x7F; |
84 | | |
85 | 245k | if (str_size >= sizeof(IntegerType)) { |
86 | 1 | const auto& type_ptr = block.get_by_position(arguments[0]).type; |
87 | 1 | throw doris::Exception(ErrorCode::INVALID_ARGUMENT, |
88 | 1 | "Invalid input of function {}, input type {} value {}, " |
89 | 1 | "string size {}, should not be larger than {}", |
90 | 1 | name, type_ptr->get_name(), value, str_size, |
91 | 1 | sizeof(IntegerType)); |
92 | 1 | } |
93 | | |
94 | | // col_res_offset[-1] is valid for PaddedPODArray, will get 0 |
95 | 245k | col_res_offset[i] = col_res_offset[i - 1] + str_size; |
96 | 245k | value <<= 1; |
97 | | |
98 | 245k | simd::reverse_copy_bytes(col_res_data.data() + col_res_offset[i - 1], str_size, |
99 | 245k | ui8_ptr + sizeof(IntegerType) - str_size, str_size); |
100 | 245k | } |
101 | 118 | col_res_data.resize(col_res_offset[col_res_offset.size() - 1]); |
102 | | |
103 | 118 | block.get_by_position(result).column = std::move(col_res); |
104 | | |
105 | 118 | return Status::OK(); |
106 | 119 | } _ZNK5doris23FunctionDecodeAsVarcharILNS_13PrimitiveTypeE4EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 68 | 10 | uint32_t result, size_t input_rows_count) const override { | 69 | 10 | const auto* col_source = | 70 | 10 | assert_cast<const typename PrimitiveTypeTraits<IntegerPType>::ColumnType*>( | 71 | 10 | block.get_by_position(arguments[0]).column.get()); | 72 | | | 73 | 10 | auto col_res = ColumnString::create(); | 74 | | | 75 | 10 | ColumnString::Chars& col_res_data = col_res->get_chars(); | 76 | 10 | ColumnString::Offsets& col_res_offset = col_res->get_offsets(); | 77 | 10 | col_res_data.resize(input_rows_count * sizeof(IntegerType)); | 78 | 10 | col_res_offset.resize(input_rows_count); | 79 | | | 80 | 16.4k | for (Int32 i = 0; i < input_rows_count; ++i) { | 81 | 16.3k | IntegerType value = col_source->get_element(i); | 82 | 16.3k | const auto* const __restrict ui8_ptr = reinterpret_cast<const UInt8*>(&value); | 83 | 16.3k | UInt32 str_size = static_cast<UInt32>(*ui8_ptr) & 0x7F; | 84 | | | 85 | 16.3k | if (str_size >= sizeof(IntegerType)) { | 86 | 1 | const auto& type_ptr = block.get_by_position(arguments[0]).type; | 87 | 1 | throw doris::Exception(ErrorCode::INVALID_ARGUMENT, | 88 | 1 | "Invalid input of function {}, input type {} value {}, " | 89 | 1 | "string size {}, should not be larger than {}", | 90 | 1 | name, type_ptr->get_name(), value, str_size, | 91 | 1 | sizeof(IntegerType)); | 92 | 1 | } | 93 | | | 94 | | // col_res_offset[-1] is valid for PaddedPODArray, will get 0 | 95 | 16.3k | col_res_offset[i] = col_res_offset[i - 1] + str_size; | 96 | 16.3k | value <<= 1; | 97 | | | 98 | 16.3k | simd::reverse_copy_bytes(col_res_data.data() + col_res_offset[i - 1], str_size, | 99 | 16.3k | ui8_ptr + sizeof(IntegerType) - str_size, str_size); | 100 | 16.3k | } | 101 | 9 | col_res_data.resize(col_res_offset[col_res_offset.size() - 1]); | 102 | | | 103 | 9 | block.get_by_position(result).column = std::move(col_res); | 104 | | | 105 | 9 | return Status::OK(); | 106 | 10 | } |
_ZNK5doris23FunctionDecodeAsVarcharILNS_13PrimitiveTypeE5EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 68 | 13 | uint32_t result, size_t input_rows_count) const override { | 69 | 13 | const auto* col_source = | 70 | 13 | assert_cast<const typename PrimitiveTypeTraits<IntegerPType>::ColumnType*>( | 71 | 13 | block.get_by_position(arguments[0]).column.get()); | 72 | | | 73 | 13 | auto col_res = ColumnString::create(); | 74 | | | 75 | 13 | ColumnString::Chars& col_res_data = col_res->get_chars(); | 76 | 13 | ColumnString::Offsets& col_res_offset = col_res->get_offsets(); | 77 | 13 | col_res_data.resize(input_rows_count * sizeof(IntegerType)); | 78 | 13 | col_res_offset.resize(input_rows_count); | 79 | | | 80 | 32.7k | for (Int32 i = 0; i < input_rows_count; ++i) { | 81 | 32.7k | IntegerType value = col_source->get_element(i); | 82 | 32.7k | const auto* const __restrict ui8_ptr = reinterpret_cast<const UInt8*>(&value); | 83 | 32.7k | UInt32 str_size = static_cast<UInt32>(*ui8_ptr) & 0x7F; | 84 | | | 85 | 32.7k | if (str_size >= sizeof(IntegerType)) { | 86 | 0 | const auto& type_ptr = block.get_by_position(arguments[0]).type; | 87 | 0 | throw doris::Exception(ErrorCode::INVALID_ARGUMENT, | 88 | 0 | "Invalid input of function {}, input type {} value {}, " | 89 | 0 | "string size {}, should not be larger than {}", | 90 | 0 | name, type_ptr->get_name(), value, str_size, | 91 | 0 | sizeof(IntegerType)); | 92 | 0 | } | 93 | | | 94 | | // col_res_offset[-1] is valid for PaddedPODArray, will get 0 | 95 | 32.7k | col_res_offset[i] = col_res_offset[i - 1] + str_size; | 96 | 32.7k | value <<= 1; | 97 | | | 98 | 32.7k | simd::reverse_copy_bytes(col_res_data.data() + col_res_offset[i - 1], str_size, | 99 | 32.7k | ui8_ptr + sizeof(IntegerType) - str_size, str_size); | 100 | 32.7k | } | 101 | 13 | col_res_data.resize(col_res_offset[col_res_offset.size() - 1]); | 102 | | | 103 | 13 | block.get_by_position(result).column = std::move(col_res); | 104 | | | 105 | 13 | return Status::OK(); | 106 | 13 | } |
_ZNK5doris23FunctionDecodeAsVarcharILNS_13PrimitiveTypeE6EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 68 | 40 | uint32_t result, size_t input_rows_count) const override { | 69 | 40 | const auto* col_source = | 70 | 40 | assert_cast<const typename PrimitiveTypeTraits<IntegerPType>::ColumnType*>( | 71 | 40 | block.get_by_position(arguments[0]).column.get()); | 72 | | | 73 | 40 | auto col_res = ColumnString::create(); | 74 | | | 75 | 40 | ColumnString::Chars& col_res_data = col_res->get_chars(); | 76 | 40 | ColumnString::Offsets& col_res_offset = col_res->get_offsets(); | 77 | 40 | col_res_data.resize(input_rows_count * sizeof(IntegerType)); | 78 | 40 | col_res_offset.resize(input_rows_count); | 79 | | | 80 | 65.6k | for (Int32 i = 0; i < input_rows_count; ++i) { | 81 | 65.5k | IntegerType value = col_source->get_element(i); | 82 | 65.5k | const auto* const __restrict ui8_ptr = reinterpret_cast<const UInt8*>(&value); | 83 | 65.5k | UInt32 str_size = static_cast<UInt32>(*ui8_ptr) & 0x7F; | 84 | | | 85 | 65.5k | if (str_size >= sizeof(IntegerType)) { | 86 | 0 | const auto& type_ptr = block.get_by_position(arguments[0]).type; | 87 | 0 | throw doris::Exception(ErrorCode::INVALID_ARGUMENT, | 88 | 0 | "Invalid input of function {}, input type {} value {}, " | 89 | 0 | "string size {}, should not be larger than {}", | 90 | 0 | name, type_ptr->get_name(), value, str_size, | 91 | 0 | sizeof(IntegerType)); | 92 | 0 | } | 93 | | | 94 | | // col_res_offset[-1] is valid for PaddedPODArray, will get 0 | 95 | 65.5k | col_res_offset[i] = col_res_offset[i - 1] + str_size; | 96 | 65.5k | value <<= 1; | 97 | | | 98 | 65.5k | simd::reverse_copy_bytes(col_res_data.data() + col_res_offset[i - 1], str_size, | 99 | 65.5k | ui8_ptr + sizeof(IntegerType) - str_size, str_size); | 100 | 65.5k | } | 101 | 40 | col_res_data.resize(col_res_offset[col_res_offset.size() - 1]); | 102 | | | 103 | 40 | block.get_by_position(result).column = std::move(col_res); | 104 | | | 105 | 40 | return Status::OK(); | 106 | 40 | } |
_ZNK5doris23FunctionDecodeAsVarcharILNS_13PrimitiveTypeE7EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 68 | 56 | uint32_t result, size_t input_rows_count) const override { | 69 | 56 | const auto* col_source = | 70 | 56 | assert_cast<const typename PrimitiveTypeTraits<IntegerPType>::ColumnType*>( | 71 | 56 | block.get_by_position(arguments[0]).column.get()); | 72 | | | 73 | 56 | auto col_res = ColumnString::create(); | 74 | | | 75 | 56 | ColumnString::Chars& col_res_data = col_res->get_chars(); | 76 | 56 | ColumnString::Offsets& col_res_offset = col_res->get_offsets(); | 77 | 56 | col_res_data.resize(input_rows_count * sizeof(IntegerType)); | 78 | 56 | col_res_offset.resize(input_rows_count); | 79 | | | 80 | 131k | for (Int32 i = 0; i < input_rows_count; ++i) { | 81 | 131k | IntegerType value = col_source->get_element(i); | 82 | 131k | const auto* const __restrict ui8_ptr = reinterpret_cast<const UInt8*>(&value); | 83 | 131k | UInt32 str_size = static_cast<UInt32>(*ui8_ptr) & 0x7F; | 84 | | | 85 | 131k | if (str_size >= sizeof(IntegerType)) { | 86 | 0 | const auto& type_ptr = block.get_by_position(arguments[0]).type; | 87 | 0 | throw doris::Exception(ErrorCode::INVALID_ARGUMENT, | 88 | 0 | "Invalid input of function {}, input type {} value {}, " | 89 | 0 | "string size {}, should not be larger than {}", | 90 | 0 | name, type_ptr->get_name(), value, str_size, | 91 | 0 | sizeof(IntegerType)); | 92 | 0 | } | 93 | | | 94 | | // col_res_offset[-1] is valid for PaddedPODArray, will get 0 | 95 | 131k | col_res_offset[i] = col_res_offset[i - 1] + str_size; | 96 | 131k | value <<= 1; | 97 | | | 98 | 131k | simd::reverse_copy_bytes(col_res_data.data() + col_res_offset[i - 1], str_size, | 99 | 131k | ui8_ptr + sizeof(IntegerType) - str_size, str_size); | 100 | 131k | } | 101 | 56 | col_res_data.resize(col_res_offset[col_res_offset.size() - 1]); | 102 | | | 103 | 56 | block.get_by_position(result).column = std::move(col_res); | 104 | | | 105 | 56 | return Status::OK(); | 106 | 56 | } |
|
107 | | }; |
108 | | |
109 | 8 | void register_function_decode_as_varchar(SimpleFunctionFactory& factory) { |
110 | 8 | factory.register_function<FunctionDecodeAsVarchar<TYPE_SMALLINT>>(); |
111 | 8 | factory.register_function<FunctionDecodeAsVarchar<TYPE_INT>>(); |
112 | 8 | factory.register_function<FunctionDecodeAsVarchar<TYPE_BIGINT>>(); |
113 | 8 | factory.register_function<FunctionDecodeAsVarchar<TYPE_LARGEINT>>(); |
114 | 8 | } |
115 | | |
116 | | } // namespace doris |