Coverage Report

Created: 2026-04-15 12:22

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/function_encode_varchar.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include <cstddef>
19
#include <limits>
20
#include <type_traits>
21
22
#include "common/exception.h"
23
#include "common/status.h"
24
#include "core/column/column_const.h"
25
#include "core/column/column_vector.h"
26
#include "core/data_type/data_type.h"
27
#include "core/data_type/data_type_number.h"
28
#include "core/data_type/data_type_string.h"
29
#include "core/data_type/primitive_type.h"
30
#include "core/types.h"
31
#include "exprs/function/function.h"
32
#include "exprs/function/function_helpers.h"
33
#include "exprs/function/simple_function_factory.h"
34
#include "util/simd/reverse_copy_bytes.h"
35
36
namespace doris {
37
38
struct EncodeAsSmallInt {
39
    static constexpr auto name = "encode_as_smallint";
40
};
41
42
struct EncodeAsInt {
43
    static constexpr auto name = "encode_as_int";
44
};
45
46
struct EncodeAsBigInt {
47
    static constexpr auto name = "encode_as_bigint";
48
};
49
50
struct EncodeAsLargeInt {
51
    static constexpr auto name = "encode_as_largeint";
52
};
53
54
template <typename Name, PrimitiveType ReturnType>
55
class FunctionEncodeVarchar : public IFunction {
56
public:
57
    static constexpr auto name = Name::name;
58
223
    static FunctionPtr create() { return std::make_shared<FunctionEncodeVarchar>(); }
_ZN5doris21FunctionEncodeVarcharINS_16EncodeAsSmallIntELNS_13PrimitiveTypeE4EE6createEv
Line
Count
Source
58
24
    static FunctionPtr create() { return std::make_shared<FunctionEncodeVarchar>(); }
_ZN5doris21FunctionEncodeVarcharINS_11EncodeAsIntELNS_13PrimitiveTypeE5EE6createEv
Line
Count
Source
58
37
    static FunctionPtr create() { return std::make_shared<FunctionEncodeVarchar>(); }
_ZN5doris21FunctionEncodeVarcharINS_14EncodeAsBigIntELNS_13PrimitiveTypeE6EE6createEv
Line
Count
Source
58
68
    static FunctionPtr create() { return std::make_shared<FunctionEncodeVarchar>(); }
_ZN5doris21FunctionEncodeVarcharINS_16EncodeAsLargeIntELNS_13PrimitiveTypeE7EE6createEv
Line
Count
Source
58
94
    static FunctionPtr create() { return std::make_shared<FunctionEncodeVarchar>(); }
59
60
8
    String get_name() const override { return name; }
_ZNK5doris21FunctionEncodeVarcharINS_16EncodeAsSmallIntELNS_13PrimitiveTypeE4EE8get_nameB5cxx11Ev
Line
Count
Source
60
2
    String get_name() const override { return name; }
_ZNK5doris21FunctionEncodeVarcharINS_11EncodeAsIntELNS_13PrimitiveTypeE5EE8get_nameB5cxx11Ev
Line
Count
Source
60
2
    String get_name() const override { return name; }
_ZNK5doris21FunctionEncodeVarcharINS_14EncodeAsBigIntELNS_13PrimitiveTypeE6EE8get_nameB5cxx11Ev
Line
Count
Source
60
2
    String get_name() const override { return name; }
_ZNK5doris21FunctionEncodeVarcharINS_16EncodeAsLargeIntELNS_13PrimitiveTypeE7EE8get_nameB5cxx11Ev
Line
Count
Source
60
2
    String get_name() const override { return name; }
61
62
179
    size_t get_number_of_arguments() const override { return 1; }
_ZNK5doris21FunctionEncodeVarcharINS_16EncodeAsSmallIntELNS_13PrimitiveTypeE4EE23get_number_of_argumentsEv
Line
Count
Source
62
13
    size_t get_number_of_arguments() const override { return 1; }
_ZNK5doris21FunctionEncodeVarcharINS_11EncodeAsIntELNS_13PrimitiveTypeE5EE23get_number_of_argumentsEv
Line
Count
Source
62
26
    size_t get_number_of_arguments() const override { return 1; }
_ZNK5doris21FunctionEncodeVarcharINS_14EncodeAsBigIntELNS_13PrimitiveTypeE6EE23get_number_of_argumentsEv
Line
Count
Source
62
57
    size_t get_number_of_arguments() const override { return 1; }
_ZNK5doris21FunctionEncodeVarcharINS_16EncodeAsLargeIntELNS_13PrimitiveTypeE7EE23get_number_of_argumentsEv
Line
Count
Source
62
83
    size_t get_number_of_arguments() const override { return 1; }
63
64
179
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
65
179
        return std::make_shared<typename PrimitiveTypeTraits<ReturnType>::DataType>();
66
179
    }
_ZNK5doris21FunctionEncodeVarcharINS_16EncodeAsSmallIntELNS_13PrimitiveTypeE4EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE
Line
Count
Source
64
13
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
65
13
        return std::make_shared<typename PrimitiveTypeTraits<ReturnType>::DataType>();
66
13
    }
_ZNK5doris21FunctionEncodeVarcharINS_11EncodeAsIntELNS_13PrimitiveTypeE5EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE
Line
Count
Source
64
26
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
65
26
        return std::make_shared<typename PrimitiveTypeTraits<ReturnType>::DataType>();
66
26
    }
_ZNK5doris21FunctionEncodeVarcharINS_14EncodeAsBigIntELNS_13PrimitiveTypeE6EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE
Line
Count
Source
64
57
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
65
57
        return std::make_shared<typename PrimitiveTypeTraits<ReturnType>::DataType>();
66
57
    }
_ZNK5doris21FunctionEncodeVarcharINS_16EncodeAsLargeIntELNS_13PrimitiveTypeE7EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE
Line
Count
Source
64
83
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
65
83
        return std::make_shared<typename PrimitiveTypeTraits<ReturnType>::DataType>();
66
83
    }
67
68
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
69
296
                        uint32_t result, size_t input_rows_count) const override {
70
296
        const ColumnString* col_str =
71
296
                assert_cast<const ColumnString*>(block.get_by_position(arguments[0]).column.get());
72
73
        // max_row_byte_size = size of string + size of offset value
74
296
        size_t max_str_size = col_str->get_max_row_byte_size() - sizeof(UInt32);
75
76
296
        if (max_str_size > sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType) - 1) {
77
9
            return Status::InternalError(
78
9
                    "String is too long to encode, input string size {}, max valid string "
79
9
                    "size for {} is {}",
80
9
                    max_str_size, name,
81
9
                    sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType) - 1);
82
9
        }
83
84
287
        auto col_res = PrimitiveTypeTraits<ReturnType>::ColumnType::create(input_rows_count, 0);
85
287
        auto& col_res_data = col_res->get_data();
86
87
961k
        for (size_t i = 0; i < input_rows_count; ++i) {
88
961k
            const char* str_ptr = col_str->get_data_at(i).data;
89
961k
            auto str_size = static_cast<UInt8>(col_str->get_data_at(i).size);
90
961k
            auto* res = &col_res_data[i];
91
961k
            auto* __restrict ui8_ptr = reinterpret_cast<UInt8*>(res);
92
93
            // "reverse" the order of string on little endian machine.
94
961k
            simd::reverse_copy_bytes(ui8_ptr,
95
961k
                                     sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType),
96
961k
                                     str_ptr, str_size);
97
            // Lowest byte of Integer stores the size of the string, left-shifted by 1 bit so that we can get
98
            // correct size after right shifting by 1
99
961k
            memset(ui8_ptr, str_size << 1, 1);
100
961k
            *res >>= 1;
101
            // operator &= can not be applied to Int128
102
961k
            *res = *res &
103
961k
                   std::numeric_limits<typename PrimitiveTypeTraits<ReturnType>::CppType>::max();
104
961k
        }
105
106
287
        block.get_by_position(result).column = std::move(col_res);
107
108
287
        return Status::OK();
109
296
    }
_ZNK5doris21FunctionEncodeVarcharINS_16EncodeAsSmallIntELNS_13PrimitiveTypeE4EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
69
14
                        uint32_t result, size_t input_rows_count) const override {
70
14
        const ColumnString* col_str =
71
14
                assert_cast<const ColumnString*>(block.get_by_position(arguments[0]).column.get());
72
73
        // max_row_byte_size = size of string + size of offset value
74
14
        size_t max_str_size = col_str->get_max_row_byte_size() - sizeof(UInt32);
75
76
14
        if (max_str_size > sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType) - 1) {
77
2
            return Status::InternalError(
78
2
                    "String is too long to encode, input string size {}, max valid string "
79
2
                    "size for {} is {}",
80
2
                    max_str_size, name,
81
2
                    sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType) - 1);
82
2
        }
83
84
12
        auto col_res = PrimitiveTypeTraits<ReturnType>::ColumnType::create(input_rows_count, 0);
85
12
        auto& col_res_data = col_res->get_data();
86
87
32.7k
        for (size_t i = 0; i < input_rows_count; ++i) {
88
32.7k
            const char* str_ptr = col_str->get_data_at(i).data;
89
32.7k
            auto str_size = static_cast<UInt8>(col_str->get_data_at(i).size);
90
32.7k
            auto* res = &col_res_data[i];
91
32.7k
            auto* __restrict ui8_ptr = reinterpret_cast<UInt8*>(res);
92
93
            // "reverse" the order of string on little endian machine.
94
32.7k
            simd::reverse_copy_bytes(ui8_ptr,
95
32.7k
                                     sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType),
96
32.7k
                                     str_ptr, str_size);
97
            // Lowest byte of Integer stores the size of the string, left-shifted by 1 bit so that we can get
98
            // correct size after right shifting by 1
99
32.7k
            memset(ui8_ptr, str_size << 1, 1);
100
32.7k
            *res >>= 1;
101
            // operator &= can not be applied to Int128
102
32.7k
            *res = *res &
103
32.7k
                   std::numeric_limits<typename PrimitiveTypeTraits<ReturnType>::CppType>::max();
104
32.7k
        }
105
106
12
        block.get_by_position(result).column = std::move(col_res);
107
108
12
        return Status::OK();
109
14
    }
_ZNK5doris21FunctionEncodeVarcharINS_11EncodeAsIntELNS_13PrimitiveTypeE5EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
69
50
                        uint32_t result, size_t input_rows_count) const override {
70
50
        const ColumnString* col_str =
71
50
                assert_cast<const ColumnString*>(block.get_by_position(arguments[0]).column.get());
72
73
        // max_row_byte_size = size of string + size of offset value
74
50
        size_t max_str_size = col_str->get_max_row_byte_size() - sizeof(UInt32);
75
76
50
        if (max_str_size > sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType) - 1) {
77
2
            return Status::InternalError(
78
2
                    "String is too long to encode, input string size {}, max valid string "
79
2
                    "size for {} is {}",
80
2
                    max_str_size, name,
81
2
                    sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType) - 1);
82
2
        }
83
84
48
        auto col_res = PrimitiveTypeTraits<ReturnType>::ColumnType::create(input_rows_count, 0);
85
48
        auto& col_res_data = col_res->get_data();
86
87
65.6k
        for (size_t i = 0; i < input_rows_count; ++i) {
88
65.5k
            const char* str_ptr = col_str->get_data_at(i).data;
89
65.5k
            auto str_size = static_cast<UInt8>(col_str->get_data_at(i).size);
90
65.5k
            auto* res = &col_res_data[i];
91
65.5k
            auto* __restrict ui8_ptr = reinterpret_cast<UInt8*>(res);
92
93
            // "reverse" the order of string on little endian machine.
94
65.5k
            simd::reverse_copy_bytes(ui8_ptr,
95
65.5k
                                     sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType),
96
65.5k
                                     str_ptr, str_size);
97
            // Lowest byte of Integer stores the size of the string, left-shifted by 1 bit so that we can get
98
            // correct size after right shifting by 1
99
65.5k
            memset(ui8_ptr, str_size << 1, 1);
100
65.5k
            *res >>= 1;
101
            // operator &= can not be applied to Int128
102
65.5k
            *res = *res &
103
65.5k
                   std::numeric_limits<typename PrimitiveTypeTraits<ReturnType>::CppType>::max();
104
65.5k
        }
105
106
48
        block.get_by_position(result).column = std::move(col_res);
107
108
48
        return Status::OK();
109
50
    }
_ZNK5doris21FunctionEncodeVarcharINS_14EncodeAsBigIntELNS_13PrimitiveTypeE6EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
69
77
                        uint32_t result, size_t input_rows_count) const override {
70
77
        const ColumnString* col_str =
71
77
                assert_cast<const ColumnString*>(block.get_by_position(arguments[0]).column.get());
72
73
        // max_row_byte_size = size of string + size of offset value
74
77
        size_t max_str_size = col_str->get_max_row_byte_size() - sizeof(UInt32);
75
76
77
        if (max_str_size > sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType) - 1) {
77
2
            return Status::InternalError(
78
2
                    "String is too long to encode, input string size {}, max valid string "
79
2
                    "size for {} is {}",
80
2
                    max_str_size, name,
81
2
                    sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType) - 1);
82
2
        }
83
84
75
        auto col_res = PrimitiveTypeTraits<ReturnType>::ColumnType::create(input_rows_count, 0);
85
75
        auto& col_res_data = col_res->get_data();
86
87
131k
        for (size_t i = 0; i < input_rows_count; ++i) {
88
131k
            const char* str_ptr = col_str->get_data_at(i).data;
89
131k
            auto str_size = static_cast<UInt8>(col_str->get_data_at(i).size);
90
131k
            auto* res = &col_res_data[i];
91
131k
            auto* __restrict ui8_ptr = reinterpret_cast<UInt8*>(res);
92
93
            // "reverse" the order of string on little endian machine.
94
131k
            simd::reverse_copy_bytes(ui8_ptr,
95
131k
                                     sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType),
96
131k
                                     str_ptr, str_size);
97
            // Lowest byte of Integer stores the size of the string, left-shifted by 1 bit so that we can get
98
            // correct size after right shifting by 1
99
131k
            memset(ui8_ptr, str_size << 1, 1);
100
131k
            *res >>= 1;
101
            // operator &= can not be applied to Int128
102
131k
            *res = *res &
103
131k
                   std::numeric_limits<typename PrimitiveTypeTraits<ReturnType>::CppType>::max();
104
131k
        }
105
106
75
        block.get_by_position(result).column = std::move(col_res);
107
108
75
        return Status::OK();
109
77
    }
_ZNK5doris21FunctionEncodeVarcharINS_16EncodeAsLargeIntELNS_13PrimitiveTypeE7EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
69
155
                        uint32_t result, size_t input_rows_count) const override {
70
155
        const ColumnString* col_str =
71
155
                assert_cast<const ColumnString*>(block.get_by_position(arguments[0]).column.get());
72
73
        // max_row_byte_size = size of string + size of offset value
74
155
        size_t max_str_size = col_str->get_max_row_byte_size() - sizeof(UInt32);
75
76
155
        if (max_str_size > sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType) - 1) {
77
3
            return Status::InternalError(
78
3
                    "String is too long to encode, input string size {}, max valid string "
79
3
                    "size for {} is {}",
80
3
                    max_str_size, name,
81
3
                    sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType) - 1);
82
3
        }
83
84
152
        auto col_res = PrimitiveTypeTraits<ReturnType>::ColumnType::create(input_rows_count, 0);
85
152
        auto& col_res_data = col_res->get_data();
86
87
732k
        for (size_t i = 0; i < input_rows_count; ++i) {
88
731k
            const char* str_ptr = col_str->get_data_at(i).data;
89
731k
            auto str_size = static_cast<UInt8>(col_str->get_data_at(i).size);
90
731k
            auto* res = &col_res_data[i];
91
731k
            auto* __restrict ui8_ptr = reinterpret_cast<UInt8*>(res);
92
93
            // "reverse" the order of string on little endian machine.
94
731k
            simd::reverse_copy_bytes(ui8_ptr,
95
731k
                                     sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType),
96
731k
                                     str_ptr, str_size);
97
            // Lowest byte of Integer stores the size of the string, left-shifted by 1 bit so that we can get
98
            // correct size after right shifting by 1
99
731k
            memset(ui8_ptr, str_size << 1, 1);
100
731k
            *res >>= 1;
101
            // operator &= can not be applied to Int128
102
731k
            *res = *res &
103
731k
                   std::numeric_limits<typename PrimitiveTypeTraits<ReturnType>::CppType>::max();
104
731k
        }
105
106
152
        block.get_by_position(result).column = std::move(col_res);
107
108
152
        return Status::OK();
109
155
    }
110
};
111
112
9
void register_function_encode_varchar(SimpleFunctionFactory& factory) {
113
9
    factory.register_function<FunctionEncodeVarchar<EncodeAsSmallInt, TYPE_SMALLINT>>();
114
9
    factory.register_function<FunctionEncodeVarchar<EncodeAsInt, TYPE_INT>>();
115
9
    factory.register_function<FunctionEncodeVarchar<EncodeAsBigInt, TYPE_BIGINT>>();
116
9
    factory.register_function<FunctionEncodeVarchar<EncodeAsLargeInt, TYPE_LARGEINT>>();
117
9
}
118
119
} // namespace doris