Coverage Report

Created: 2026-03-15 22:45

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/function_encode_varchar.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include <cstddef>
19
#include <limits>
20
#include <type_traits>
21
22
#include "common/exception.h"
23
#include "common/status.h"
24
#include "core/column/column_const.h"
25
#include "core/column/column_vector.h"
26
#include "core/data_type/data_type.h"
27
#include "core/data_type/data_type_number.h"
28
#include "core/data_type/data_type_string.h"
29
#include "core/data_type/primitive_type.h"
30
#include "core/types.h"
31
#include "exprs/function/function.h"
32
#include "exprs/function/function_helpers.h"
33
#include "exprs/function/simple_function_factory.h"
34
#include "util/simd/reverse_copy_bytes.h"
35
36
namespace doris {
37
38
struct EncodeAsSmallInt {
39
    static constexpr auto name = "encode_as_smallint";
40
};
41
42
struct EncodeAsInt {
43
    static constexpr auto name = "encode_as_int";
44
};
45
46
struct EncodeAsBigInt {
47
    static constexpr auto name = "encode_as_bigint";
48
};
49
50
struct EncodeAsLargeInt {
51
    static constexpr auto name = "encode_as_largeint";
52
};
53
54
template <typename Name, PrimitiveType ReturnType>
55
class FunctionEncodeVarchar : public IFunction {
56
public:
57
    static constexpr auto name = Name::name;
58
151
    static FunctionPtr create() { return std::make_shared<FunctionEncodeVarchar>(); }
_ZN5doris21FunctionEncodeVarcharINS_16EncodeAsSmallIntELNS_13PrimitiveTypeE4EE6createEv
Line
Count
Source
58
17
    static FunctionPtr create() { return std::make_shared<FunctionEncodeVarchar>(); }
_ZN5doris21FunctionEncodeVarcharINS_11EncodeAsIntELNS_13PrimitiveTypeE5EE6createEv
Line
Count
Source
58
26
    static FunctionPtr create() { return std::make_shared<FunctionEncodeVarchar>(); }
_ZN5doris21FunctionEncodeVarcharINS_14EncodeAsBigIntELNS_13PrimitiveTypeE6EE6createEv
Line
Count
Source
58
49
    static FunctionPtr create() { return std::make_shared<FunctionEncodeVarchar>(); }
_ZN5doris21FunctionEncodeVarcharINS_16EncodeAsLargeIntELNS_13PrimitiveTypeE7EE6createEv
Line
Count
Source
58
59
    static FunctionPtr create() { return std::make_shared<FunctionEncodeVarchar>(); }
59
60
4
    String get_name() const override { return name; }
_ZNK5doris21FunctionEncodeVarcharINS_16EncodeAsSmallIntELNS_13PrimitiveTypeE4EE8get_nameB5cxx11Ev
Line
Count
Source
60
1
    String get_name() const override { return name; }
_ZNK5doris21FunctionEncodeVarcharINS_11EncodeAsIntELNS_13PrimitiveTypeE5EE8get_nameB5cxx11Ev
Line
Count
Source
60
1
    String get_name() const override { return name; }
_ZNK5doris21FunctionEncodeVarcharINS_14EncodeAsBigIntELNS_13PrimitiveTypeE6EE8get_nameB5cxx11Ev
Line
Count
Source
60
1
    String get_name() const override { return name; }
_ZNK5doris21FunctionEncodeVarcharINS_16EncodeAsLargeIntELNS_13PrimitiveTypeE7EE8get_nameB5cxx11Ev
Line
Count
Source
60
1
    String get_name() const override { return name; }
61
62
115
    size_t get_number_of_arguments() const override { return 1; }
_ZNK5doris21FunctionEncodeVarcharINS_16EncodeAsSmallIntELNS_13PrimitiveTypeE4EE23get_number_of_argumentsEv
Line
Count
Source
62
8
    size_t get_number_of_arguments() const override { return 1; }
_ZNK5doris21FunctionEncodeVarcharINS_11EncodeAsIntELNS_13PrimitiveTypeE5EE23get_number_of_argumentsEv
Line
Count
Source
62
17
    size_t get_number_of_arguments() const override { return 1; }
_ZNK5doris21FunctionEncodeVarcharINS_14EncodeAsBigIntELNS_13PrimitiveTypeE6EE23get_number_of_argumentsEv
Line
Count
Source
62
40
    size_t get_number_of_arguments() const override { return 1; }
_ZNK5doris21FunctionEncodeVarcharINS_16EncodeAsLargeIntELNS_13PrimitiveTypeE7EE23get_number_of_argumentsEv
Line
Count
Source
62
50
    size_t get_number_of_arguments() const override { return 1; }
63
64
115
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
65
115
        return std::make_shared<typename PrimitiveTypeTraits<ReturnType>::DataType>();
66
115
    }
_ZNK5doris21FunctionEncodeVarcharINS_16EncodeAsSmallIntELNS_13PrimitiveTypeE4EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE
Line
Count
Source
64
8
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
65
8
        return std::make_shared<typename PrimitiveTypeTraits<ReturnType>::DataType>();
66
8
    }
_ZNK5doris21FunctionEncodeVarcharINS_11EncodeAsIntELNS_13PrimitiveTypeE5EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE
Line
Count
Source
64
17
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
65
17
        return std::make_shared<typename PrimitiveTypeTraits<ReturnType>::DataType>();
66
17
    }
_ZNK5doris21FunctionEncodeVarcharINS_14EncodeAsBigIntELNS_13PrimitiveTypeE6EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE
Line
Count
Source
64
40
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
65
40
        return std::make_shared<typename PrimitiveTypeTraits<ReturnType>::DataType>();
66
40
    }
_ZNK5doris21FunctionEncodeVarcharINS_16EncodeAsLargeIntELNS_13PrimitiveTypeE7EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE
Line
Count
Source
64
50
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
65
50
        return std::make_shared<typename PrimitiveTypeTraits<ReturnType>::DataType>();
66
50
    }
67
68
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
69
426
                        uint32_t result, size_t input_rows_count) const override {
70
426
        const ColumnString* col_str =
71
426
                assert_cast<const ColumnString*>(block.get_by_position(arguments[0]).column.get());
72
73
        // max_row_byte_size = size of string + size of offset value
74
426
        size_t max_str_size = col_str->get_max_row_byte_size() - sizeof(UInt32);
75
76
426
        if (max_str_size > sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType) - 1) {
77
5
            return Status::InternalError(
78
5
                    "String is too long to encode, input string size {}, max valid string "
79
5
                    "size for {} is {}",
80
5
                    max_str_size, name,
81
5
                    sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType) - 1);
82
5
        }
83
84
421
        auto col_res = PrimitiveTypeTraits<ReturnType>::ColumnType::create(input_rows_count, 0);
85
421
        auto& col_res_data = col_res->get_data();
86
87
720k
        for (size_t i = 0; i < input_rows_count; ++i) {
88
719k
            const char* str_ptr = col_str->get_data_at(i).data;
89
719k
            auto str_size = static_cast<UInt8>(col_str->get_data_at(i).size);
90
719k
            auto* res = &col_res_data[i];
91
719k
            auto* __restrict ui8_ptr = reinterpret_cast<UInt8*>(res);
92
93
            // "reverse" the order of string on little endian machine.
94
719k
            simd::reverse_copy_bytes(ui8_ptr,
95
719k
                                     sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType),
96
719k
                                     str_ptr, str_size);
97
            // Lowest byte of Integer stores the size of the string, left-shifted by 1 bit so that we can get
98
            // correct size after right shifting by 1
99
719k
            memset(ui8_ptr, str_size << 1, 1);
100
719k
            *res >>= 1;
101
            // operator &= can not be applied to Int128
102
719k
            *res = *res &
103
719k
                   std::numeric_limits<typename PrimitiveTypeTraits<ReturnType>::CppType>::max();
104
719k
        }
105
106
421
        block.get_by_position(result).column = std::move(col_res);
107
108
421
        return Status::OK();
109
426
    }
_ZNK5doris21FunctionEncodeVarcharINS_16EncodeAsSmallIntELNS_13PrimitiveTypeE4EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
69
9
                        uint32_t result, size_t input_rows_count) const override {
70
9
        const ColumnString* col_str =
71
9
                assert_cast<const ColumnString*>(block.get_by_position(arguments[0]).column.get());
72
73
        // max_row_byte_size = size of string + size of offset value
74
9
        size_t max_str_size = col_str->get_max_row_byte_size() - sizeof(UInt32);
75
76
9
        if (max_str_size > sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType) - 1) {
77
1
            return Status::InternalError(
78
1
                    "String is too long to encode, input string size {}, max valid string "
79
1
                    "size for {} is {}",
80
1
                    max_str_size, name,
81
1
                    sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType) - 1);
82
1
        }
83
84
8
        auto col_res = PrimitiveTypeTraits<ReturnType>::ColumnType::create(input_rows_count, 0);
85
8
        auto& col_res_data = col_res->get_data();
86
87
16.3k
        for (size_t i = 0; i < input_rows_count; ++i) {
88
16.3k
            const char* str_ptr = col_str->get_data_at(i).data;
89
16.3k
            auto str_size = static_cast<UInt8>(col_str->get_data_at(i).size);
90
16.3k
            auto* res = &col_res_data[i];
91
16.3k
            auto* __restrict ui8_ptr = reinterpret_cast<UInt8*>(res);
92
93
            // "reverse" the order of string on little endian machine.
94
16.3k
            simd::reverse_copy_bytes(ui8_ptr,
95
16.3k
                                     sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType),
96
16.3k
                                     str_ptr, str_size);
97
            // Lowest byte of Integer stores the size of the string, left-shifted by 1 bit so that we can get
98
            // correct size after right shifting by 1
99
16.3k
            memset(ui8_ptr, str_size << 1, 1);
100
16.3k
            *res >>= 1;
101
            // operator &= can not be applied to Int128
102
16.3k
            *res = *res &
103
16.3k
                   std::numeric_limits<typename PrimitiveTypeTraits<ReturnType>::CppType>::max();
104
16.3k
        }
105
106
8
        block.get_by_position(result).column = std::move(col_res);
107
108
8
        return Status::OK();
109
9
    }
_ZNK5doris21FunctionEncodeVarcharINS_11EncodeAsIntELNS_13PrimitiveTypeE5EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
69
41
                        uint32_t result, size_t input_rows_count) const override {
70
41
        const ColumnString* col_str =
71
41
                assert_cast<const ColumnString*>(block.get_by_position(arguments[0]).column.get());
72
73
        // max_row_byte_size = size of string + size of offset value
74
41
        size_t max_str_size = col_str->get_max_row_byte_size() - sizeof(UInt32);
75
76
41
        if (max_str_size > sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType) - 1) {
77
1
            return Status::InternalError(
78
1
                    "String is too long to encode, input string size {}, max valid string "
79
1
                    "size for {} is {}",
80
1
                    max_str_size, name,
81
1
                    sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType) - 1);
82
1
        }
83
84
40
        auto col_res = PrimitiveTypeTraits<ReturnType>::ColumnType::create(input_rows_count, 0);
85
40
        auto& col_res_data = col_res->get_data();
86
87
32.8k
        for (size_t i = 0; i < input_rows_count; ++i) {
88
32.8k
            const char* str_ptr = col_str->get_data_at(i).data;
89
32.8k
            auto str_size = static_cast<UInt8>(col_str->get_data_at(i).size);
90
32.8k
            auto* res = &col_res_data[i];
91
32.8k
            auto* __restrict ui8_ptr = reinterpret_cast<UInt8*>(res);
92
93
            // "reverse" the order of string on little endian machine.
94
32.8k
            simd::reverse_copy_bytes(ui8_ptr,
95
32.8k
                                     sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType),
96
32.8k
                                     str_ptr, str_size);
97
            // Lowest byte of Integer stores the size of the string, left-shifted by 1 bit so that we can get
98
            // correct size after right shifting by 1
99
32.8k
            memset(ui8_ptr, str_size << 1, 1);
100
32.8k
            *res >>= 1;
101
            // operator &= can not be applied to Int128
102
32.8k
            *res = *res &
103
32.8k
                   std::numeric_limits<typename PrimitiveTypeTraits<ReturnType>::CppType>::max();
104
32.8k
        }
105
106
40
        block.get_by_position(result).column = std::move(col_res);
107
108
40
        return Status::OK();
109
41
    }
_ZNK5doris21FunctionEncodeVarcharINS_14EncodeAsBigIntELNS_13PrimitiveTypeE6EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
69
62
                        uint32_t result, size_t input_rows_count) const override {
70
62
        const ColumnString* col_str =
71
62
                assert_cast<const ColumnString*>(block.get_by_position(arguments[0]).column.get());
72
73
        // max_row_byte_size = size of string + size of offset value
74
62
        size_t max_str_size = col_str->get_max_row_byte_size() - sizeof(UInt32);
75
76
62
        if (max_str_size > sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType) - 1) {
77
1
            return Status::InternalError(
78
1
                    "String is too long to encode, input string size {}, max valid string "
79
1
                    "size for {} is {}",
80
1
                    max_str_size, name,
81
1
                    sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType) - 1);
82
1
        }
83
84
61
        auto col_res = PrimitiveTypeTraits<ReturnType>::ColumnType::create(input_rows_count, 0);
85
61
        auto& col_res_data = col_res->get_data();
86
87
65.6k
        for (size_t i = 0; i < input_rows_count; ++i) {
88
65.5k
            const char* str_ptr = col_str->get_data_at(i).data;
89
65.5k
            auto str_size = static_cast<UInt8>(col_str->get_data_at(i).size);
90
65.5k
            auto* res = &col_res_data[i];
91
65.5k
            auto* __restrict ui8_ptr = reinterpret_cast<UInt8*>(res);
92
93
            // "reverse" the order of string on little endian machine.
94
65.5k
            simd::reverse_copy_bytes(ui8_ptr,
95
65.5k
                                     sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType),
96
65.5k
                                     str_ptr, str_size);
97
            // Lowest byte of Integer stores the size of the string, left-shifted by 1 bit so that we can get
98
            // correct size after right shifting by 1
99
65.5k
            memset(ui8_ptr, str_size << 1, 1);
100
65.5k
            *res >>= 1;
101
            // operator &= can not be applied to Int128
102
65.5k
            *res = *res &
103
65.5k
                   std::numeric_limits<typename PrimitiveTypeTraits<ReturnType>::CppType>::max();
104
65.5k
        }
105
106
61
        block.get_by_position(result).column = std::move(col_res);
107
108
61
        return Status::OK();
109
62
    }
_ZNK5doris21FunctionEncodeVarcharINS_16EncodeAsLargeIntELNS_13PrimitiveTypeE7EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
69
314
                        uint32_t result, size_t input_rows_count) const override {
70
314
        const ColumnString* col_str =
71
314
                assert_cast<const ColumnString*>(block.get_by_position(arguments[0]).column.get());
72
73
        // max_row_byte_size = size of string + size of offset value
74
314
        size_t max_str_size = col_str->get_max_row_byte_size() - sizeof(UInt32);
75
76
314
        if (max_str_size > sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType) - 1) {
77
2
            return Status::InternalError(
78
2
                    "String is too long to encode, input string size {}, max valid string "
79
2
                    "size for {} is {}",
80
2
                    max_str_size, name,
81
2
                    sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType) - 1);
82
2
        }
83
84
312
        auto col_res = PrimitiveTypeTraits<ReturnType>::ColumnType::create(input_rows_count, 0);
85
312
        auto& col_res_data = col_res->get_data();
86
87
605k
        for (size_t i = 0; i < input_rows_count; ++i) {
88
605k
            const char* str_ptr = col_str->get_data_at(i).data;
89
605k
            auto str_size = static_cast<UInt8>(col_str->get_data_at(i).size);
90
605k
            auto* res = &col_res_data[i];
91
605k
            auto* __restrict ui8_ptr = reinterpret_cast<UInt8*>(res);
92
93
            // "reverse" the order of string on little endian machine.
94
605k
            simd::reverse_copy_bytes(ui8_ptr,
95
605k
                                     sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType),
96
605k
                                     str_ptr, str_size);
97
            // Lowest byte of Integer stores the size of the string, left-shifted by 1 bit so that we can get
98
            // correct size after right shifting by 1
99
605k
            memset(ui8_ptr, str_size << 1, 1);
100
605k
            *res >>= 1;
101
            // operator &= can not be applied to Int128
102
605k
            *res = *res &
103
605k
                   std::numeric_limits<typename PrimitiveTypeTraits<ReturnType>::CppType>::max();
104
605k
        }
105
106
312
        block.get_by_position(result).column = std::move(col_res);
107
108
312
        return Status::OK();
109
314
    }
110
};
111
112
8
void register_function_encode_varchar(SimpleFunctionFactory& factory) {
113
8
    factory.register_function<FunctionEncodeVarchar<EncodeAsSmallInt, TYPE_SMALLINT>>();
114
8
    factory.register_function<FunctionEncodeVarchar<EncodeAsInt, TYPE_INT>>();
115
8
    factory.register_function<FunctionEncodeVarchar<EncodeAsBigInt, TYPE_BIGINT>>();
116
8
    factory.register_function<FunctionEncodeVarchar<EncodeAsLargeInt, TYPE_LARGEINT>>();
117
8
}
118
119
} // namespace doris