Coverage Report

Created: 2026-03-13 06:52

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/function_encode_varchar.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include <cstddef>
19
#include <limits>
20
#include <type_traits>
21
22
#include "common/exception.h"
23
#include "common/status.h"
24
#include "core/column/column_const.h"
25
#include "core/column/column_vector.h"
26
#include "core/data_type/data_type.h"
27
#include "core/data_type/data_type_number.h"
28
#include "core/data_type/data_type_string.h"
29
#include "core/data_type/primitive_type.h"
30
#include "core/types.h"
31
#include "exprs/function/function.h"
32
#include "exprs/function/function_helpers.h"
33
#include "exprs/function/simple_function_factory.h"
34
#include "util/simd/reverse_copy_bytes.h"
35
36
namespace doris {
37
38
struct EncodeAsSmallInt {
39
    static constexpr auto name = "encode_as_smallint";
40
};
41
42
struct EncodeAsInt {
43
    static constexpr auto name = "encode_as_int";
44
};
45
46
struct EncodeAsBigInt {
47
    static constexpr auto name = "encode_as_bigint";
48
};
49
50
struct EncodeAsLargeInt {
51
    static constexpr auto name = "encode_as_largeint";
52
};
53
54
template <typename Name, PrimitiveType ReturnType>
55
class FunctionEncodeVarchar : public IFunction {
56
public:
57
    static constexpr auto name = Name::name;
58
151
    static FunctionPtr create() { return std::make_shared<FunctionEncodeVarchar>(); }
_ZN5doris21FunctionEncodeVarcharINS_16EncodeAsSmallIntELNS_13PrimitiveTypeE4EE6createEv
Line
Count
Source
58
17
    static FunctionPtr create() { return std::make_shared<FunctionEncodeVarchar>(); }
_ZN5doris21FunctionEncodeVarcharINS_11EncodeAsIntELNS_13PrimitiveTypeE5EE6createEv
Line
Count
Source
58
26
    static FunctionPtr create() { return std::make_shared<FunctionEncodeVarchar>(); }
_ZN5doris21FunctionEncodeVarcharINS_14EncodeAsBigIntELNS_13PrimitiveTypeE6EE6createEv
Line
Count
Source
58
49
    static FunctionPtr create() { return std::make_shared<FunctionEncodeVarchar>(); }
_ZN5doris21FunctionEncodeVarcharINS_16EncodeAsLargeIntELNS_13PrimitiveTypeE7EE6createEv
Line
Count
Source
58
59
    static FunctionPtr create() { return std::make_shared<FunctionEncodeVarchar>(); }
59
60
4
    String get_name() const override { return name; }
_ZNK5doris21FunctionEncodeVarcharINS_16EncodeAsSmallIntELNS_13PrimitiveTypeE4EE8get_nameB5cxx11Ev
Line
Count
Source
60
1
    String get_name() const override { return name; }
_ZNK5doris21FunctionEncodeVarcharINS_11EncodeAsIntELNS_13PrimitiveTypeE5EE8get_nameB5cxx11Ev
Line
Count
Source
60
1
    String get_name() const override { return name; }
_ZNK5doris21FunctionEncodeVarcharINS_14EncodeAsBigIntELNS_13PrimitiveTypeE6EE8get_nameB5cxx11Ev
Line
Count
Source
60
1
    String get_name() const override { return name; }
_ZNK5doris21FunctionEncodeVarcharINS_16EncodeAsLargeIntELNS_13PrimitiveTypeE7EE8get_nameB5cxx11Ev
Line
Count
Source
60
1
    String get_name() const override { return name; }
61
62
115
    size_t get_number_of_arguments() const override { return 1; }
_ZNK5doris21FunctionEncodeVarcharINS_16EncodeAsSmallIntELNS_13PrimitiveTypeE4EE23get_number_of_argumentsEv
Line
Count
Source
62
8
    size_t get_number_of_arguments() const override { return 1; }
_ZNK5doris21FunctionEncodeVarcharINS_11EncodeAsIntELNS_13PrimitiveTypeE5EE23get_number_of_argumentsEv
Line
Count
Source
62
17
    size_t get_number_of_arguments() const override { return 1; }
_ZNK5doris21FunctionEncodeVarcharINS_14EncodeAsBigIntELNS_13PrimitiveTypeE6EE23get_number_of_argumentsEv
Line
Count
Source
62
40
    size_t get_number_of_arguments() const override { return 1; }
_ZNK5doris21FunctionEncodeVarcharINS_16EncodeAsLargeIntELNS_13PrimitiveTypeE7EE23get_number_of_argumentsEv
Line
Count
Source
62
50
    size_t get_number_of_arguments() const override { return 1; }
63
64
115
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
65
115
        return std::make_shared<typename PrimitiveTypeTraits<ReturnType>::DataType>();
66
115
    }
_ZNK5doris21FunctionEncodeVarcharINS_16EncodeAsSmallIntELNS_13PrimitiveTypeE4EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE
Line
Count
Source
64
8
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
65
8
        return std::make_shared<typename PrimitiveTypeTraits<ReturnType>::DataType>();
66
8
    }
_ZNK5doris21FunctionEncodeVarcharINS_11EncodeAsIntELNS_13PrimitiveTypeE5EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE
Line
Count
Source
64
17
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
65
17
        return std::make_shared<typename PrimitiveTypeTraits<ReturnType>::DataType>();
66
17
    }
_ZNK5doris21FunctionEncodeVarcharINS_14EncodeAsBigIntELNS_13PrimitiveTypeE6EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE
Line
Count
Source
64
40
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
65
40
        return std::make_shared<typename PrimitiveTypeTraits<ReturnType>::DataType>();
66
40
    }
_ZNK5doris21FunctionEncodeVarcharINS_16EncodeAsLargeIntELNS_13PrimitiveTypeE7EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE
Line
Count
Source
64
50
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
65
50
        return std::make_shared<typename PrimitiveTypeTraits<ReturnType>::DataType>();
66
50
    }
67
68
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
69
237
                        uint32_t result, size_t input_rows_count) const override {
70
237
        const ColumnString* col_str =
71
237
                assert_cast<const ColumnString*>(block.get_by_position(arguments[0]).column.get());
72
73
        // max_row_byte_size = size of string + size of offset value
74
237
        size_t max_str_size = col_str->get_max_row_byte_size() - sizeof(UInt32);
75
76
237
        if (max_str_size > sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType) - 1) {
77
5
            return Status::InternalError(
78
5
                    "String is too long to encode, input string size {}, max valid string "
79
5
                    "size for {} is {}",
80
5
                    max_str_size, name,
81
5
                    sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType) - 1);
82
5
        }
83
84
232
        auto col_res = PrimitiveTypeTraits<ReturnType>::ColumnType::create(input_rows_count, 0);
85
232
        auto& col_res_data = col_res->get_data();
86
87
717k
        for (size_t i = 0; i < input_rows_count; ++i) {
88
717k
            const char* str_ptr = col_str->get_data_at(i).data;
89
717k
            auto str_size = static_cast<UInt8>(col_str->get_data_at(i).size);
90
717k
            auto* res = &col_res_data[i];
91
717k
            auto* __restrict ui8_ptr = reinterpret_cast<UInt8*>(res);
92
93
            // "reverse" the order of string on little endian machine.
94
717k
            simd::reverse_copy_bytes(ui8_ptr,
95
717k
                                     sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType),
96
717k
                                     str_ptr, str_size);
97
            // Lowest byte of Integer stores the size of the string, left-shifted by 1 bit so that we can get
98
            // correct size after right shifting by 1
99
717k
            memset(ui8_ptr, str_size << 1, 1);
100
717k
            *res >>= 1;
101
            // operator &= can not be applied to Int128
102
717k
            *res = *res &
103
717k
                   std::numeric_limits<typename PrimitiveTypeTraits<ReturnType>::CppType>::max();
104
717k
        }
105
106
232
        block.get_by_position(result).column = std::move(col_res);
107
108
232
        return Status::OK();
109
237
    }
_ZNK5doris21FunctionEncodeVarcharINS_16EncodeAsSmallIntELNS_13PrimitiveTypeE4EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
69
9
                        uint32_t result, size_t input_rows_count) const override {
70
9
        const ColumnString* col_str =
71
9
                assert_cast<const ColumnString*>(block.get_by_position(arguments[0]).column.get());
72
73
        // max_row_byte_size = size of string + size of offset value
74
9
        size_t max_str_size = col_str->get_max_row_byte_size() - sizeof(UInt32);
75
76
9
        if (max_str_size > sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType) - 1) {
77
1
            return Status::InternalError(
78
1
                    "String is too long to encode, input string size {}, max valid string "
79
1
                    "size for {} is {}",
80
1
                    max_str_size, name,
81
1
                    sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType) - 1);
82
1
        }
83
84
8
        auto col_res = PrimitiveTypeTraits<ReturnType>::ColumnType::create(input_rows_count, 0);
85
8
        auto& col_res_data = col_res->get_data();
86
87
16.3k
        for (size_t i = 0; i < input_rows_count; ++i) {
88
16.3k
            const char* str_ptr = col_str->get_data_at(i).data;
89
16.3k
            auto str_size = static_cast<UInt8>(col_str->get_data_at(i).size);
90
16.3k
            auto* res = &col_res_data[i];
91
16.3k
            auto* __restrict ui8_ptr = reinterpret_cast<UInt8*>(res);
92
93
            // "reverse" the order of string on little endian machine.
94
16.3k
            simd::reverse_copy_bytes(ui8_ptr,
95
16.3k
                                     sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType),
96
16.3k
                                     str_ptr, str_size);
97
            // Lowest byte of Integer stores the size of the string, left-shifted by 1 bit so that we can get
98
            // correct size after right shifting by 1
99
16.3k
            memset(ui8_ptr, str_size << 1, 1);
100
16.3k
            *res >>= 1;
101
            // operator &= can not be applied to Int128
102
16.3k
            *res = *res &
103
16.3k
                   std::numeric_limits<typename PrimitiveTypeTraits<ReturnType>::CppType>::max();
104
16.3k
        }
105
106
8
        block.get_by_position(result).column = std::move(col_res);
107
108
8
        return Status::OK();
109
9
    }
_ZNK5doris21FunctionEncodeVarcharINS_11EncodeAsIntELNS_13PrimitiveTypeE5EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
69
41
                        uint32_t result, size_t input_rows_count) const override {
70
41
        const ColumnString* col_str =
71
41
                assert_cast<const ColumnString*>(block.get_by_position(arguments[0]).column.get());
72
73
        // max_row_byte_size = size of string + size of offset value
74
41
        size_t max_str_size = col_str->get_max_row_byte_size() - sizeof(UInt32);
75
76
41
        if (max_str_size > sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType) - 1) {
77
1
            return Status::InternalError(
78
1
                    "String is too long to encode, input string size {}, max valid string "
79
1
                    "size for {} is {}",
80
1
                    max_str_size, name,
81
1
                    sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType) - 1);
82
1
        }
83
84
40
        auto col_res = PrimitiveTypeTraits<ReturnType>::ColumnType::create(input_rows_count, 0);
85
40
        auto& col_res_data = col_res->get_data();
86
87
32.8k
        for (size_t i = 0; i < input_rows_count; ++i) {
88
32.8k
            const char* str_ptr = col_str->get_data_at(i).data;
89
32.8k
            auto str_size = static_cast<UInt8>(col_str->get_data_at(i).size);
90
32.8k
            auto* res = &col_res_data[i];
91
32.8k
            auto* __restrict ui8_ptr = reinterpret_cast<UInt8*>(res);
92
93
            // "reverse" the order of string on little endian machine.
94
32.8k
            simd::reverse_copy_bytes(ui8_ptr,
95
32.8k
                                     sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType),
96
32.8k
                                     str_ptr, str_size);
97
            // Lowest byte of Integer stores the size of the string, left-shifted by 1 bit so that we can get
98
            // correct size after right shifting by 1
99
32.8k
            memset(ui8_ptr, str_size << 1, 1);
100
32.8k
            *res >>= 1;
101
            // operator &= can not be applied to Int128
102
32.8k
            *res = *res &
103
32.8k
                   std::numeric_limits<typename PrimitiveTypeTraits<ReturnType>::CppType>::max();
104
32.8k
        }
105
106
40
        block.get_by_position(result).column = std::move(col_res);
107
108
40
        return Status::OK();
109
41
    }
_ZNK5doris21FunctionEncodeVarcharINS_14EncodeAsBigIntELNS_13PrimitiveTypeE6EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
69
65
                        uint32_t result, size_t input_rows_count) const override {
70
65
        const ColumnString* col_str =
71
65
                assert_cast<const ColumnString*>(block.get_by_position(arguments[0]).column.get());
72
73
        // max_row_byte_size = size of string + size of offset value
74
65
        size_t max_str_size = col_str->get_max_row_byte_size() - sizeof(UInt32);
75
76
65
        if (max_str_size > sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType) - 1) {
77
1
            return Status::InternalError(
78
1
                    "String is too long to encode, input string size {}, max valid string "
79
1
                    "size for {} is {}",
80
1
                    max_str_size, name,
81
1
                    sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType) - 1);
82
1
        }
83
84
64
        auto col_res = PrimitiveTypeTraits<ReturnType>::ColumnType::create(input_rows_count, 0);
85
64
        auto& col_res_data = col_res->get_data();
86
87
65.6k
        for (size_t i = 0; i < input_rows_count; ++i) {
88
65.5k
            const char* str_ptr = col_str->get_data_at(i).data;
89
65.5k
            auto str_size = static_cast<UInt8>(col_str->get_data_at(i).size);
90
65.5k
            auto* res = &col_res_data[i];
91
65.5k
            auto* __restrict ui8_ptr = reinterpret_cast<UInt8*>(res);
92
93
            // "reverse" the order of string on little endian machine.
94
65.5k
            simd::reverse_copy_bytes(ui8_ptr,
95
65.5k
                                     sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType),
96
65.5k
                                     str_ptr, str_size);
97
            // Lowest byte of Integer stores the size of the string, left-shifted by 1 bit so that we can get
98
            // correct size after right shifting by 1
99
65.5k
            memset(ui8_ptr, str_size << 1, 1);
100
65.5k
            *res >>= 1;
101
            // operator &= can not be applied to Int128
102
65.5k
            *res = *res &
103
65.5k
                   std::numeric_limits<typename PrimitiveTypeTraits<ReturnType>::CppType>::max();
104
65.5k
        }
105
106
64
        block.get_by_position(result).column = std::move(col_res);
107
108
64
        return Status::OK();
109
65
    }
_ZNK5doris21FunctionEncodeVarcharINS_16EncodeAsLargeIntELNS_13PrimitiveTypeE7EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
69
122
                        uint32_t result, size_t input_rows_count) const override {
70
122
        const ColumnString* col_str =
71
122
                assert_cast<const ColumnString*>(block.get_by_position(arguments[0]).column.get());
72
73
        // max_row_byte_size = size of string + size of offset value
74
122
        size_t max_str_size = col_str->get_max_row_byte_size() - sizeof(UInt32);
75
76
122
        if (max_str_size > sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType) - 1) {
77
2
            return Status::InternalError(
78
2
                    "String is too long to encode, input string size {}, max valid string "
79
2
                    "size for {} is {}",
80
2
                    max_str_size, name,
81
2
                    sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType) - 1);
82
2
        }
83
84
120
        auto col_res = PrimitiveTypeTraits<ReturnType>::ColumnType::create(input_rows_count, 0);
85
120
        auto& col_res_data = col_res->get_data();
86
87
602k
        for (size_t i = 0; i < input_rows_count; ++i) {
88
602k
            const char* str_ptr = col_str->get_data_at(i).data;
89
602k
            auto str_size = static_cast<UInt8>(col_str->get_data_at(i).size);
90
602k
            auto* res = &col_res_data[i];
91
602k
            auto* __restrict ui8_ptr = reinterpret_cast<UInt8*>(res);
92
93
            // "reverse" the order of string on little endian machine.
94
602k
            simd::reverse_copy_bytes(ui8_ptr,
95
602k
                                     sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType),
96
602k
                                     str_ptr, str_size);
97
            // Lowest byte of Integer stores the size of the string, left-shifted by 1 bit so that we can get
98
            // correct size after right shifting by 1
99
602k
            memset(ui8_ptr, str_size << 1, 1);
100
602k
            *res >>= 1;
101
            // operator &= can not be applied to Int128
102
602k
            *res = *res &
103
602k
                   std::numeric_limits<typename PrimitiveTypeTraits<ReturnType>::CppType>::max();
104
602k
        }
105
106
120
        block.get_by_position(result).column = std::move(col_res);
107
108
120
        return Status::OK();
109
122
    }
110
};
111
112
8
void register_function_encode_varchar(SimpleFunctionFactory& factory) {
113
8
    factory.register_function<FunctionEncodeVarchar<EncodeAsSmallInt, TYPE_SMALLINT>>();
114
8
    factory.register_function<FunctionEncodeVarchar<EncodeAsInt, TYPE_INT>>();
115
8
    factory.register_function<FunctionEncodeVarchar<EncodeAsBigInt, TYPE_BIGINT>>();
116
8
    factory.register_function<FunctionEncodeVarchar<EncodeAsLargeInt, TYPE_LARGEINT>>();
117
8
}
118
119
} // namespace doris