Coverage Report

Created: 2026-03-15 22:14

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/function_encode_varchar.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include <cstddef>
19
#include <limits>
20
#include <type_traits>
21
22
#include "common/exception.h"
23
#include "common/status.h"
24
#include "core/column/column_const.h"
25
#include "core/column/column_vector.h"
26
#include "core/data_type/data_type.h"
27
#include "core/data_type/data_type_number.h"
28
#include "core/data_type/data_type_string.h"
29
#include "core/data_type/primitive_type.h"
30
#include "core/types.h"
31
#include "exprs/function/function.h"
32
#include "exprs/function/function_helpers.h"
33
#include "exprs/function/simple_function_factory.h"
34
#include "util/simd/reverse_copy_bytes.h"
35
36
namespace doris {
37
38
struct EncodeAsSmallInt {
39
    static constexpr auto name = "encode_as_smallint";
40
};
41
42
struct EncodeAsInt {
43
    static constexpr auto name = "encode_as_int";
44
};
45
46
struct EncodeAsBigInt {
47
    static constexpr auto name = "encode_as_bigint";
48
};
49
50
struct EncodeAsLargeInt {
51
    static constexpr auto name = "encode_as_largeint";
52
};
53
54
template <typename Name, PrimitiveType ReturnType>
55
class FunctionEncodeVarchar : public IFunction {
56
public:
57
    static constexpr auto name = Name::name;
58
72
    static FunctionPtr create() { return std::make_shared<FunctionEncodeVarchar>(); }
_ZN5doris21FunctionEncodeVarcharINS_16EncodeAsSmallIntELNS_13PrimitiveTypeE4EE6createEv
Line
Count
Source
58
7
    static FunctionPtr create() { return std::make_shared<FunctionEncodeVarchar>(); }
_ZN5doris21FunctionEncodeVarcharINS_11EncodeAsIntELNS_13PrimitiveTypeE5EE6createEv
Line
Count
Source
58
11
    static FunctionPtr create() { return std::make_shared<FunctionEncodeVarchar>(); }
_ZN5doris21FunctionEncodeVarcharINS_14EncodeAsBigIntELNS_13PrimitiveTypeE6EE6createEv
Line
Count
Source
58
19
    static FunctionPtr create() { return std::make_shared<FunctionEncodeVarchar>(); }
_ZN5doris21FunctionEncodeVarcharINS_16EncodeAsLargeIntELNS_13PrimitiveTypeE7EE6createEv
Line
Count
Source
58
35
    static FunctionPtr create() { return std::make_shared<FunctionEncodeVarchar>(); }
59
60
4
    String get_name() const override { return name; }
_ZNK5doris21FunctionEncodeVarcharINS_16EncodeAsSmallIntELNS_13PrimitiveTypeE4EE8get_nameB5cxx11Ev
Line
Count
Source
60
1
    String get_name() const override { return name; }
_ZNK5doris21FunctionEncodeVarcharINS_11EncodeAsIntELNS_13PrimitiveTypeE5EE8get_nameB5cxx11Ev
Line
Count
Source
60
1
    String get_name() const override { return name; }
_ZNK5doris21FunctionEncodeVarcharINS_14EncodeAsBigIntELNS_13PrimitiveTypeE6EE8get_nameB5cxx11Ev
Line
Count
Source
60
1
    String get_name() const override { return name; }
_ZNK5doris21FunctionEncodeVarcharINS_16EncodeAsLargeIntELNS_13PrimitiveTypeE7EE8get_nameB5cxx11Ev
Line
Count
Source
60
1
    String get_name() const override { return name; }
61
62
64
    size_t get_number_of_arguments() const override { return 1; }
_ZNK5doris21FunctionEncodeVarcharINS_16EncodeAsSmallIntELNS_13PrimitiveTypeE4EE23get_number_of_argumentsEv
Line
Count
Source
62
5
    size_t get_number_of_arguments() const override { return 1; }
_ZNK5doris21FunctionEncodeVarcharINS_11EncodeAsIntELNS_13PrimitiveTypeE5EE23get_number_of_argumentsEv
Line
Count
Source
62
9
    size_t get_number_of_arguments() const override { return 1; }
_ZNK5doris21FunctionEncodeVarcharINS_14EncodeAsBigIntELNS_13PrimitiveTypeE6EE23get_number_of_argumentsEv
Line
Count
Source
62
17
    size_t get_number_of_arguments() const override { return 1; }
_ZNK5doris21FunctionEncodeVarcharINS_16EncodeAsLargeIntELNS_13PrimitiveTypeE7EE23get_number_of_argumentsEv
Line
Count
Source
62
33
    size_t get_number_of_arguments() const override { return 1; }
63
64
64
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
65
64
        return std::make_shared<typename PrimitiveTypeTraits<ReturnType>::DataType>();
66
64
    }
_ZNK5doris21FunctionEncodeVarcharINS_16EncodeAsSmallIntELNS_13PrimitiveTypeE4EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE
Line
Count
Source
64
5
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
65
5
        return std::make_shared<typename PrimitiveTypeTraits<ReturnType>::DataType>();
66
5
    }
_ZNK5doris21FunctionEncodeVarcharINS_11EncodeAsIntELNS_13PrimitiveTypeE5EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE
Line
Count
Source
64
9
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
65
9
        return std::make_shared<typename PrimitiveTypeTraits<ReturnType>::DataType>();
66
9
    }
_ZNK5doris21FunctionEncodeVarcharINS_14EncodeAsBigIntELNS_13PrimitiveTypeE6EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE
Line
Count
Source
64
17
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
65
17
        return std::make_shared<typename PrimitiveTypeTraits<ReturnType>::DataType>();
66
17
    }
_ZNK5doris21FunctionEncodeVarcharINS_16EncodeAsLargeIntELNS_13PrimitiveTypeE7EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS8_EE
Line
Count
Source
64
33
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
65
33
        return std::make_shared<typename PrimitiveTypeTraits<ReturnType>::DataType>();
66
33
    }
67
68
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
69
64
                        uint32_t result, size_t input_rows_count) const override {
70
64
        const ColumnString* col_str =
71
64
                assert_cast<const ColumnString*>(block.get_by_position(arguments[0]).column.get());
72
73
        // max_row_byte_size = size of string + size of offset value
74
64
        size_t max_str_size = col_str->get_max_row_byte_size() - sizeof(UInt32);
75
76
64
        if (max_str_size > sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType) - 1) {
77
4
            return Status::InternalError(
78
4
                    "String is too long to encode, input string size {}, max valid string "
79
4
                    "size for {} is {}",
80
4
                    max_str_size, name,
81
4
                    sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType) - 1);
82
4
        }
83
84
60
        auto col_res = PrimitiveTypeTraits<ReturnType>::ColumnType::create(input_rows_count, 0);
85
60
        auto& col_res_data = col_res->get_data();
86
87
245k
        for (size_t i = 0; i < input_rows_count; ++i) {
88
245k
            const char* str_ptr = col_str->get_data_at(i).data;
89
245k
            auto str_size = static_cast<UInt8>(col_str->get_data_at(i).size);
90
245k
            auto* res = &col_res_data[i];
91
245k
            auto* __restrict ui8_ptr = reinterpret_cast<UInt8*>(res);
92
93
            // "reverse" the order of string on little endian machine.
94
245k
            simd::reverse_copy_bytes(ui8_ptr,
95
245k
                                     sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType),
96
245k
                                     str_ptr, str_size);
97
            // The lowest byte of the integer stores the string size, left-shifted by 1 so that we get the
98
            // correct size after right shifting by 1
99
245k
            memset(ui8_ptr, str_size << 1, 1);
100
245k
            *res >>= 1;
101
            // operator &= can not be applied to Int128
102
245k
            *res = *res &
103
245k
                   std::numeric_limits<typename PrimitiveTypeTraits<ReturnType>::CppType>::max();
104
245k
        }
105
106
60
        block.get_by_position(result).column = std::move(col_res);
107
108
60
        return Status::OK();
109
64
    }
_ZNK5doris21FunctionEncodeVarcharINS_16EncodeAsSmallIntELNS_13PrimitiveTypeE4EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
69
5
                        uint32_t result, size_t input_rows_count) const override {
70
5
        const ColumnString* col_str =
71
5
                assert_cast<const ColumnString*>(block.get_by_position(arguments[0]).column.get());
72
73
        // max_row_byte_size = size of string + size of offset value
74
5
        size_t max_str_size = col_str->get_max_row_byte_size() - sizeof(UInt32);
75
76
5
        if (max_str_size > sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType) - 1) {
77
1
            return Status::InternalError(
78
1
                    "String is too long to encode, input string size {}, max valid string "
79
1
                    "size for {} is {}",
80
1
                    max_str_size, name,
81
1
                    sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType) - 1);
82
1
        }
83
84
4
        auto col_res = PrimitiveTypeTraits<ReturnType>::ColumnType::create(input_rows_count, 0);
85
4
        auto& col_res_data = col_res->get_data();
86
87
16.3k
        for (size_t i = 0; i < input_rows_count; ++i) {
88
16.3k
            const char* str_ptr = col_str->get_data_at(i).data;
89
16.3k
            auto str_size = static_cast<UInt8>(col_str->get_data_at(i).size);
90
16.3k
            auto* res = &col_res_data[i];
91
16.3k
            auto* __restrict ui8_ptr = reinterpret_cast<UInt8*>(res);
92
93
            // "reverse" the order of string on little endian machine.
94
16.3k
            simd::reverse_copy_bytes(ui8_ptr,
95
16.3k
                                     sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType),
96
16.3k
                                     str_ptr, str_size);
97
            // The lowest byte of the integer stores the string size, left-shifted by 1 so that we get the
98
            // correct size after right shifting by 1
99
16.3k
            memset(ui8_ptr, str_size << 1, 1);
100
16.3k
            *res >>= 1;
101
            // operator &= can not be applied to Int128
102
16.3k
            *res = *res &
103
16.3k
                   std::numeric_limits<typename PrimitiveTypeTraits<ReturnType>::CppType>::max();
104
16.3k
        }
105
106
4
        block.get_by_position(result).column = std::move(col_res);
107
108
4
        return Status::OK();
109
5
    }
_ZNK5doris21FunctionEncodeVarcharINS_11EncodeAsIntELNS_13PrimitiveTypeE5EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
69
9
                        uint32_t result, size_t input_rows_count) const override {
70
9
        const ColumnString* col_str =
71
9
                assert_cast<const ColumnString*>(block.get_by_position(arguments[0]).column.get());
72
73
        // max_row_byte_size = size of string + size of offset value
74
9
        size_t max_str_size = col_str->get_max_row_byte_size() - sizeof(UInt32);
75
76
9
        if (max_str_size > sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType) - 1) {
77
1
            return Status::InternalError(
78
1
                    "String is too long to encode, input string size {}, max valid string "
79
1
                    "size for {} is {}",
80
1
                    max_str_size, name,
81
1
                    sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType) - 1);
82
1
        }
83
84
8
        auto col_res = PrimitiveTypeTraits<ReturnType>::ColumnType::create(input_rows_count, 0);
85
8
        auto& col_res_data = col_res->get_data();
86
87
32.7k
        for (size_t i = 0; i < input_rows_count; ++i) {
88
32.7k
            const char* str_ptr = col_str->get_data_at(i).data;
89
32.7k
            auto str_size = static_cast<UInt8>(col_str->get_data_at(i).size);
90
32.7k
            auto* res = &col_res_data[i];
91
32.7k
            auto* __restrict ui8_ptr = reinterpret_cast<UInt8*>(res);
92
93
            // "reverse" the order of string on little endian machine.
94
32.7k
            simd::reverse_copy_bytes(ui8_ptr,
95
32.7k
                                     sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType),
96
32.7k
                                     str_ptr, str_size);
97
            // The lowest byte of the integer stores the string size, left-shifted by 1 so that we get the
98
            // correct size after right shifting by 1
99
32.7k
            memset(ui8_ptr, str_size << 1, 1);
100
32.7k
            *res >>= 1;
101
            // operator &= can not be applied to Int128
102
32.7k
            *res = *res &
103
32.7k
                   std::numeric_limits<typename PrimitiveTypeTraits<ReturnType>::CppType>::max();
104
32.7k
        }
105
106
8
        block.get_by_position(result).column = std::move(col_res);
107
108
8
        return Status::OK();
109
9
    }
_ZNK5doris21FunctionEncodeVarcharINS_14EncodeAsBigIntELNS_13PrimitiveTypeE6EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
69
17
                        uint32_t result, size_t input_rows_count) const override {
70
17
        const ColumnString* col_str =
71
17
                assert_cast<const ColumnString*>(block.get_by_position(arguments[0]).column.get());
72
73
        // max_row_byte_size = size of string + size of offset value
74
17
        size_t max_str_size = col_str->get_max_row_byte_size() - sizeof(UInt32);
75
76
17
        if (max_str_size > sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType) - 1) {
77
1
            return Status::InternalError(
78
1
                    "String is too long to encode, input string size {}, max valid string "
79
1
                    "size for {} is {}",
80
1
                    max_str_size, name,
81
1
                    sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType) - 1);
82
1
        }
83
84
16
        auto col_res = PrimitiveTypeTraits<ReturnType>::ColumnType::create(input_rows_count, 0);
85
16
        auto& col_res_data = col_res->get_data();
86
87
65.5k
        for (size_t i = 0; i < input_rows_count; ++i) {
88
65.5k
            const char* str_ptr = col_str->get_data_at(i).data;
89
65.5k
            auto str_size = static_cast<UInt8>(col_str->get_data_at(i).size);
90
65.5k
            auto* res = &col_res_data[i];
91
65.5k
            auto* __restrict ui8_ptr = reinterpret_cast<UInt8*>(res);
92
93
            // "reverse" the order of string on little endian machine.
94
65.5k
            simd::reverse_copy_bytes(ui8_ptr,
95
65.5k
                                     sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType),
96
65.5k
                                     str_ptr, str_size);
97
            // The lowest byte of the integer stores the string size, left-shifted by 1 so that we get the
98
            // correct size after right shifting by 1
99
65.5k
            memset(ui8_ptr, str_size << 1, 1);
100
65.5k
            *res >>= 1;
101
            // operator &= can not be applied to Int128
102
65.5k
            *res = *res &
103
65.5k
                   std::numeric_limits<typename PrimitiveTypeTraits<ReturnType>::CppType>::max();
104
65.5k
        }
105
106
16
        block.get_by_position(result).column = std::move(col_res);
107
108
16
        return Status::OK();
109
17
    }
_ZNK5doris21FunctionEncodeVarcharINS_16EncodeAsLargeIntELNS_13PrimitiveTypeE7EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
69
33
                        uint32_t result, size_t input_rows_count) const override {
70
33
        const ColumnString* col_str =
71
33
                assert_cast<const ColumnString*>(block.get_by_position(arguments[0]).column.get());
72
73
        // max_row_byte_size = size of string + size of offset value
74
33
        size_t max_str_size = col_str->get_max_row_byte_size() - sizeof(UInt32);
75
76
33
        if (max_str_size > sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType) - 1) {
77
1
            return Status::InternalError(
78
1
                    "String is too long to encode, input string size {}, max valid string "
79
1
                    "size for {} is {}",
80
1
                    max_str_size, name,
81
1
                    sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType) - 1);
82
1
        }
83
84
32
        auto col_res = PrimitiveTypeTraits<ReturnType>::ColumnType::create(input_rows_count, 0);
85
32
        auto& col_res_data = col_res->get_data();
86
87
131k
        for (size_t i = 0; i < input_rows_count; ++i) {
88
131k
            const char* str_ptr = col_str->get_data_at(i).data;
89
131k
            auto str_size = static_cast<UInt8>(col_str->get_data_at(i).size);
90
131k
            auto* res = &col_res_data[i];
91
131k
            auto* __restrict ui8_ptr = reinterpret_cast<UInt8*>(res);
92
93
            // "reverse" the order of string on little endian machine.
94
131k
            simd::reverse_copy_bytes(ui8_ptr,
95
131k
                                     sizeof(typename PrimitiveTypeTraits<ReturnType>::CppType),
96
131k
                                     str_ptr, str_size);
97
            // The lowest byte of the integer stores the string size, left-shifted by 1 so that we get the
98
            // correct size after right shifting by 1
99
131k
            memset(ui8_ptr, str_size << 1, 1);
100
131k
            *res >>= 1;
101
            // operator &= can not be applied to Int128
102
131k
            *res = *res &
103
131k
                   std::numeric_limits<typename PrimitiveTypeTraits<ReturnType>::CppType>::max();
104
131k
        }
105
106
32
        block.get_by_position(result).column = std::move(col_res);
107
108
32
        return Status::OK();
109
33
    }
110
};
111
112
1
void register_function_encode_varchar(SimpleFunctionFactory& factory) {
113
1
    factory.register_function<FunctionEncodeVarchar<EncodeAsSmallInt, TYPE_SMALLINT>>();
114
1
    factory.register_function<FunctionEncodeVarchar<EncodeAsInt, TYPE_INT>>();
115
1
    factory.register_function<FunctionEncodeVarchar<EncodeAsBigInt, TYPE_BIGINT>>();
116
1
    factory.register_function<FunctionEncodeVarchar<EncodeAsLargeInt, TYPE_LARGEINT>>();
117
1
}
118
119
} // namespace doris