Coverage Report

Created: 2026-03-13 12:42

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/function_uuid.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include <cctype>
19
#include <cstddef>
20
#include <cstring>
21
#include <memory>
22
#include <utility>
23
24
#include "common/status.h"
25
#include "core/assert_cast.h"
26
#include "core/block/block.h"
27
#include "core/block/column_numbers.h"
28
#include "core/block/column_with_type_and_name.h"
29
#include "core/column/column.h"
30
#include "core/column/column_nullable.h"
31
#include "core/column/column_string.h"
32
#include "core/column/column_vector.h"
33
#include "core/data_type/data_type.h"
34
#include "core/data_type/data_type_nullable.h"
35
#include "core/data_type/data_type_number.h"
36
#include "core/data_type/data_type_string.h"
37
#include "core/types.h"
38
#include "exprs/aggregate/aggregate_function.h"
39
#include "exprs/function/function.h"
40
#include "exprs/function/simple_function_factory.h"
41
42
namespace doris {
43
class FunctionContext;
44
} // namespace doris
45
46
namespace doris {
47
// Positions of the four '-' separators inside the canonical 36-character UUID text
// (group layout 8-4-4-4-12), followed by the total text length 36 as the end of the
// last group.
static constexpr std::array<int, 5> SPLIT_POS {8, 13, 18, 23, 36};
// Character that separates the groups of a textual UUID.
static constexpr char DELIMITER {'-'};
49
50
class FunctionUuidtoInt : public IFunction {
51
public:
52
    static constexpr auto name = "uuid_to_int";
53
54
15
    static FunctionPtr create() { return std::make_shared<FunctionUuidtoInt>(); }
55
56
1
    String get_name() const override { return name; }
57
58
7
    size_t get_number_of_arguments() const override { return 1; }
59
60
7
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
61
7
        return make_nullable(std::make_shared<DataTypeInt128>());
62
7
    }
63
64
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
65
6
                        uint32_t result, size_t input_rows_count) const override {
66
6
        const auto& arg_column =
67
6
                assert_cast<const ColumnString&>(*block.get_by_position(arguments[0]).column);
68
69
6
        auto result_column = ColumnInt128::create(input_rows_count);
70
6
        auto& result_data = result_column->get_data();
71
6
        auto null_column = ColumnUInt8::create(input_rows_count);
72
6
        auto& null_map = null_column->get_data();
73
74
17
        for (int row = 0; row < input_rows_count; row++) {
75
11
            auto str = arg_column.get_data_at(row);
76
11
            const auto* data = str.data;
77
11
            Int128* result_cell = &result_data[row];
78
11
            *result_cell = 0;
79
11
            null_map[row] = false;
80
81
11
            if (str.size == 36) {
82
6
                if (data[SPLIT_POS[0]] != DELIMITER || data[SPLIT_POS[1]] != DELIMITER ||
83
6
                    data[SPLIT_POS[2]] != DELIMITER || data[SPLIT_POS[3]] != DELIMITER) {
84
0
                    null_map[row] = true;
85
0
                    continue;
86
0
                }
87
6
                char new_data[32];
88
6
                memset(new_data, 0, sizeof(new_data));
89
                // ignore '-'
90
6
                memcpy(new_data, data, 8);
91
6
                memcpy(new_data + 8, data + SPLIT_POS[0] + 1, 4);
92
6
                memcpy(new_data + 12, data + SPLIT_POS[1] + 1, 4);
93
6
                memcpy(new_data + 16, data + SPLIT_POS[2] + 1, 4);
94
6
                memcpy(new_data + 20, data + SPLIT_POS[3] + 1, 12);
95
96
6
                if (!serialize(new_data, (char*)result_cell, 32)) {
97
0
                    null_map[row] = true;
98
0
                    continue;
99
0
                }
100
6
            } else if (str.size == 32) {
101
2
                if (!serialize(data, (char*)result_cell, 32)) {
102
0
                    null_map[row] = true;
103
0
                    continue;
104
0
                }
105
3
            } else {
106
3
                null_map[row] = true;
107
3
                continue;
108
3
            }
109
11
        }
110
111
6
        block.replace_by_position(
112
6
                result, ColumnNullable::create(std::move(result_column), std::move(null_column)));
113
6
        return Status::OK();
114
6
    }
115
116
    // use char* to write dst is the only legal way by 'restrict aliasing rule'
117
8
    static bool serialize(const char* __restrict src, char* __restrict dst, size_t length) {
118
8
        char target; // 8bit, contains 2 char input
119
256
        auto translate = [&target](const char ch) {
120
256
            if (isdigit(ch)) {
121
156
                target += ch - '0';
122
156
            } else if (ch >= 'a' && ch <= 'f') {
123
100
                target += ch - 'a' + 10;
124
100
            } else if (ch >= 'A' && ch <= 'F') {
125
0
                target += ch - 'A' + 10;
126
0
            } else {
127
0
                return false;
128
0
            }
129
256
            return true;
130
256
        };
131
132
8
        bool ok = true;
133
136
        for (size_t i = 0; i < length; i += 2, src++, dst++) {
134
128
            target = 0;
135
128
            if (!translate(*src)) {
136
0
                ok = false; // dont break for auto-simd
137
0
            }
138
139
128
            src++;
140
128
            target <<= 4;
141
128
            if (!translate(*src)) {
142
0
                ok = false;
143
0
            }
144
128
            *dst = target;
145
128
        }
146
147
8
        return ok;
148
8
    }
149
};
150
151
class FunctionInttoUuid : public IFunction {
152
public:
153
    static constexpr auto name = "int_to_uuid";
154
155
13
    static FunctionPtr create() { return std::make_shared<FunctionInttoUuid>(); }
156
157
1
    String get_name() const override { return name; }
158
159
5
    size_t get_number_of_arguments() const override { return 1; }
160
161
5
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
162
5
        return std::make_shared<DataTypeString>();
163
5
    }
164
165
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
166
4
                        uint32_t result, size_t input_rows_count) const override {
167
4
        const auto& arg_column =
168
4
                assert_cast<const ColumnInt128&>(*block.get_by_position(arguments[0]).column);
169
4
        auto result_column = ColumnString::create();
170
4
        constexpr int str_length = 36;
171
4
        auto& col_data = result_column->get_chars();
172
4
        auto& col_offset = result_column->get_offsets();
173
4
        col_data.resize(str_length * input_rows_count +
174
4
                        1); // for branchless deserialize, we occupy one more byte for the last '-'
175
4
        col_offset.resize(input_rows_count);
176
177
11
        for (int row = 0; row < input_rows_count; row++) {
178
7
            const Int128* arg = &arg_column.get_data()[row];
179
7
            col_offset[row] = col_offset[row - 1] + str_length;
180
7
            deserialize((char*)arg, col_data.data() + str_length * row);
181
7
        }
182
4
        col_data.resize(str_length * input_rows_count);
183
4
        block.replace_by_position(result, std::move(result_column));
184
4
        return Status::OK();
185
4
    }
186
187
    // use char* to read src is the only legal way by 'restrict aliasing rule'
188
7
    static void deserialize(const char* __restrict src, unsigned char* __restrict dst) {
189
224
        auto transform = [](char ch) -> unsigned char {
190
224
            if (ch < 10) {
191
142
                return ch + '0';
192
142
            } else {
193
82
                return ch - 10 + 'a';
194
82
            }
195
224
        };
196
197
7
        int j = 0;
198
35
        for (int i : SPLIT_POS) {
199
147
            for (; j < i; src++, j += 2) { // input 16 chars, 2 data per char
200
112
                dst[j] = transform(((*src) >> 4) & 0x0F);
201
112
                dst[j + 1] = transform(*src & 0x0F);
202
112
            }
203
35
            dst[j++] = DELIMITER; // we resized one more byte.
204
35
        }
205
7
    }
206
};
207
208
7
// Registers the two UUID conversion functions defined in this file, `uuid_to_int` and
// `int_to_uuid`, with the given function factory.
void register_function_uuid_transforms(SimpleFunctionFactory& factory) {
    factory.register_function<FunctionUuidtoInt>(); // string -> Nullable(Int128)
    factory.register_function<FunctionInttoUuid>(); // Int128 -> string
}
212
213
} // namespace doris