Coverage Report

Created: 2026-03-13 03:47

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/function_uuid.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include <array>
#include <cctype>
19
#include <cstddef>
20
#include <cstring>
21
#include <memory>
22
#include <utility>
23
24
#include "common/status.h"
25
#include "core/assert_cast.h"
26
#include "core/block/block.h"
27
#include "core/block/column_numbers.h"
28
#include "core/block/column_with_type_and_name.h"
29
#include "core/column/column.h"
30
#include "core/column/column_nullable.h"
31
#include "core/column/column_string.h"
32
#include "core/column/column_vector.h"
33
#include "core/data_type/data_type.h"
34
#include "core/data_type/data_type_nullable.h"
35
#include "core/data_type/data_type_number.h"
36
#include "core/data_type/data_type_string.h"
37
#include "core/types.h"
38
#include "exprs/aggregate/aggregate_function.h"
39
#include "exprs/function/function.h"
40
#include "exprs/function/simple_function_factory.h"
41
42
namespace doris {
43
class FunctionContext;
44
} // namespace doris
45
46
namespace doris {
47
// Exclusive end offsets of the five hex groups in the 36-character
// 8-4-4-4-12 UUID text form. The first four values are also the indexes of
// the group separators; the last one equals the full text length.
static constexpr std::array<int, 5> SPLIT_POS {8, 13, 18, 23, 36};
48
// Separator character placed between the hex groups of a textual UUID.
static constexpr char DELIMITER {'-'};
49
50
class FunctionUuidtoInt : public IFunction {
51
public:
52
    static constexpr auto name = "uuid_to_int";
53
54
25
    static FunctionPtr create() { return std::make_shared<FunctionUuidtoInt>(); }
55
56
1
    String get_name() const override { return name; }
57
58
16
    size_t get_number_of_arguments() const override { return 1; }
59
60
16
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
61
16
        return make_nullable(std::make_shared<DataTypeInt128>());
62
16
    }
63
64
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
65
12
                        uint32_t result, size_t input_rows_count) const override {
66
12
        const auto& arg_column =
67
12
                assert_cast<const ColumnString&>(*block.get_by_position(arguments[0]).column);
68
69
12
        auto result_column = ColumnInt128::create(input_rows_count);
70
12
        auto& result_data = result_column->get_data();
71
12
        auto null_column = ColumnUInt8::create(input_rows_count);
72
12
        auto& null_map = null_column->get_data();
73
74
29
        for (int row = 0; row < input_rows_count; row++) {
75
17
            auto str = arg_column.get_data_at(row);
76
17
            const auto* data = str.data;
77
17
            Int128* result_cell = &result_data[row];
78
17
            *result_cell = 0;
79
17
            null_map[row] = false;
80
81
17
            if (str.size == 36) {
82
12
                if (data[SPLIT_POS[0]] != DELIMITER || data[SPLIT_POS[1]] != DELIMITER ||
83
12
                    data[SPLIT_POS[2]] != DELIMITER || data[SPLIT_POS[3]] != DELIMITER) {
84
0
                    null_map[row] = true;
85
0
                    continue;
86
0
                }
87
12
                char new_data[32];
88
12
                memset(new_data, 0, sizeof(new_data));
89
                // ignore '-'
90
12
                memcpy(new_data, data, 8);
91
12
                memcpy(new_data + 8, data + SPLIT_POS[0] + 1, 4);
92
12
                memcpy(new_data + 12, data + SPLIT_POS[1] + 1, 4);
93
12
                memcpy(new_data + 16, data + SPLIT_POS[2] + 1, 4);
94
12
                memcpy(new_data + 20, data + SPLIT_POS[3] + 1, 12);
95
96
12
                if (!serialize(new_data, (char*)result_cell, 32)) {
97
0
                    null_map[row] = true;
98
0
                    continue;
99
0
                }
100
12
            } else if (str.size == 32) {
101
2
                if (!serialize(data, (char*)result_cell, 32)) {
102
0
                    null_map[row] = true;
103
0
                    continue;
104
0
                }
105
3
            } else {
106
3
                null_map[row] = true;
107
3
                continue;
108
3
            }
109
17
        }
110
111
12
        block.replace_by_position(
112
12
                result, ColumnNullable::create(std::move(result_column), std::move(null_column)));
113
12
        return Status::OK();
114
12
    }
115
116
    // use char* to write dst is the only legal way by 'restrict aliasing rule'
117
14
    static bool serialize(const char* __restrict src, char* __restrict dst, size_t length) {
118
14
        char target; // 8bit, contains 2 char input
119
448
        auto translate = [&target](const char ch) {
120
448
            if (isdigit(ch)) {
121
294
                target += ch - '0';
122
294
            } else if (ch >= 'a' && ch <= 'f') {
123
127
                target += ch - 'a' + 10;
124
127
            } else if (ch >= 'A' && ch <= 'F') {
125
27
                target += ch - 'A' + 10;
126
27
            } else {
127
0
                return false;
128
0
            }
129
448
            return true;
130
448
        };
131
132
14
        bool ok = true;
133
238
        for (size_t i = 0; i < length; i += 2, src++, dst++) {
134
224
            target = 0;
135
224
            if (!translate(*src)) {
136
0
                ok = false; // dont break for auto-simd
137
0
            }
138
139
224
            src++;
140
224
            target <<= 4;
141
224
            if (!translate(*src)) {
142
0
                ok = false;
143
0
            }
144
224
            *dst = target;
145
224
        }
146
147
14
        return ok;
148
14
    }
149
};
150
151
class FunctionInttoUuid : public IFunction {
152
public:
153
    static constexpr auto name = "int_to_uuid";
154
155
17
    static FunctionPtr create() { return std::make_shared<FunctionInttoUuid>(); }
156
157
1
    String get_name() const override { return name; }
158
159
8
    size_t get_number_of_arguments() const override { return 1; }
160
161
8
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
162
8
        return std::make_shared<DataTypeString>();
163
8
    }
164
165
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
166
7
                        uint32_t result, size_t input_rows_count) const override {
167
7
        const auto& arg_column =
168
7
                assert_cast<const ColumnInt128&>(*block.get_by_position(arguments[0]).column);
169
7
        auto result_column = ColumnString::create();
170
7
        constexpr int str_length = 36;
171
7
        auto& col_data = result_column->get_chars();
172
7
        auto& col_offset = result_column->get_offsets();
173
7
        col_data.resize(str_length * input_rows_count +
174
7
                        1); // for branchless deserialize, we occupy one more byte for the last '-'
175
7
        col_offset.resize(input_rows_count);
176
177
17
        for (int row = 0; row < input_rows_count; row++) {
178
10
            const Int128* arg = &arg_column.get_data()[row];
179
10
            col_offset[row] = col_offset[row - 1] + str_length;
180
10
            deserialize((char*)arg, col_data.data() + str_length * row);
181
10
        }
182
7
        col_data.resize(str_length * input_rows_count);
183
7
        block.replace_by_position(result, std::move(result_column));
184
7
        return Status::OK();
185
7
    }
186
187
    // use char* to read src is the only legal way by 'restrict aliasing rule'
188
10
    static void deserialize(const char* __restrict src, unsigned char* __restrict dst) {
189
320
        auto transform = [](char ch) -> unsigned char {
190
320
            if (ch < 10) {
191
211
                return ch + '0';
192
211
            } else {
193
109
                return ch - 10 + 'a';
194
109
            }
195
320
        };
196
197
10
        int j = 0;
198
50
        for (int i : SPLIT_POS) {
199
210
            for (; j < i; src++, j += 2) { // input 16 chars, 2 data per char
200
160
                dst[j] = transform(((*src) >> 4) & 0x0F);
201
160
                dst[j + 1] = transform(*src & 0x0F);
202
160
            }
203
50
            dst[j++] = DELIMITER; // we resized one more byte.
204
50
        }
205
10
    }
206
};
207
208
8
// Registers both UUID conversion functions defined in this file,
// `uuid_to_int` and `int_to_uuid`, with the given function factory.
void register_function_uuid_transforms(SimpleFunctionFactory& factory) {
    factory.register_function<FunctionUuidtoInt>();
    factory.register_function<FunctionInttoUuid>();
}
212
213
} // namespace doris