Coverage Report

Created: 2026-03-19 11:39

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/function_uuid.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include <cctype>
19
#include <cstddef>
20
#include <cstring>
21
#include <memory>
22
#include <utility>
23
24
#include "common/status.h"
25
#include "core/assert_cast.h"
26
#include "core/block/block.h"
27
#include "core/block/column_numbers.h"
28
#include "core/block/column_with_type_and_name.h"
29
#include "core/column/column.h"
30
#include "core/column/column_nullable.h"
31
#include "core/column/column_string.h"
32
#include "core/column/column_vector.h"
33
#include "core/data_type/data_type.h"
34
#include "core/data_type/data_type_nullable.h"
35
#include "core/data_type/data_type_number.h"
36
#include "core/data_type/data_type_string.h"
37
#include "core/types.h"
38
#include "exprs/aggregate/aggregate_function.h"
39
#include "exprs/function/function.h"
40
#include "exprs/function/simple_function_factory.h"
41
42
namespace doris {
43
class FunctionContext;
44
} // namespace doris
45
46
namespace doris {
47
// Layout of the canonical UUID text form, 8-4-4-4-12 hex digits:
// indices 8, 13, 18 and 23 hold the separators, and 36 is the total length
// (i.e. the index one past the final group).
static constexpr std::array<int, 5> SPLIT_POS {8, 13, 18, 23, 36};
// Character that separates the hex-digit groups.
static constexpr char DELIMITER {'-'};
49
50
class FunctionUuidtoInt : public IFunction {
51
public:
52
    static constexpr auto name = "uuid_to_int";
53
54
34
    static FunctionPtr create() { return std::make_shared<FunctionUuidtoInt>(); }
55
56
2
    String get_name() const override { return name; }
57
58
23
    size_t get_number_of_arguments() const override { return 1; }
59
60
23
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
61
23
        return make_nullable(std::make_shared<DataTypeInt128>());
62
23
    }
63
64
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
65
18
                        uint32_t result, size_t input_rows_count) const override {
66
18
        const auto& arg_column =
67
18
                assert_cast<const ColumnString&>(*block.get_by_position(arguments[0]).column);
68
69
18
        auto result_column = ColumnInt128::create(input_rows_count);
70
18
        auto& result_data = result_column->get_data();
71
18
        auto null_column = ColumnUInt8::create(input_rows_count);
72
18
        auto& null_map = null_column->get_data();
73
74
46
        for (int row = 0; row < input_rows_count; row++) {
75
28
            auto str = arg_column.get_data_at(row);
76
28
            const auto* data = str.data;
77
28
            Int128* result_cell = &result_data[row];
78
28
            *result_cell = 0;
79
28
            null_map[row] = false;
80
81
28
            if (str.size == 36) {
82
18
                if (data[SPLIT_POS[0]] != DELIMITER || data[SPLIT_POS[1]] != DELIMITER ||
83
18
                    data[SPLIT_POS[2]] != DELIMITER || data[SPLIT_POS[3]] != DELIMITER) {
84
0
                    null_map[row] = true;
85
0
                    continue;
86
0
                }
87
18
                char new_data[32];
88
18
                memset(new_data, 0, sizeof(new_data));
89
                // ignore '-'
90
18
                memcpy(new_data, data, 8);
91
18
                memcpy(new_data + 8, data + SPLIT_POS[0] + 1, 4);
92
18
                memcpy(new_data + 12, data + SPLIT_POS[1] + 1, 4);
93
18
                memcpy(new_data + 16, data + SPLIT_POS[2] + 1, 4);
94
18
                memcpy(new_data + 20, data + SPLIT_POS[3] + 1, 12);
95
96
18
                if (!serialize(new_data, (char*)result_cell, 32)) {
97
0
                    null_map[row] = true;
98
0
                    continue;
99
0
                }
100
18
            } else if (str.size == 32) {
101
4
                if (!serialize(data, (char*)result_cell, 32)) {
102
0
                    null_map[row] = true;
103
0
                    continue;
104
0
                }
105
6
            } else {
106
6
                null_map[row] = true;
107
6
                continue;
108
6
            }
109
28
        }
110
111
18
        block.replace_by_position(
112
18
                result, ColumnNullable::create(std::move(result_column), std::move(null_column)));
113
18
        return Status::OK();
114
18
    }
115
116
    // use char* to write dst is the only legal way by 'restrict aliasing rule'
117
22
    static bool serialize(const char* __restrict src, char* __restrict dst, size_t length) {
118
22
        char target; // 8bit, contains 2 char input
119
704
        auto translate = [&target](const char ch) {
120
704
            if (isdigit(ch)) {
121
450
                target += ch - '0';
122
450
            } else if (ch >= 'a' && ch <= 'f') {
123
227
                target += ch - 'a' + 10;
124
227
            } else if (ch >= 'A' && ch <= 'F') {
125
27
                target += ch - 'A' + 10;
126
27
            } else {
127
0
                return false;
128
0
            }
129
704
            return true;
130
704
        };
131
132
22
        bool ok = true;
133
374
        for (size_t i = 0; i < length; i += 2, src++, dst++) {
134
352
            target = 0;
135
352
            if (!translate(*src)) {
136
0
                ok = false; // dont break for auto-simd
137
0
            }
138
139
352
            src++;
140
352
            target <<= 4;
141
352
            if (!translate(*src)) {
142
0
                ok = false;
143
0
            }
144
352
            *dst = target;
145
352
        }
146
147
22
        return ok;
148
22
    }
149
};
150
151
class FunctionInttoUuid : public IFunction {
152
public:
153
    static constexpr auto name = "int_to_uuid";
154
155
25
    static FunctionPtr create() { return std::make_shared<FunctionInttoUuid>(); }
156
157
2
    String get_name() const override { return name; }
158
159
14
    size_t get_number_of_arguments() const override { return 1; }
160
161
14
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
162
14
        return std::make_shared<DataTypeString>();
163
14
    }
164
165
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
166
11
                        uint32_t result, size_t input_rows_count) const override {
167
11
        const auto& arg_column =
168
11
                assert_cast<const ColumnInt128&>(*block.get_by_position(arguments[0]).column);
169
11
        auto result_column = ColumnString::create();
170
11
        constexpr int str_length = 36;
171
11
        auto& col_data = result_column->get_chars();
172
11
        auto& col_offset = result_column->get_offsets();
173
11
        col_data.resize(str_length * input_rows_count +
174
11
                        1); // for branchless deserialize, we occupy one more byte for the last '-'
175
11
        col_offset.resize(input_rows_count);
176
177
28
        for (int row = 0; row < input_rows_count; row++) {
178
17
            const Int128* arg = &arg_column.get_data()[row];
179
17
            col_offset[row] = col_offset[row - 1] + str_length;
180
17
            deserialize((char*)arg, col_data.data() + str_length * row);
181
17
        }
182
11
        col_data.resize(str_length * input_rows_count);
183
11
        block.replace_by_position(result, std::move(result_column));
184
11
        return Status::OK();
185
11
    }
186
187
    // use char* to read src is the only legal way by 'restrict aliasing rule'
188
17
    static void deserialize(const char* __restrict src, unsigned char* __restrict dst) {
189
544
        auto transform = [](char ch) -> unsigned char {
190
544
            if (ch < 10) {
191
353
                return ch + '0';
192
353
            } else {
193
191
                return ch - 10 + 'a';
194
191
            }
195
544
        };
196
197
17
        int j = 0;
198
85
        for (int i : SPLIT_POS) {
199
357
            for (; j < i; src++, j += 2) { // input 16 chars, 2 data per char
200
272
                dst[j] = transform(((*src) >> 4) & 0x0F);
201
272
                dst[j + 1] = transform(*src & 0x0F);
202
272
            }
203
85
            dst[j++] = DELIMITER; // we resized one more byte.
204
85
        }
205
17
    }
206
};
207
208
9
/// Registers both UUID conversion functions defined in this file,
/// `uuid_to_int` and `int_to_uuid`, with the given function factory.
void register_function_uuid_transforms(SimpleFunctionFactory& factory) {
    // String -> Nullable(Int128)
    factory.register_function<FunctionUuidtoInt>();
    // Int128 -> String
    factory.register_function<FunctionInttoUuid>();
}
212
213
} // namespace doris