be/src/exprs/function/uuid_numeric.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include <memory> |
19 | | #include <string> |
20 | | #include <utility> |
21 | | |
22 | | #include "common/status.h" |
23 | | #include "core/block/block.h" |
24 | | #include "core/column/column_vector.h" |
25 | | #include "core/data_type/data_type_number.h" |
26 | | #include "core/types.h" |
27 | | #include "core/value/large_int_value.h" |
28 | | #include "exec/common/hash_table/hash.h" |
29 | | #include "exprs/function/function.h" |
30 | | #include "exprs/function/simple_function_factory.h" |
31 | | |
32 | | namespace doris { |
33 | | class FunctionContext; |
34 | | } // namespace doris |
35 | | |
36 | | namespace doris { |
37 | | |
38 | | // NOTE: |
39 | | // The implementation of the random generator is inspired by RandImpl::execute of ClickHouse. |
40 | | // The ClickHouse RandImpl::execute function provided valuable insights and ideas for the development process. |
41 | | |
42 | | struct LinearCongruentialGenerator { |
43 | | /// Constants from `man lrand48_r`. |
44 | | static constexpr UInt64 a = 0x5DEECE66D; |
45 | | static constexpr UInt64 c = 0xB; |
46 | | |
47 | | /// And this is from `head -c8 /dev/urandom | xxd -p` |
48 | | UInt64 current = 0xbcabbed75dfe77cdLL; |
49 | | |
50 | 0 | void seed(UInt64 value) { current = value; } |
51 | | |
52 | 0 | UInt32 next() { |
53 | 0 | current = current * a + c; |
54 | 0 | return static_cast<UInt32>(current >> 16); |
55 | 0 | } |
56 | | }; |
57 | | |
58 | 0 | UInt64 calcSeed(UInt64 rand_seed, UInt64 additional_seed) { |
59 | 0 | return int_hash64(rand_seed ^ int_hash64(additional_seed)); |
60 | 0 | } |
61 | | |
62 | 0 | void seed(LinearCongruentialGenerator& generator, UInt64 rand_seed, intptr_t additional_seed) { |
63 | 0 | generator.seed(calcSeed(rand_seed, additional_seed)); |
64 | 0 | } |
65 | | |
66 | | /// The array of random numbers from 'head -c8 /dev/urandom | xxd -p'. |
67 | | /// Can be used for creating seeds for random generators. |
68 | | constexpr std::array<UInt64, 32> random_numbers = { |
69 | | 0x62224b4e764e1560ULL, 0xa79ec6fdbb2ef873ULL, 0xe2862f147d1c0649ULL, 0xc8d47f9a38554cb2ULL, |
70 | | 0x62b0dd532dcd8a43ULL, 0xef3128a01e7a28bcULL, 0x32e4eb5461fc0f6ULL, 0xd3377ce32d3d9579ULL, |
71 | | 0x6f129aa32529a57cULL, 0x98dd0ba25301a5a3ULL, 0x457bd29769afabf1ULL, 0x3bb886ea86263d9dULL, |
72 | | 0xec3e9514dc0bb543ULL, 0x84282031a89ce23eULL, 0x55212b07d1a9a765ULL, 0xe9de69f882aa48afULL, |
73 | | 0x13a71c9baa9babbbULL, 0x3b7be8b0dd9cb586ULL, 0x1375e8cb773f3e35ULL, 0x9f841693b13e615fULL, |
74 | | 0xab62458b90fd9aefULL, 0xa9d9fdd187f8e941ULL, 0xca1851150f831eeaULL, 0xa43f586f9078e918ULL, |
75 | | 0xe336c2883038a257ULL, 0xfebaffc035561545ULL, 0x27c2436d2607840eULL, 0x21bab1489b0ff552ULL, |
76 | | 0x22ca273c2756bb6cULL, 0x4b6260e129af35f1ULL, 0xeb42b6c0d4322c6fULL, 0xfea0f49cc4e68339ULL, |
77 | | }; |
78 | | |
79 | | class UuidNumeric : public IFunction { |
80 | | public: |
81 | | static constexpr auto name = "uuid_numeric"; |
82 | | static constexpr size_t uuid_length = 16; // Int128 |
83 | | |
84 | 9 | static FunctionPtr create() { return std::make_shared<UuidNumeric>(); } |
85 | | |
86 | 1 | String get_name() const override { return name; } |
87 | | |
88 | 0 | bool use_default_implementation_for_constants() const override { return false; } |
89 | | |
90 | 0 | size_t get_number_of_arguments() const override { return 0; } |
91 | | |
92 | 1 | bool is_variadic() const override { return false; } |
93 | | |
94 | | // uuid numeric is a Int128 (maybe UInt128 is better but we do not support it now |
95 | 0 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
96 | 0 | return std::make_shared<DataTypeInt128>(); |
97 | 0 | } |
98 | | |
99 | | // TODO(zhiqiang): May be override open function? |
100 | | Status execute_impl(FunctionContext* /*context*/, Block& block, |
101 | | const ColumnNumbers& /*arguments*/, uint32_t result, |
102 | 0 | size_t input_rows_count) const override { |
103 | 0 | auto col_res = ColumnInt128::create(); |
104 | 0 | col_res->resize(input_rows_count); |
105 | |
|
106 | 0 | GenerateUUIDs(reinterpret_cast<char*>(col_res->get_data().data()), |
107 | 0 | uuid_length * input_rows_count); |
108 | |
|
109 | 0 | block.replace_by_position(result, std::move(col_res)); |
110 | 0 | return Status::OK(); |
111 | 0 | } |
112 | | |
113 | | private: |
114 | 0 | void GenerateUUIDs(char* output, size_t size) const { |
115 | 0 | LinearCongruentialGenerator generator0; |
116 | 0 | LinearCongruentialGenerator generator1; |
117 | 0 | LinearCongruentialGenerator generator2; |
118 | 0 | LinearCongruentialGenerator generator3; |
119 | |
|
120 | 0 | UInt64 rand_seed = randomSeed(); |
121 | |
|
122 | 0 | seed(generator0, rand_seed, random_numbers[0] + reinterpret_cast<intptr_t>(output)); |
123 | 0 | seed(generator1, rand_seed, random_numbers[1] + reinterpret_cast<intptr_t>(output)); |
124 | 0 | seed(generator2, rand_seed, random_numbers[2] + reinterpret_cast<intptr_t>(output)); |
125 | 0 | seed(generator3, rand_seed, random_numbers[3] + reinterpret_cast<intptr_t>(output)); |
126 | |
|
127 | 0 | for (const char* end = output + size; output < end; output += 16) { |
128 | 0 | unaligned_store<UInt32>(output, generator0.next()); |
129 | 0 | unaligned_store<UInt32>(output + 4, generator1.next()); |
130 | 0 | unaligned_store<UInt32>(output + 8, generator2.next()); |
131 | 0 | unaligned_store<UInt32>(output + 12, generator3.next()); |
132 | 0 | } |
133 | | /// It is guaranteed (by PaddedPODArray) that we can overwrite up to 15 bytes after end. |
134 | 0 | } |
135 | | |
136 | 0 | UInt64 randomSeed() const { |
137 | 0 | struct timespec times {}; |
138 | |
|
139 | 0 | clock_gettime(CLOCK_MONOTONIC, ×); |
140 | | /// Not cryptographically secure as time, pid and stack address can be predictable. |
141 | 0 | auto ret = HashUtil::xxHash64WithSeed(reinterpret_cast<const char*>(×.tv_nsec), |
142 | 0 | sizeof(times.tv_nsec), 0); |
143 | 0 | ret = HashUtil::xxHash64WithSeed(reinterpret_cast<const char*>(×.tv_sec), |
144 | 0 | sizeof(times.tv_sec), ret); |
145 | 0 | ret = HashUtil::xxHash64WithSeed(reinterpret_cast<const char*>((uintptr_t)pthread_self()), |
146 | 0 | sizeof(pthread_t), ret); |
147 | |
|
148 | 0 | return ret; |
149 | 0 | } |
150 | | }; |
151 | | |
152 | 8 | void register_function_uuid_numeric(SimpleFunctionFactory& factory) { |
153 | 8 | factory.register_function<UuidNumeric>(); |
154 | 8 | } |
155 | | |
156 | | } // namespace doris |