/root/doris/be/src/vec/functions/random.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include <fmt/format.h> |
19 | | #include <glog/logging.h> |
20 | | |
21 | | #include <algorithm> |
22 | | #include <boost/iterator/iterator_facade.hpp> |
23 | | #include <cstdint> |
24 | | #include <cstdlib> |
25 | | #include <memory> |
26 | | #include <random> |
27 | | #include <utility> |
28 | | |
29 | | #include "common/status.h" |
30 | | #include "udf/udf.h" |
31 | | #include "vec/aggregate_functions/aggregate_function.h" |
32 | | #include "vec/columns/column.h" |
33 | | #include "vec/columns/column_vector.h" |
34 | | #include "vec/common/assert_cast.h" |
35 | | #include "vec/core/block.h" |
36 | | #include "vec/core/column_numbers.h" |
37 | | #include "vec/core/types.h" |
38 | | #include "vec/data_types/data_type_number.h" |
39 | | #include "vec/functions/function.h" |
40 | | #include "vec/functions/simple_function_factory.h" |
41 | | |
42 | | namespace doris::vectorized { |
43 | | |
44 | | #include "common/compile_check_begin.h" |
45 | | class Random : public IFunction { |
46 | | public: |
47 | | static constexpr auto name = "random"; |
48 | | |
49 | 9 | static FunctionPtr create() { return std::make_shared<Random>(); } |
50 | | |
51 | 0 | String get_name() const override { return name; } |
52 | | |
53 | 11 | bool use_default_implementation_for_constants() const override { return false; } |
54 | | |
55 | 0 | size_t get_number_of_arguments() const override { return 0; } |
56 | | |
57 | 8 | bool is_variadic() const override { return true; } |
58 | | |
59 | 7 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
60 | 7 | if (arguments.size() == 2) { |
61 | 0 | return std::make_shared<DataTypeInt64>(); |
62 | 0 | } |
63 | 7 | return std::make_shared<DataTypeFloat64>(); |
64 | 7 | } |
65 | | |
66 | 12 | Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { |
67 | 12 | std::shared_ptr<std::mt19937_64> generator(new std::mt19937_64()); |
68 | 12 | context->set_function_state(scope, generator); |
69 | 12 | if (scope == FunctionContext::THREAD_LOCAL) { |
70 | 6 | if (context->get_num_args() == 1) { |
71 | | // This is a call to RandSeed, initialize the seed |
72 | 6 | if (!context->is_col_constant(0)) { |
73 | 0 | return Status::InvalidArgument("The param of rand function must be literal"); |
74 | 0 | } |
75 | 6 | uint32_t seed = 0; |
76 | 6 | if (!context->get_constant_col(0)->column_ptr->is_null_at(0)) { |
77 | 5 | seed = (uint32_t)(*context->get_constant_col(0)->column_ptr)[0].get<int64_t>(); |
78 | 5 | } |
79 | 6 | generator->seed(seed); |
80 | 6 | } else if (context->get_num_args() == 2) { |
81 | 0 | if (!context->is_col_constant(0) || !context->is_col_constant(1)) { |
82 | 0 | return Status::InvalidArgument("The param of rand function must be literal"); |
83 | 0 | } |
84 | 0 | generator->seed(std::random_device()()); |
85 | 0 | } else { // zero args |
86 | 0 | generator->seed(std::random_device()()); |
87 | 0 | } |
88 | 6 | } |
89 | | |
90 | 12 | return Status::OK(); |
91 | 12 | } |
92 | | |
93 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
94 | 5 | uint32_t result, size_t input_rows_count) const override { |
95 | 5 | if (arguments.size() == 2) { |
96 | 0 | return _execute_int_range(context, block, arguments, result, input_rows_count); |
97 | 0 | } |
98 | 5 | return _execute_float(context, block, arguments, result, input_rows_count); |
99 | 5 | } |
100 | | |
101 | | private: |
102 | | static Status _execute_int_range(FunctionContext* context, Block& block, |
103 | | const ColumnNumbers& arguments, uint32_t result, |
104 | 0 | size_t input_rows_count) { |
105 | 0 | auto res_column = ColumnInt64::create(input_rows_count); |
106 | 0 | auto& res_data = static_cast<ColumnInt64&>(*res_column).get_data(); |
107 | |
|
108 | 0 | auto* generator = reinterpret_cast<std::mt19937_64*>( |
109 | 0 | context->get_function_state(FunctionContext::THREAD_LOCAL)); |
110 | 0 | DCHECK(generator != nullptr); |
111 | | |
112 | | // checked in open() |
113 | 0 | Int64 min = assert_cast<const ColumnInt64*>( |
114 | 0 | assert_cast<const ColumnConst*>( |
115 | 0 | block.get_by_position(arguments[0]).column.get()) |
116 | 0 | ->get_data_column_ptr() |
117 | 0 | .get()) |
118 | 0 | ->get_element(0); |
119 | 0 | Int64 max = assert_cast<const ColumnInt64*>( |
120 | 0 | assert_cast<const ColumnConst*>( |
121 | 0 | block.get_by_position(arguments[1]).column.get()) |
122 | 0 | ->get_data_column_ptr() |
123 | 0 | .get()) |
124 | 0 | ->get_element(0); |
125 | 0 | if (min >= max) { |
126 | 0 | return Status::InvalidArgument(fmt::format( |
127 | 0 | "random's lower bound should less than upper bound, but got [{}, {})", min, |
128 | 0 | max)); |
129 | 0 | } |
130 | | |
131 | 0 | std::uniform_int_distribution<int64_t> distribution(min, max); |
132 | 0 | for (int i = 0; i < input_rows_count; i++) { |
133 | 0 | res_data[i] = distribution(*generator); |
134 | 0 | } |
135 | |
|
136 | 0 | block.replace_by_position(result, std::move(res_column)); |
137 | 0 | return Status::OK(); |
138 | 0 | } |
139 | | |
140 | | static Status _execute_float(FunctionContext* context, Block& block, |
141 | | const ColumnNumbers& arguments, uint32_t result, |
142 | 5 | size_t input_rows_count) { |
143 | 5 | static const double min = 0.0; |
144 | 5 | static const double max = 1.0; |
145 | 5 | auto res_column = ColumnFloat64::create(input_rows_count); |
146 | 5 | auto& res_data = static_cast<ColumnFloat64&>(*res_column).get_data(); |
147 | | |
148 | 5 | auto* generator = reinterpret_cast<std::mt19937_64*>( |
149 | 5 | context->get_function_state(FunctionContext::THREAD_LOCAL)); |
150 | 5 | DCHECK(generator != nullptr); |
151 | | |
152 | 5 | std::uniform_real_distribution<double> distribution(min, max); |
153 | 10 | for (int i = 0; i < input_rows_count; i++) { |
154 | 5 | res_data[i] = distribution(*generator); |
155 | 5 | } |
156 | | |
157 | 5 | block.replace_by_position(result, std::move(res_column)); |
158 | 5 | return Status::OK(); |
159 | 5 | } |
160 | | }; |
161 | | |
162 | 1 | void register_function_random(SimpleFunctionFactory& factory) { |
163 | 1 | factory.register_function<Random>(); |
164 | 1 | factory.register_alias(Random::name, "rand"); |
165 | 1 | } |
166 | | #include "common/compile_check_end.h" |
167 | | } // namespace doris::vectorized |