/root/doris/be/src/vec/functions/random.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include <fmt/format.h> |
19 | | #include <glog/logging.h> |
20 | | |
21 | | #include <algorithm> |
22 | | #include <boost/iterator/iterator_facade.hpp> |
23 | | #include <cstdint> |
24 | | #include <cstdlib> |
25 | | #include <memory> |
26 | | #include <random> |
27 | | #include <utility> |
28 | | |
29 | | #include "common/status.h" |
30 | | #include "udf/udf.h" |
31 | | #include "vec/aggregate_functions/aggregate_function.h" |
32 | | #include "vec/columns/column.h" |
33 | | #include "vec/columns/column_vector.h" |
34 | | #include "vec/columns/columns_number.h" |
35 | | #include "vec/common/assert_cast.h" |
36 | | #include "vec/core/block.h" |
37 | | #include "vec/core/column_numbers.h" |
38 | | #include "vec/core/types.h" |
39 | | #include "vec/data_types/data_type_number.h" |
40 | | #include "vec/functions/function.h" |
41 | | #include "vec/functions/simple_function_factory.h" |
42 | | |
43 | | namespace doris::vectorized { |
44 | | class Random : public IFunction { |
45 | | public: |
46 | | static constexpr auto name = "random"; |
47 | | |
48 | 8 | static FunctionPtr create() { return std::make_shared<Random>(); } |
49 | | |
50 | 0 | String get_name() const override { return name; } |
51 | | |
52 | 11 | bool use_default_implementation_for_constants() const override { return false; } |
53 | | |
54 | 0 | size_t get_number_of_arguments() const override { return 0; } |
55 | | |
56 | 7 | bool is_variadic() const override { return true; } |
57 | | |
58 | 7 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
59 | 7 | if (arguments.size() == 2) { |
60 | 0 | return std::make_shared<DataTypeInt64>(); |
61 | 0 | } |
62 | 7 | return std::make_shared<DataTypeFloat64>(); |
63 | 7 | } |
64 | | |
65 | 12 | Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { |
66 | 12 | std::shared_ptr<std::mt19937_64> generator(new std::mt19937_64()); |
67 | 12 | context->set_function_state(scope, generator); |
68 | 12 | if (scope == FunctionContext::THREAD_LOCAL) { |
69 | 6 | if (context->get_num_args() == 1) { |
70 | | // This is a call to RandSeed, initialize the seed |
71 | 6 | if (!context->is_col_constant(0)) { |
72 | 0 | return Status::InvalidArgument("The param of rand function must be literal"); |
73 | 0 | } |
74 | 6 | uint32_t seed = 0; |
75 | 6 | if (!context->get_constant_col(0)->column_ptr->is_null_at(0)) { |
76 | 5 | seed = (*context->get_constant_col(0)->column_ptr)[0].get<int64_t>(); |
77 | 5 | } |
78 | 6 | generator->seed(seed); |
79 | 6 | } else if (context->get_num_args() == 2) { |
80 | 0 | if (!context->is_col_constant(0) || !context->is_col_constant(1)) { |
81 | 0 | return Status::InvalidArgument("The param of rand function must be literal"); |
82 | 0 | } |
83 | 0 | generator->seed(std::random_device()()); |
84 | 0 | } else { // zero args |
85 | 0 | generator->seed(std::random_device()()); |
86 | 0 | } |
87 | 6 | } |
88 | | |
89 | 12 | return Status::OK(); |
90 | 12 | } |
91 | | |
92 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
93 | 5 | uint32_t result, size_t input_rows_count) const override { |
94 | 5 | if (arguments.size() == 2) { |
95 | 0 | return _execute_int_range(context, block, arguments, result, input_rows_count); |
96 | 0 | } |
97 | 5 | return _execute_float(context, block, arguments, result, input_rows_count); |
98 | 5 | } |
99 | | |
100 | 12 | Status close(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { |
101 | 12 | return Status::OK(); |
102 | 12 | } |
103 | | |
104 | | private: |
105 | | static Status _execute_int_range(FunctionContext* context, Block& block, |
106 | | const ColumnNumbers& arguments, uint32_t result, |
107 | 0 | size_t input_rows_count) { |
108 | 0 | auto res_column = ColumnInt64::create(input_rows_count); |
109 | 0 | auto& res_data = static_cast<ColumnInt64&>(*res_column).get_data(); |
110 | |
|
111 | 0 | auto* generator = reinterpret_cast<std::mt19937_64*>( |
112 | 0 | context->get_function_state(FunctionContext::THREAD_LOCAL)); |
113 | 0 | DCHECK(generator != nullptr); |
114 | | |
115 | | // checked in open() |
116 | 0 | Int64 min = assert_cast<const ColumnInt64*>( |
117 | 0 | assert_cast<const ColumnConst*>( |
118 | 0 | block.get_by_position(arguments[0]).column.get()) |
119 | 0 | ->get_data_column_ptr() |
120 | 0 | .get()) |
121 | 0 | ->get_element(0); |
122 | 0 | Int64 max = assert_cast<const ColumnInt64*>( |
123 | 0 | assert_cast<const ColumnConst*>( |
124 | 0 | block.get_by_position(arguments[1]).column.get()) |
125 | 0 | ->get_data_column_ptr() |
126 | 0 | .get()) |
127 | 0 | ->get_element(0); |
128 | 0 | if (min >= max) { |
129 | 0 | return Status::InvalidArgument(fmt::format( |
130 | 0 | "random's lower bound should less than upper bound, but got [{}, {})", min, |
131 | 0 | max)); |
132 | 0 | } |
133 | | |
134 | 0 | std::uniform_int_distribution<int64_t> distribution(min, max); |
135 | 0 | for (int i = 0; i < input_rows_count; i++) { |
136 | 0 | res_data[i] = distribution(*generator); |
137 | 0 | } |
138 | |
|
139 | 0 | block.replace_by_position(result, std::move(res_column)); |
140 | 0 | return Status::OK(); |
141 | 0 | } |
142 | | |
143 | | static Status _execute_float(FunctionContext* context, Block& block, |
144 | | const ColumnNumbers& arguments, uint32_t result, |
145 | 5 | size_t input_rows_count) { |
146 | 5 | static const double min = 0.0; |
147 | 5 | static const double max = 1.0; |
148 | 5 | auto res_column = ColumnFloat64::create(input_rows_count); |
149 | 5 | auto& res_data = static_cast<ColumnFloat64&>(*res_column).get_data(); |
150 | | |
151 | 5 | auto* generator = reinterpret_cast<std::mt19937_64*>( |
152 | 5 | context->get_function_state(FunctionContext::THREAD_LOCAL)); |
153 | 5 | DCHECK(generator != nullptr); |
154 | | |
155 | 5 | std::uniform_real_distribution<double> distribution(min, max); |
156 | 10 | for (int i = 0; i < input_rows_count; i++) { |
157 | 5 | res_data[i] = distribution(*generator); |
158 | 5 | } |
159 | | |
160 | 5 | block.replace_by_position(result, std::move(res_column)); |
161 | 5 | return Status::OK(); |
162 | 5 | } |
163 | | }; |
164 | | |
165 | 1 | void register_function_random(SimpleFunctionFactory& factory) { |
166 | 1 | factory.register_function<Random>(); |
167 | 1 | factory.register_alias(Random::name, "rand"); |
168 | 1 | } |
169 | | |
170 | | } // namespace doris::vectorized |