be/src/exprs/function/fmod_fast.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "exprs/function/fmod_fast.h" |
19 | | |
20 | | #include <string.h> |
21 | | |
22 | | #include <cmath> |
23 | | |
24 | | #include "common/compiler_util.h" |
25 | | |
26 | | namespace doris::fmod_fast { |
27 | | namespace { |
28 | | |
// The x87 fast path is compiled only for x86-64 targets built with a compiler that
// understands GCC-style extended inline assembly.
#if defined(__x86_64__) && (defined(__GNUC__) || defined(__clang__))
#define DORIS_HAS_X87_FMOD_FAST 1

// Computes the remainder of a / b with the x87 FPREM instruction.
//
// FPREM may stop after a partial reduction when the operand exponents are far apart;
// it reports that by setting the C2 condition flag, so the instruction is re-issued
// until C2 is clear.
//
// Both operands are pushed onto the x87 register stack by the assembly itself and both
// are popped again before it ends, so the stack depth is unchanged on exit. Because two
// slots are used as scratch space, both "st" and "st(1)" are listed as clobbers so the
// compiler keeps two stack registers free for the statement.
//
// @param a dividend
// @param b divisor
// @return the value FPREM converges to, stored back as a double
ALWAYS_INLINE inline double fmod_x87_fprem(double a, double b) {
    double r;
    asm volatile(
            "fldl %[b]\n\t"       // push b
            "fldl %[a]\n\t"       // push a: st(0) = a, st(1) = b
            "1:\n\t"
            "fprem\n\t"           // st(0) = partial remainder of st(0) / st(1)
            "fnstsw %%ax\n\t"     // copy the FPU status word into AX
            "testb $4, %%ah\n\t"  // bit 2 of AH is status bit 10 (C2): reduction incomplete
            "jne 1b\n\t"          // keep reducing while C2 is set
            "fstp %%st(1)\n\t"    // overwrite b with the remainder and pop
            "fstpl %[r]\n\t"      // store the remainder to r and pop
            : [r] "=m"(r)
            : [a] "m"(a), [b] "m"(b)
            : "ax", "cc", "st", "st(1)");
    return r;
}
#else
#define DORIS_HAS_X87_FMOD_FAST 0
#endif
52 | | |
// Floating-point remainder of a / b.
//
// When the x87 fast path is compiled in and both operands are finite with a non-zero
// divisor, the result is chosen by comparing magnitudes:
//   |a| <  |b|  -> a itself
//   |a| == |b|  -> zero carrying the sign of a
//   |a| >  |b|  -> the FPREM-based helper
// Every other input (zero divisor, NaN, infinity), and every build without the fast
// path, is delegated to std::fmod.
ALWAYS_INLINE inline double fmod_double(double a, double b) {
#if DORIS_HAS_X87_FMOD_FAST
    const bool fast_path_ok = b != 0.0 && std::isfinite(a) && std::isfinite(b);
    if (fast_path_ok) {
        const double magnitude_a = std::fabs(a);
        const double magnitude_b = std::fabs(b);
        if (magnitude_a > magnitude_b) {
            return fmod_x87_fprem(a, b);
        }
        // Both magnitudes are finite here, so "not greater" means equal or smaller.
        return magnitude_a == magnitude_b ? std::copysign(0.0, a) : a;
    }
#endif
    return std::fmod(a, b);
}
69 | | |
70 | 470k | ALWAYS_INLINE inline float fmod_float(float a, float b) { |
71 | 470k | return static_cast<float>(fmod_double(static_cast<double>(a), static_cast<double>(b))); |
72 | 470k | } |
73 | | |
74 | 390k | ALWAYS_INLINE inline double fmod_value(double a, double b) { |
75 | 390k | return fmod_double(a, b); |
76 | 390k | } |
77 | | |
78 | 390k | ALWAYS_INLINE inline float fmod_value(float a, float b) { |
79 | 390k | return fmod_float(a, b); |
80 | 390k | } |
81 | | |
82 | | template <typename T> |
83 | | ALWAYS_INLINE inline void vector_vector_impl(const T* lhs, const T* rhs, T* result, |
84 | 4 | uint8_t* null_map, size_t size) { |
85 | 260k | for (size_t i = 0; i < size; ++i) { |
86 | 260k | uint8_t is_null = rhs[i] == T(0); |
87 | 260k | null_map[i] = is_null; |
88 | 260k | T adjusted_rhs = rhs[i] + static_cast<T>(is_null); |
89 | 260k | result[i] = fmod_value(lhs[i], adjusted_rhs); |
90 | 260k | } |
91 | 4 | } fmod_fast.cpp:_ZN5doris9fmod_fast12_GLOBAL__N_118vector_vector_implIdEEvPKT_S5_PS3_Phm Line | Count | Source | 84 | 3 | uint8_t* null_map, size_t size) { | 85 | 130k | for (size_t i = 0; i < size; ++i) { | 86 | 130k | uint8_t is_null = rhs[i] == T(0); | 87 | 130k | null_map[i] = is_null; | 88 | 130k | T adjusted_rhs = rhs[i] + static_cast<T>(is_null); | 89 | 130k | result[i] = fmod_value(lhs[i], adjusted_rhs); | 90 | 130k | } | 91 | 3 | } |
fmod_fast.cpp:_ZN5doris9fmod_fast12_GLOBAL__N_118vector_vector_implIfEEvPKT_S5_PS3_Phm Line | Count | Source | 84 | 1 | uint8_t* null_map, size_t size) { | 85 | 130k | for (size_t i = 0; i < size; ++i) { | 86 | 130k | uint8_t is_null = rhs[i] == T(0); | 87 | 130k | null_map[i] = is_null; | 88 | 130k | T adjusted_rhs = rhs[i] + static_cast<T>(is_null); | 89 | 130k | result[i] = fmod_value(lhs[i], adjusted_rhs); | 90 | 130k | } | 91 | 1 | } |
|
92 | | |
93 | | template <typename T> |
94 | | ALWAYS_INLINE inline void vector_constant_impl(const T* lhs, T rhs, T* result, uint8_t* null_map, |
95 | 4 | size_t size) { |
96 | 4 | uint8_t is_null = rhs == T(0); |
97 | 4 | memset(null_map, is_null, size); |
98 | 4 | if (is_null) { |
99 | 2 | return; |
100 | 2 | } |
101 | | |
102 | 260k | for (size_t i = 0; i < size; ++i) { |
103 | 260k | result[i] = fmod_value(lhs[i], rhs); |
104 | 260k | } |
105 | 2 | } fmod_fast.cpp:_ZN5doris9fmod_fast12_GLOBAL__N_120vector_constant_implIdEEvPKT_S3_PS3_Phm Line | Count | Source | 95 | 2 | size_t size) { | 96 | 2 | uint8_t is_null = rhs == T(0); | 97 | 2 | memset(null_map, is_null, size); | 98 | 2 | if (is_null) { | 99 | 1 | return; | 100 | 1 | } | 101 | | | 102 | 130k | for (size_t i = 0; i < size; ++i) { | 103 | 130k | result[i] = fmod_value(lhs[i], rhs); | 104 | 130k | } | 105 | 1 | } |
fmod_fast.cpp:_ZN5doris9fmod_fast12_GLOBAL__N_120vector_constant_implIfEEvPKT_S3_PS3_Phm Line | Count | Source | 95 | 2 | size_t size) { | 96 | 2 | uint8_t is_null = rhs == T(0); | 97 | 2 | memset(null_map, is_null, size); | 98 | 2 | if (is_null) { | 99 | 1 | return; | 100 | 1 | } | 101 | | | 102 | 130k | for (size_t i = 0; i < size; ++i) { | 103 | 130k | result[i] = fmod_value(lhs[i], rhs); | 104 | 130k | } | 105 | 1 | } |
|
106 | | |
107 | | template <typename T> |
108 | | ALWAYS_INLINE inline void constant_vector_impl(T lhs, const T* rhs, T* result, uint8_t* null_map, |
109 | 2 | size_t size) { |
110 | 260k | for (size_t i = 0; i < size; ++i) { |
111 | 260k | uint8_t is_null = rhs[i] == T(0); |
112 | 260k | null_map[i] = is_null; |
113 | 260k | T adjusted_rhs = rhs[i] + static_cast<T>(is_null); |
114 | 260k | result[i] = fmod_value(lhs, adjusted_rhs); |
115 | 260k | } |
116 | 2 | } fmod_fast.cpp:_ZN5doris9fmod_fast12_GLOBAL__N_120constant_vector_implIdEEvT_PKS3_PS3_Phm Line | Count | Source | 109 | 1 | size_t size) { | 110 | 130k | for (size_t i = 0; i < size; ++i) { | 111 | 130k | uint8_t is_null = rhs[i] == T(0); | 112 | 130k | null_map[i] = is_null; | 113 | 130k | T adjusted_rhs = rhs[i] + static_cast<T>(is_null); | 114 | 130k | result[i] = fmod_value(lhs, adjusted_rhs); | 115 | 130k | } | 116 | 1 | } |
fmod_fast.cpp:_ZN5doris9fmod_fast12_GLOBAL__N_120constant_vector_implIfEEvT_PKS3_PS3_Phm Line | Count | Source | 109 | 1 | size_t size) { | 110 | 130k | for (size_t i = 0; i < size; ++i) { | 111 | 130k | uint8_t is_null = rhs[i] == T(0); | 112 | 130k | null_map[i] = is_null; | 113 | 130k | T adjusted_rhs = rhs[i] + static_cast<T>(is_null); | 114 | 130k | result[i] = fmod_value(lhs, adjusted_rhs); | 115 | 130k | } | 116 | 1 | } |
|
117 | | |
118 | | } // namespace |
119 | | |
120 | 0 | bool is_x87_fast_path_enabled() { |
121 | 0 | return DORIS_HAS_X87_FMOD_FAST; |
122 | 0 | } |
123 | | |
124 | 80.5k | double scalar(double a, double b) { |
125 | 80.5k | return fmod_double(a, b); |
126 | 80.5k | } |
127 | | |
128 | 80.5k | float scalar(float a, float b) { |
129 | 80.5k | return fmod_float(a, b); |
130 | 80.5k | } |
131 | | |
132 | | void vector_vector(const double* lhs, const double* rhs, double* result, uint8_t* null_map, |
133 | 3 | size_t size) { |
134 | 3 | vector_vector_impl(lhs, rhs, result, null_map, size); |
135 | 3 | } |
136 | | |
137 | | void vector_vector(const float* lhs, const float* rhs, float* result, uint8_t* null_map, |
138 | 1 | size_t size) { |
139 | 1 | vector_vector_impl(lhs, rhs, result, null_map, size); |
140 | 1 | } |
141 | | |
142 | | void vector_constant(const double* lhs, double rhs, double* result, uint8_t* null_map, |
143 | 2 | size_t size) { |
144 | 2 | vector_constant_impl(lhs, rhs, result, null_map, size); |
145 | 2 | } |
146 | | |
147 | 2 | void vector_constant(const float* lhs, float rhs, float* result, uint8_t* null_map, size_t size) { |
148 | 2 | vector_constant_impl(lhs, rhs, result, null_map, size); |
149 | 2 | } |
150 | | |
151 | | void constant_vector(double lhs, const double* rhs, double* result, uint8_t* null_map, |
152 | 1 | size_t size) { |
153 | 1 | constant_vector_impl(lhs, rhs, result, null_map, size); |
154 | 1 | } |
155 | | |
156 | 1 | void constant_vector(float lhs, const float* rhs, float* result, uint8_t* null_map, size_t size) { |
157 | 1 | constant_vector_impl(lhs, rhs, result, null_map, size); |
158 | 1 | } |
159 | | |
160 | | } // namespace doris::fmod_fast |