Coverage Report

Created: 2026-03-14 20:54

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/core/memcmp_small.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/MemcmpSmall.h
19
// and modified by Doris
20
21
#pragma once
22
23
#include <algorithm>
24
#include <cstdint>
25
26
namespace doris::detail {
27
#include "common/compile_check_begin.h"
28
16.6M
/// Three-way comparison of two bytes.
/// Returns -1 when a < b, 1 when a > b and 0 when both are equal.
inline int cmp(uint8_t a, uint8_t b) {
    // Each relational test yields 0 or 1, so the difference is exactly -1, 0 or 1.
    return static_cast<int>(a > b) - static_cast<int>(a < b);
}
33
34
4.68M
/// Three-way comparison of two sizes.
/// Returns -1 when a < b, 1 when a > b and 0 when both are equal.
inline int cmp(size_t a, size_t b) {
    if (a == b) {
        return 0;
    }
    return a < b ? -1 : 1;
}
39
} // namespace doris::detail
40
41
/// We can process uninitialized memory in the functions below.
42
/// Results don't depend on the values inside uninitialized memory but Memory Sanitizer cannot see it.
43
/// Disable optimized functions when compiling with Memory Sanitizer.
44
45
#if (defined(__SSE2__) && !defined(__aarch64__)) && !defined(MEMORY_SANITIZER)
46
#include "util/sse_util.hpp"
47
48
/** All functions work under the following assumptions:
49
  * - it's possible to read up to 15 excessive bytes after end of 'a' and 'b' region;
50
  * - memory regions are relatively small and extra loop unrolling is not worth doing.
51
  */
52
53
inline int memcmp_small_allow_overflow15(const uint8_t* a, size_t a_size, const uint8_t* b,
54
21.2M
                                         size_t b_size) {
55
21.2M
    size_t min_size = std::min(a_size, b_size);
56
57
778M
    for (size_t offset = 0; offset < min_size; offset += 16) {
58
775M
        uint16_t mask = static_cast<uint16_t>(_mm_movemask_epi8(
59
775M
                _mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i*>(a + offset)),
60
775M
                               _mm_loadu_si128(reinterpret_cast<const __m128i*>(b + offset)))));
61
775M
        mask = ~mask;
62
63
775M
        if (mask) {
64
18.3M
            offset += __builtin_ctz(mask);
65
66
18.3M
            if (offset >= min_size) break;
67
68
16.6M
            return doris::detail::cmp(a[offset], b[offset]);
69
18.3M
        }
70
775M
    }
71
72
4.68M
    return doris::detail::cmp(a_size, b_size);
73
21.2M
}
74
75
/** Variant when memory regions have same size.
76
  * TODO Check if the compiler can optimize previous function when the caller pass identical sizes.
77
  */
78
1.06k
inline int memcmp_small_allow_overflow15(const uint8_t* a, const uint8_t* b, size_t size) {
79
1.06k
    for (size_t offset = 0; offset < size; offset += 16) {
80
1.06k
        uint16_t mask = static_cast<uint16_t>(_mm_movemask_epi8(
81
1.06k
                _mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i*>(a + offset)),
82
1.06k
                               _mm_loadu_si128(reinterpret_cast<const __m128i*>(b + offset)))));
83
1.06k
        mask = ~mask;
84
85
1.06k
        if (mask) {
86
1.06k
            offset += __builtin_ctz(mask);
87
88
1.06k
            if (offset >= size) return 0;
89
90
948
            return doris::detail::cmp(a[offset], b[offset]);
91
1.06k
        }
92
1.06k
    }
93
94
0
    return 0;
95
1.06k
}
96
97
/** Check two byte ranges for equality.
  *
  * Ranges of different sizes are never equal. Otherwise the data is compared in 16-byte chunks,
  * which may read up to 15 bytes past the end of both ranges; those bytes are ignored.
  */
inline bool memequal_small_allow_overflow15(const uint8_t* a, size_t a_size, const uint8_t* b,
                                            size_t b_size) {
    if (a_size != b_size) {
        return false;
    }

    const size_t size = a_size;
    size_t pos = 0;
    while (pos < size) {
        const __m128i lhs = _mm_loadu_si128(reinterpret_cast<const __m128i*>(a + pos));
        const __m128i rhs = _mm_loadu_si128(reinterpret_cast<const __m128i*>(b + pos));

        // One bit per byte lane, set where the two chunks differ.
        const auto diff_bits =
                static_cast<uint16_t>(~_mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs)));

        if (diff_bits != 0) {
            // Still equal if the first mismatch lies in the over-read tail.
            return pos + __builtin_ctz(diff_bits) >= size;
        }

        pos += 16;
    }

    return true;
}
117
118
/** Variant when the caller know in advance that the size is a multiple of 16.
119
  */
120
0
inline int memcmp_small_multiple_of16(const uint8_t* a, const uint8_t* b, size_t size) {
121
0
    for (size_t offset = 0; offset < size; offset += 16) {
122
0
        uint16_t mask = static_cast<uint16_t>(_mm_movemask_epi8(
123
0
                _mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i*>(a + offset)),
124
0
                               _mm_loadu_si128(reinterpret_cast<const __m128i*>(b + offset)))));
125
0
        mask = ~mask;
126
0
127
0
        if (mask) {
128
0
            offset += __builtin_ctz(mask);
129
0
            return doris::detail::cmp(a[offset], b[offset]);
130
0
        }
131
0
    }
132
0
133
0
    return 0;
134
0
}
135
136
/** Variant when the size is 16 exactly.
137
  */
138
0
inline int memcmp16(const uint8_t* a, const uint8_t* b) {
139
0
    uint16_t mask = static_cast<uint16_t>(_mm_movemask_epi8(
140
0
            _mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i*>(a)),
141
0
                           _mm_loadu_si128(reinterpret_cast<const __m128i*>(b)))));
142
0
    mask = ~mask;
143
0
144
0
    if (mask) {
145
0
        auto offset = __builtin_ctz(mask);
146
0
        return doris::detail::cmp(a[offset], b[offset]);
147
0
    }
148
0
149
0
    return 0;
150
0
}
151
152
/** Equality check for exactly 16 bytes.
  */
inline bool memequal16(const void* a, const void* b) {
    const __m128i lhs = _mm_loadu_si128(static_cast<const __m128i*>(a));
    const __m128i rhs = _mm_loadu_si128(static_cast<const __m128i*>(b));

    // All 16 lanes compare equal exactly when every one of the low 16 mask bits is set.
    const int equal_bits = _mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs));
    return equal_bits == 0xFFFF;
}
159
160
/** Check whether the first `size` bytes at `data` are all zero.
  *
  * The memory is read in 16-byte chunks, so up to 15 bytes past `size` may be read; non-zero
  * bytes found there are ignored.
  */
inline bool memory_is_zero_small_allow_overflow15(const void* data, size_t size) {
    const auto* bytes = static_cast<const char*>(data);
    const __m128i zeros = _mm_setzero_si128();

    size_t pos = 0;
    while (pos < size) {
        const __m128i chunk = _mm_loadu_si128(reinterpret_cast<const __m128i*>(bytes + pos));

        // One bit per byte lane, set where the chunk holds a non-zero byte.
        const auto nonzero_bits =
                static_cast<uint16_t>(~_mm_movemask_epi8(_mm_cmpeq_epi8(zeros, chunk)));

        if (nonzero_bits != 0) {
            // Still all-zero if the first non-zero byte lies in the over-read tail.
            return pos + __builtin_ctz(nonzero_bits) >= size;
        }

        pos += 16;
    }

    return true;
}
178
179
#else
180
181
#include <cstring>
182
183
inline int memcmp_small_allow_overflow15(const uint8_t* a, size_t a_size, const uint8_t* b,
184
                                         size_t b_size) {
185
    if (auto res = memcmp(a, b, std::min(a_size, b_size)))
186
        return res;
187
    else
188
        return doris::detail::cmp(a_size, b_size);
189
}
190
191
/** Portable equal-length variant: three-way comparison of the first `size` bytes.
  *
  * Returns exactly -1, 0 or 1, the same values the SSE2 implementation produces.
  */
inline int memcmp_small_allow_overflow15(const uint8_t* a, const uint8_t* b, size_t size) {
    // memcmp has undefined behavior for null pointers even when the length is zero.
    if (size == 0) {
        return 0;
    }

    // memcmp only guarantees the sign of its result; collapse it to -1 / 0 / 1.
    const int res = std::memcmp(a, b, size);
    return static_cast<int>(res > 0) - static_cast<int>(res < 0);
}
194
195
/** Portable equality check of two byte ranges.
  *
  * Ranges of different sizes are never equal; two empty ranges are always equal.
  */
inline bool memequal_small_allow_overflow15(const uint8_t* a, size_t a_size, const uint8_t* b,
                                            size_t b_size) {
    if (a_size != b_size) {
        return false;
    }

    // memcmp has undefined behavior for null pointers even when the length is zero, so
    // answer the empty case without calling it.
    return a_size == 0 || std::memcmp(a, b, a_size) == 0;
}
199
200
/** Portable variant for sizes the caller declares to be a multiple of 16.
  *
  * This implementation does not rely on that property. Returns exactly -1, 0 or 1, the same
  * values the SSE2 implementation produces.
  */
inline int memcmp_small_multiple_of16(const uint8_t* a, const uint8_t* b, size_t size) {
    // memcmp has undefined behavior for null pointers even when the length is zero.
    if (size == 0) {
        return 0;
    }

    // memcmp only guarantees the sign of its result; collapse it to -1 / 0 / 1.
    const int res = std::memcmp(a, b, size);
    return static_cast<int>(res > 0) - static_cast<int>(res < 0);
}
203
204
/** Portable three-way comparison of exactly 16 bytes.
  *
  * Returns exactly -1, 0 or 1, the same values the SSE2 implementation produces.
  */
inline int memcmp16(const uint8_t* a, const uint8_t* b) {
    // memcmp only guarantees the sign of its result; collapse it to -1 / 0 / 1.
    const int res = std::memcmp(a, b, 16);
    return static_cast<int>(res > 0) - static_cast<int>(res < 0);
}
207
208
/** Portable equality check for exactly 16 bytes.
  */
inline bool memequal16(const void* a, const void* b) {
    constexpr size_t block_bytes = 16;
    const int res = std::memcmp(a, b, block_bytes);
    return res == 0;
}
211
212
/** Portable check that the first `size` bytes at `data` are all zero.
  *
  * Only the `size` bytes of the range are read. An empty range is reported as all-zero.
  */
inline bool memory_is_zero_small_allow_overflow15(const void* data, size_t size) {
    const auto* first = static_cast<const char*>(data);
    return std::all_of(first, first + size, [](char byte) { return byte == 0; });
}
221
222
#endif
223
224
#include "common/compile_check_end.h"