Coverage Report

Created: 2026-04-14 20:14

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/core/memcmp_small.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/MemcmpSmall.h
19
// and modified by Doris
20
21
#pragma once
22
23
#include <algorithm>
24
#include <cstdint>
25
26
namespace doris::detail {

/// Three-way comparison of two bytes.
///
/// @return -1 if `a < b`, 1 if `a > b`, 0 if both are equal.
///
/// Declared constexpr (and therefore implicitly inline) and noexcept so the
/// helper can also be used in constant expressions.
constexpr int cmp(uint8_t a, uint8_t b) noexcept {
    // Each comparison yields 0 or 1, so the difference is exactly -1, 0 or 1.
    return static_cast<int>(a > b) - static_cast<int>(a < b);
}

/// Three-way comparison of two sizes.
///
/// @return -1 if `a < b`, 1 if `a > b`, 0 if both are equal.
constexpr int cmp(size_t a, size_t b) noexcept {
    return static_cast<int>(a > b) - static_cast<int>(a < b);
}

} // namespace doris::detail
39
40
/// We can process uninitialized memory in the functions below.
41
/// Results don't depend on the values inside uninitialized memory but Memory Sanitizer cannot see it.
42
/// Disable the optimized functions when compiling with Memory Sanitizer.
43
44
#if (defined(__SSE2__) && !defined(__aarch64__)) && !defined(MEMORY_SANITIZER)
45
#include "util/sse_util.hpp"
46
47
/** All functions work under the following assumptions:
48
  * - it's possible to read up to 15 excessive bytes after end of 'a' and 'b' region;
49
  * - memory regions are relatively small and extra loop unrolling is not worth doing.
50
  */
51
52
inline int memcmp_small_allow_overflow15(const uint8_t* a, size_t a_size, const uint8_t* b,
53
21.2M
                                         size_t b_size) {
54
21.2M
    size_t min_size = std::min(a_size, b_size);
55
56
778M
    for (size_t offset = 0; offset < min_size; offset += 16) {
57
775M
        uint16_t mask = static_cast<uint16_t>(_mm_movemask_epi8(
58
775M
                _mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i*>(a + offset)),
59
775M
                               _mm_loadu_si128(reinterpret_cast<const __m128i*>(b + offset)))));
60
775M
        mask = static_cast<uint16_t>(~mask);
61
62
775M
        if (mask) {
63
18.3M
            offset += __builtin_ctz(mask);
64
65
18.3M
            if (offset >= min_size) break;
66
67
16.6M
            return doris::detail::cmp(a[offset], b[offset]);
68
18.3M
        }
69
775M
    }
70
71
4.68M
    return doris::detail::cmp(a_size, b_size);
72
21.2M
}
73
74
/** Variant when memory regions have same size.
75
  * TODO Check if the compiler can optimize the previous function when the caller passes identical sizes.
76
  */
77
1.06k
inline int memcmp_small_allow_overflow15(const uint8_t* a, const uint8_t* b, size_t size) {
78
1.06k
    for (size_t offset = 0; offset < size; offset += 16) {
79
1.06k
        uint16_t mask = static_cast<uint16_t>(_mm_movemask_epi8(
80
1.06k
                _mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i*>(a + offset)),
81
1.06k
                               _mm_loadu_si128(reinterpret_cast<const __m128i*>(b + offset)))));
82
1.06k
        mask = static_cast<uint16_t>(~mask);
83
84
1.06k
        if (mask) {
85
1.06k
            offset += __builtin_ctz(mask);
86
87
1.06k
            if (offset >= size) return 0;
88
89
948
            return doris::detail::cmp(a[offset], b[offset]);
90
1.06k
        }
91
1.06k
    }
92
93
0
    return 0;
94
1.06k
}
95
96
/** Compare memory regions for equality.
97
  */
98
/// Tests two byte regions for equality.
///
/// Regions of different sizes are never equal. Equal-sized regions are scanned
/// 16 bytes at a time with unaligned SSE2 loads, so up to 15 bytes past the end
/// of either region may be read.
///
/// @return true when the sizes match and every byte within the size is identical.
inline bool memequal_small_allow_overflow15(const uint8_t* a, size_t a_size, const uint8_t* b,
                                            size_t b_size) {
    if (a_size != b_size) {
        return false;
    }

    size_t pos = 0;
    while (pos < a_size) {
        const __m128i lhs = _mm_loadu_si128(reinterpret_cast<const __m128i*>(a + pos));
        const __m128i rhs = _mm_loadu_si128(reinterpret_cast<const __m128i*>(b + pos));

        // Bit i is set when byte i of the two 16-byte chunks differs.
        const unsigned differing =
                ~static_cast<unsigned>(_mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs))) & 0xFFFFu;

        if (differing != 0) {
            // Still equal if the first mismatch is in the over-read tail.
            return pos + static_cast<size_t>(__builtin_ctz(differing)) >= a_size;
        }

        pos += 16;
    }

    return true;
}
116
117
/** Variant when the caller knows in advance that the size is a multiple of 16.
118
  */
119
0
inline int memcmp_small_multiple_of16(const uint8_t* a, const uint8_t* b, size_t size) {
120
0
    for (size_t offset = 0; offset < size; offset += 16) {
121
0
        uint16_t mask = static_cast<uint16_t>(_mm_movemask_epi8(
122
0
                _mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i*>(a + offset)),
123
0
                               _mm_loadu_si128(reinterpret_cast<const __m128i*>(b + offset)))));
124
0
        mask = static_cast<uint16_t>(~mask);
125
0
126
0
        if (mask) {
127
0
            offset += __builtin_ctz(mask);
128
0
            return doris::detail::cmp(a[offset], b[offset]);
129
0
        }
130
0
    }
131
0
132
0
    return 0;
133
0
}
134
135
/** Variant when the size is 16 exactly.
136
  */
137
0
inline int memcmp16(const uint8_t* a, const uint8_t* b) {
138
0
    uint16_t mask = static_cast<uint16_t>(_mm_movemask_epi8(
139
0
            _mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i*>(a)),
140
0
                           _mm_loadu_si128(reinterpret_cast<const __m128i*>(b)))));
141
0
    mask = static_cast<uint16_t>(~mask);
142
0
143
0
    if (mask) {
144
0
        auto offset = __builtin_ctz(mask);
145
0
        return doris::detail::cmp(a[offset], b[offset]);
146
0
    }
147
0
148
0
    return 0;
149
0
}
150
151
/** Equality-only variant when the size is 16 exactly.
152
  */
153
0
/// Tests whether the 16 bytes at `a` and the 16 bytes at `b` are identical,
/// using one pair of unaligned SSE2 loads.
inline bool memequal16(const void* a, const void* b) {
    const __m128i lhs = _mm_loadu_si128(static_cast<const __m128i*>(a));
    const __m128i rhs = _mm_loadu_si128(static_cast<const __m128i*>(b));

    // One bit per byte lane; all 16 bits set means every byte matched.
    const int equal_lanes = _mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs));
    return equal_lanes == 0xFFFF;
}
158
159
/** Compare memory region to zero */
160
0
/// Tests whether the first `size` bytes at `data` are all zero.
///
/// Scans 16 bytes at a time with unaligned SSE2 loads, so up to 15 bytes past
/// the end of the region may be read.
///
/// @return true when no non-zero byte exists within the first `size` bytes.
inline bool memory_is_zero_small_allow_overflow15(const void* data, size_t size) {
    const char* const bytes = static_cast<const char*>(data);
    const __m128i zeros = _mm_setzero_si128();

    size_t pos = 0;
    while (pos < size) {
        const __m128i chunk = _mm_loadu_si128(reinterpret_cast<const __m128i*>(bytes + pos));

        // Bit i is set when byte i of the chunk is non-zero.
        const unsigned nonzero =
                ~static_cast<unsigned>(_mm_movemask_epi8(_mm_cmpeq_epi8(zeros, chunk))) & 0xFFFFu;

        if (nonzero != 0) {
            // Still "all zero" if the first non-zero byte is in the over-read tail.
            return pos + static_cast<size_t>(__builtin_ctz(nonzero)) >= size;
        }

        pos += 16;
    }

    return true;
}
177
178
#else
179
180
#include <cstring>
181
182
inline int memcmp_small_allow_overflow15(const uint8_t* a, size_t a_size, const uint8_t* b,
183
                                         size_t b_size) {
184
    if (auto res = memcmp(a, b, std::min(a_size, b_size)))
185
        return res;
186
    else
187
        return doris::detail::cmp(a_size, b_size);
188
}
189
190
/// Portable fallback: lexicographically compares two byte regions of the same size.
///
/// @param a     first region
/// @param b     second region
/// @param size  number of bytes to compare
/// @return -1, 0 or 1. The result is normalized so that it matches the SSE2
///         implementation of this function, which only ever returns -1, 0 or 1.
inline int memcmp_small_allow_overflow15(const uint8_t* a, const uint8_t* b, size_t size) {
    // memcmp has undefined behavior when given a null pointer, even for a zero
    // length, so empty regions are handled without calling it.
    if (size == 0) {
        return 0;
    }

    // memcmp only guarantees the sign of its result; collapse it to -1/0/1.
    const int res = std::memcmp(a, b, size);
    return static_cast<int>(res > 0) - static_cast<int>(res < 0);
}
193
194
/// Portable fallback: tests two byte regions for equality.
///
/// @return true when the sizes match and all bytes are identical; two empty
///         regions are equal.
inline bool memequal_small_allow_overflow15(const uint8_t* a, size_t a_size, const uint8_t* b,
                                            size_t b_size) {
    if (a_size != b_size) {
        return false;
    }

    // memcmp has undefined behavior when given a null pointer, even for a zero
    // length, so empty regions are handled without calling it.
    return a_size == 0 || std::memcmp(a, b, a_size) == 0;
}
198
199
/// Portable fallback for the "size is a multiple of 16" comparison; this
/// implementation compares exactly `size` bytes and does not rely on that
/// property.
///
/// @return -1, 0 or 1. The result is normalized so that it matches the SSE2
///         implementation of this function, which only ever returns -1, 0 or 1.
inline int memcmp_small_multiple_of16(const uint8_t* a, const uint8_t* b, size_t size) {
    // memcmp has undefined behavior when given a null pointer, even for a zero
    // length, so empty regions are handled without calling it.
    if (size == 0) {
        return 0;
    }

    // memcmp only guarantees the sign of its result; collapse it to -1/0/1.
    const int res = std::memcmp(a, b, size);
    return static_cast<int>(res > 0) - static_cast<int>(res < 0);
}
202
203
/// Portable fallback: lexicographically compares exactly 16 bytes of `a` and `b`.
///
/// @return -1, 0 or 1. The result is normalized so that it matches the SSE2
///         implementation of this function, which only ever returns -1, 0 or 1.
inline int memcmp16(const uint8_t* a, const uint8_t* b) {
    // memcmp only guarantees the sign of its result; collapse it to -1/0/1.
    const int res = std::memcmp(a, b, 16);
    return static_cast<int>(res > 0) - static_cast<int>(res < 0);
}
206
207
/// Portable fallback: tests whether the 16 bytes at `a` and the 16 bytes at
/// `b` are identical.
inline bool memequal16(const void* a, const void* b) {
    const int difference = memcmp(a, b, 16);
    return difference == 0;
}
210
211
/// Portable fallback: tests whether the first `size` bytes at `data` are all
/// zero, checking one byte at a time and never reading past `size`.
inline bool memory_is_zero_small_allow_overflow15(const void* data, size_t size) {
    const char* const first = static_cast<const char*>(data);
    return std::all_of(first, first + size, [](char byte) { return byte == 0; });
}
220
221
#endif