Coverage Report

Created: 2026-05-27 14:23

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/util/md5.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "util/md5.h"
19
20
#include <algorithm>
21
#include <cstring>
22
#include <vector>
23
24
#ifdef __AVX2__
25
#include <immintrin.h>
26
#endif
27
28
#include "exec/common/endian.h"
29
30
namespace doris {
31
32
namespace {
33
34
constexpr unsigned char MD5_DUMMY_INPUT = 0;
35
36
1.66k
void md5_to_hex(const unsigned char* digest, char* out) {
37
1.66k
    static constexpr char DIGITS[] = "0123456789abcdef";
38
28.2k
    for (int i = 0; i < MD5_DIGEST_LENGTH; ++i) {
39
26.6k
        *out++ = DIGITS[digest[i] >> 4];
40
26.6k
        *out++ = DIGITS[digest[i] & 0x0F];
41
26.6k
    }
42
1.66k
}
43
44
#ifdef __AVX2__
45
46
constexpr uint32_t MD5_A0 = 0x67452301;
47
constexpr uint32_t MD5_B0 = 0xefcdab89;
48
constexpr uint32_t MD5_C0 = 0x98badcfe;
49
constexpr uint32_t MD5_D0 = 0x10325476;
50
51
3.39k
size_t md5_num_blocks(size_t len) {
52
3.39k
    return (len + 9 + 63) / 64;
53
3.39k
}
54
55
2.73k
size_t md5_pad_final_blocks(const unsigned char* data, size_t len, unsigned char* out) {
56
2.73k
    size_t full_blocks = len / 64;
57
2.73k
    size_t tail = len % 64;
58
2.73k
    size_t num_blocks = md5_num_blocks(len);
59
2.73k
    size_t final_count = num_blocks - full_blocks;
60
61
2.73k
    std::memset(out, 0, final_count * 64);
62
2.73k
    std::memcpy(out, data + full_blocks * 64, tail);
63
2.73k
    out[tail] = 0x80;
64
2.73k
    LittleEndian::Store64(out + final_count * 64 - 8, static_cast<uint64_t>(len) * 8);
65
66
2.73k
    return final_count;
67
2.73k
}
68
69
struct AVX2MD5Ops {
70
    using Vec = __m256i;
71
    static constexpr size_t LANES = 8;
72
73
9.29M
    static Vec add(Vec a, Vec b) { return _mm256_add_epi32(a, b); }
74
75
1.14M
    static Vec set1(uint32_t v) { return _mm256_set1_epi32(static_cast<int>(v)); }
76
77
572k
    static Vec loadu(const void* p) {
78
572k
        return _mm256_loadu_si256(reinterpret_cast<const __m256i*>(p));
79
572k
    }
80
81
2.62k
    static void storeu(void* p, Vec v) { _mm256_storeu_si256(reinterpret_cast<__m256i*>(p), v); }
82
83
    template <int N>
84
2.28M
    static Vec rotl(Vec x) {
85
2.28M
        return _mm256_or_si256(_mm256_slli_epi32(x, N), _mm256_srli_epi32(x, 32 - N));
86
2.28M
    }
md5.cpp:_ZN5doris12_GLOBAL__N_110AVX2MD5Ops4rotlILi7EEEDv4_xS3_
Line
Count
Source
84
143k
    static Vec rotl(Vec x) {
85
143k
        return _mm256_or_si256(_mm256_slli_epi32(x, N), _mm256_srli_epi32(x, 32 - N));
86
143k
    }
md5.cpp:_ZN5doris12_GLOBAL__N_110AVX2MD5Ops4rotlILi12EEEDv4_xS3_
Line
Count
Source
84
143k
    static Vec rotl(Vec x) {
85
143k
        return _mm256_or_si256(_mm256_slli_epi32(x, N), _mm256_srli_epi32(x, 32 - N));
86
143k
    }
md5.cpp:_ZN5doris12_GLOBAL__N_110AVX2MD5Ops4rotlILi17EEEDv4_xS3_
Line
Count
Source
84
143k
    static Vec rotl(Vec x) {
85
143k
        return _mm256_or_si256(_mm256_slli_epi32(x, N), _mm256_srli_epi32(x, 32 - N));
86
143k
    }
md5.cpp:_ZN5doris12_GLOBAL__N_110AVX2MD5Ops4rotlILi22EEEDv4_xS3_
Line
Count
Source
84
143k
    static Vec rotl(Vec x) {
85
143k
        return _mm256_or_si256(_mm256_slli_epi32(x, N), _mm256_srli_epi32(x, 32 - N));
86
143k
    }
md5.cpp:_ZN5doris12_GLOBAL__N_110AVX2MD5Ops4rotlILi5EEEDv4_xS3_
Line
Count
Source
84
143k
    static Vec rotl(Vec x) {
85
143k
        return _mm256_or_si256(_mm256_slli_epi32(x, N), _mm256_srli_epi32(x, 32 - N));
86
143k
    }
md5.cpp:_ZN5doris12_GLOBAL__N_110AVX2MD5Ops4rotlILi9EEEDv4_xS3_
Line
Count
Source
84
143k
    static Vec rotl(Vec x) {
85
143k
        return _mm256_or_si256(_mm256_slli_epi32(x, N), _mm256_srli_epi32(x, 32 - N));
86
143k
    }
md5.cpp:_ZN5doris12_GLOBAL__N_110AVX2MD5Ops4rotlILi14EEEDv4_xS3_
Line
Count
Source
84
143k
    static Vec rotl(Vec x) {
85
143k
        return _mm256_or_si256(_mm256_slli_epi32(x, N), _mm256_srli_epi32(x, 32 - N));
86
143k
    }
md5.cpp:_ZN5doris12_GLOBAL__N_110AVX2MD5Ops4rotlILi20EEEDv4_xS3_
Line
Count
Source
84
143k
    static Vec rotl(Vec x) {
85
143k
        return _mm256_or_si256(_mm256_slli_epi32(x, N), _mm256_srli_epi32(x, 32 - N));
86
143k
    }
md5.cpp:_ZN5doris12_GLOBAL__N_110AVX2MD5Ops4rotlILi4EEEDv4_xS3_
Line
Count
Source
84
143k
    static Vec rotl(Vec x) {
85
143k
        return _mm256_or_si256(_mm256_slli_epi32(x, N), _mm256_srli_epi32(x, 32 - N));
86
143k
    }
md5.cpp:_ZN5doris12_GLOBAL__N_110AVX2MD5Ops4rotlILi11EEEDv4_xS3_
Line
Count
Source
84
143k
    static Vec rotl(Vec x) {
85
143k
        return _mm256_or_si256(_mm256_slli_epi32(x, N), _mm256_srli_epi32(x, 32 - N));
86
143k
    }
md5.cpp:_ZN5doris12_GLOBAL__N_110AVX2MD5Ops4rotlILi16EEEDv4_xS3_
Line
Count
Source
84
143k
    static Vec rotl(Vec x) {
85
143k
        return _mm256_or_si256(_mm256_slli_epi32(x, N), _mm256_srli_epi32(x, 32 - N));
86
143k
    }
md5.cpp:_ZN5doris12_GLOBAL__N_110AVX2MD5Ops4rotlILi23EEEDv4_xS3_
Line
Count
Source
84
143k
    static Vec rotl(Vec x) {
85
143k
        return _mm256_or_si256(_mm256_slli_epi32(x, N), _mm256_srli_epi32(x, 32 - N));
86
143k
    }
md5.cpp:_ZN5doris12_GLOBAL__N_110AVX2MD5Ops4rotlILi6EEEDv4_xS3_
Line
Count
Source
84
143k
    static Vec rotl(Vec x) {
85
143k
        return _mm256_or_si256(_mm256_slli_epi32(x, N), _mm256_srli_epi32(x, 32 - N));
86
143k
    }
md5.cpp:_ZN5doris12_GLOBAL__N_110AVX2MD5Ops4rotlILi10EEEDv4_xS3_
Line
Count
Source
84
143k
    static Vec rotl(Vec x) {
85
143k
        return _mm256_or_si256(_mm256_slli_epi32(x, N), _mm256_srli_epi32(x, 32 - N));
86
143k
    }
md5.cpp:_ZN5doris12_GLOBAL__N_110AVX2MD5Ops4rotlILi15EEEDv4_xS3_
Line
Count
Source
84
143k
    static Vec rotl(Vec x) {
85
143k
        return _mm256_or_si256(_mm256_slli_epi32(x, N), _mm256_srli_epi32(x, 32 - N));
86
143k
    }
md5.cpp:_ZN5doris12_GLOBAL__N_110AVX2MD5Ops4rotlILi21EEEDv4_xS3_
Line
Count
Source
84
143k
    static Vec rotl(Vec x) {
85
143k
        return _mm256_or_si256(_mm256_slli_epi32(x, N), _mm256_srli_epi32(x, 32 - N));
86
143k
    }
87
88
572k
    static Vec F(Vec b, Vec c, Vec d) {
89
572k
        return _mm256_xor_si256(d, _mm256_and_si256(b, _mm256_xor_si256(c, d)));
90
572k
    }
91
92
572k
    static Vec G(Vec b, Vec c, Vec d) {
93
572k
        return _mm256_xor_si256(c, _mm256_and_si256(d, _mm256_xor_si256(b, c)));
94
572k
    }
95
96
572k
    static Vec H(Vec b, Vec c, Vec d) { return _mm256_xor_si256(b, _mm256_xor_si256(c, d)); }
97
98
572k
    static Vec I(Vec b, Vec c, Vec d) {
99
572k
        return _mm256_xor_si256(c, _mm256_or_si256(b, _mm256_xor_si256(d, _mm256_set1_epi32(-1))));
100
572k
    }
101
102
35.7k
    static void gather_all_message_words(const unsigned char* const block_ptrs[], Vec msg[16]) {
103
107k
        for (int half = 0; half < 2; ++half) {
104
71.5k
            size_t off = half * 32;
105
71.5k
            Vec r0 = loadu(block_ptrs[0] + off);
106
71.5k
            Vec r1 = loadu(block_ptrs[1] + off);
107
71.5k
            Vec r2 = loadu(block_ptrs[2] + off);
108
71.5k
            Vec r3 = loadu(block_ptrs[3] + off);
109
71.5k
            Vec r4 = loadu(block_ptrs[4] + off);
110
71.5k
            Vec r5 = loadu(block_ptrs[5] + off);
111
71.5k
            Vec r6 = loadu(block_ptrs[6] + off);
112
71.5k
            Vec r7 = loadu(block_ptrs[7] + off);
113
114
71.5k
            Vec t0 = _mm256_unpacklo_epi32(r0, r1);
115
71.5k
            Vec t1 = _mm256_unpackhi_epi32(r0, r1);
116
71.5k
            Vec t2 = _mm256_unpacklo_epi32(r2, r3);
117
71.5k
            Vec t3 = _mm256_unpackhi_epi32(r2, r3);
118
71.5k
            Vec t4 = _mm256_unpacklo_epi32(r4, r5);
119
71.5k
            Vec t5 = _mm256_unpackhi_epi32(r4, r5);
120
71.5k
            Vec t6 = _mm256_unpacklo_epi32(r6, r7);
121
71.5k
            Vec t7 = _mm256_unpackhi_epi32(r6, r7);
122
123
71.5k
            Vec u0 = _mm256_unpacklo_epi64(t0, t2);
124
71.5k
            Vec u1 = _mm256_unpackhi_epi64(t0, t2);
125
71.5k
            Vec u2 = _mm256_unpacklo_epi64(t1, t3);
126
71.5k
            Vec u3 = _mm256_unpackhi_epi64(t1, t3);
127
71.5k
            Vec u4 = _mm256_unpacklo_epi64(t4, t6);
128
71.5k
            Vec u5 = _mm256_unpackhi_epi64(t4, t6);
129
71.5k
            Vec u6 = _mm256_unpacklo_epi64(t5, t7);
130
71.5k
            Vec u7 = _mm256_unpackhi_epi64(t5, t7);
131
132
71.5k
            size_t base = half * 8;
133
71.5k
            msg[base + 0] = _mm256_permute2x128_si256(u0, u4, 0x20);
134
71.5k
            msg[base + 4] = _mm256_permute2x128_si256(u0, u4, 0x31);
135
71.5k
            msg[base + 1] = _mm256_permute2x128_si256(u1, u5, 0x20);
136
71.5k
            msg[base + 5] = _mm256_permute2x128_si256(u1, u5, 0x31);
137
71.5k
            msg[base + 2] = _mm256_permute2x128_si256(u2, u6, 0x20);
138
71.5k
            msg[base + 6] = _mm256_permute2x128_si256(u2, u6, 0x31);
139
71.5k
            msg[base + 3] = _mm256_permute2x128_si256(u3, u7, 0x20);
140
71.5k
            msg[base + 7] = _mm256_permute2x128_si256(u3, u7, 0x31);
141
71.5k
        }
142
35.7k
    }
143
};
144
145
#define MD5_STEP_X2(func, w1, x1, y1, z1, w2, x2, y2, z2, g, s, ti) \
146
1.14M
    {                                                               \
147
1.14M
        Vec t1 = Ops::func(x1, y1, z1);                             \
148
1.14M
        Vec t2 = Ops::func(x2, y2, z2);                             \
149
1.14M
        t1 = Ops::add(t1, w1);                                      \
150
1.14M
        t2 = Ops::add(t2, w2);                                      \
151
1.14M
        Vec k = Ops::set1(ti);                                      \
152
1.14M
        t1 = Ops::add(t1, k);                                       \
153
1.14M
        t2 = Ops::add(t2, k);                                       \
154
1.14M
        t1 = Ops::add(t1, msg1[g]);                                 \
155
1.14M
        t2 = Ops::add(t2, msg2[g]);                                 \
156
1.14M
        (w1) = Ops::add(x1, Ops::template rotl<s>(t1));             \
157
1.14M
        (w2) = Ops::add(x2, Ops::template rotl<s>(t2));             \
158
1.14M
    }
159
160
template <typename Ops>
161
struct MD5X2State {
162
    typename Ops::Vec a1, b1, c1, d1, a2, b2, c2, d2;
163
};
164
165
template <typename Ops>
166
MD5X2State<Ops> md5_multi_buffer_block_x2(typename Ops::Vec a1, typename Ops::Vec b1,
167
                                          typename Ops::Vec c1, typename Ops::Vec d1,
168
                                          typename Ops::Vec a2, typename Ops::Vec b2,
169
                                          typename Ops::Vec c2, typename Ops::Vec d2,
170
                                          const typename Ops::Vec msg1[16],
171
17.8k
                                          const typename Ops::Vec msg2[16]) {
172
17.8k
    using Vec = typename Ops::Vec;
173
17.8k
    Vec aa1 = a1;
174
17.8k
    Vec bb1 = b1;
175
17.8k
    Vec cc1 = c1;
176
17.8k
    Vec dd1 = d1;
177
17.8k
    Vec aa2 = a2;
178
17.8k
    Vec bb2 = b2;
179
17.8k
    Vec cc2 = c2;
180
17.8k
    Vec dd2 = d2;
181
182
17.8k
    MD5_STEP_X2(F, a1, b1, c1, d1, a2, b2, c2, d2, 0, 7, 0xd76aa478)
183
17.8k
    MD5_STEP_X2(F, d1, a1, b1, c1, d2, a2, b2, c2, 1, 12, 0xe8c7b756)
184
17.8k
    MD5_STEP_X2(F, c1, d1, a1, b1, c2, d2, a2, b2, 2, 17, 0x242070db)
185
17.8k
    MD5_STEP_X2(F, b1, c1, d1, a1, b2, c2, d2, a2, 3, 22, 0xc1bdceee)
186
17.8k
    MD5_STEP_X2(F, a1, b1, c1, d1, a2, b2, c2, d2, 4, 7, 0xf57c0faf)
187
17.8k
    MD5_STEP_X2(F, d1, a1, b1, c1, d2, a2, b2, c2, 5, 12, 0x4787c62a)
188
17.8k
    MD5_STEP_X2(F, c1, d1, a1, b1, c2, d2, a2, b2, 6, 17, 0xa8304613)
189
17.8k
    MD5_STEP_X2(F, b1, c1, d1, a1, b2, c2, d2, a2, 7, 22, 0xfd469501)
190
17.8k
    MD5_STEP_X2(F, a1, b1, c1, d1, a2, b2, c2, d2, 8, 7, 0x698098d8)
191
17.8k
    MD5_STEP_X2(F, d1, a1, b1, c1, d2, a2, b2, c2, 9, 12, 0x8b44f7af)
192
17.8k
    MD5_STEP_X2(F, c1, d1, a1, b1, c2, d2, a2, b2, 10, 17, 0xffff5bb1)
193
17.8k
    MD5_STEP_X2(F, b1, c1, d1, a1, b2, c2, d2, a2, 11, 22, 0x895cd7be)
194
17.8k
    MD5_STEP_X2(F, a1, b1, c1, d1, a2, b2, c2, d2, 12, 7, 0x6b901122)
195
17.8k
    MD5_STEP_X2(F, d1, a1, b1, c1, d2, a2, b2, c2, 13, 12, 0xfd987193)
196
17.8k
    MD5_STEP_X2(F, c1, d1, a1, b1, c2, d2, a2, b2, 14, 17, 0xa679438e)
197
17.8k
    MD5_STEP_X2(F, b1, c1, d1, a1, b2, c2, d2, a2, 15, 22, 0x49b40821)
198
199
17.8k
    MD5_STEP_X2(G, a1, b1, c1, d1, a2, b2, c2, d2, 1, 5, 0xf61e2562)
200
17.8k
    MD5_STEP_X2(G, d1, a1, b1, c1, d2, a2, b2, c2, 6, 9, 0xc040b340)
201
17.8k
    MD5_STEP_X2(G, c1, d1, a1, b1, c2, d2, a2, b2, 11, 14, 0x265e5a51)
202
17.8k
    MD5_STEP_X2(G, b1, c1, d1, a1, b2, c2, d2, a2, 0, 20, 0xe9b6c7aa)
203
17.8k
    MD5_STEP_X2(G, a1, b1, c1, d1, a2, b2, c2, d2, 5, 5, 0xd62f105d)
204
17.8k
    MD5_STEP_X2(G, d1, a1, b1, c1, d2, a2, b2, c2, 10, 9, 0x02441453)
205
17.8k
    MD5_STEP_X2(G, c1, d1, a1, b1, c2, d2, a2, b2, 15, 14, 0xd8a1e681)
206
17.8k
    MD5_STEP_X2(G, b1, c1, d1, a1, b2, c2, d2, a2, 4, 20, 0xe7d3fbc8)
207
17.8k
    MD5_STEP_X2(G, a1, b1, c1, d1, a2, b2, c2, d2, 9, 5, 0x21e1cde6)
208
17.8k
    MD5_STEP_X2(G, d1, a1, b1, c1, d2, a2, b2, c2, 14, 9, 0xc33707d6)
209
17.8k
    MD5_STEP_X2(G, c1, d1, a1, b1, c2, d2, a2, b2, 3, 14, 0xf4d50d87)
210
17.8k
    MD5_STEP_X2(G, b1, c1, d1, a1, b2, c2, d2, a2, 8, 20, 0x455a14ed)
211
17.8k
    MD5_STEP_X2(G, a1, b1, c1, d1, a2, b2, c2, d2, 13, 5, 0xa9e3e905)
212
17.8k
    MD5_STEP_X2(G, d1, a1, b1, c1, d2, a2, b2, c2, 2, 9, 0xfcefa3f8)
213
17.8k
    MD5_STEP_X2(G, c1, d1, a1, b1, c2, d2, a2, b2, 7, 14, 0x676f02d9)
214
17.8k
    MD5_STEP_X2(G, b1, c1, d1, a1, b2, c2, d2, a2, 12, 20, 0x8d2a4c8a)
215
216
17.8k
    MD5_STEP_X2(H, a1, b1, c1, d1, a2, b2, c2, d2, 5, 4, 0xfffa3942)
217
17.8k
    MD5_STEP_X2(H, d1, a1, b1, c1, d2, a2, b2, c2, 8, 11, 0x8771f681)
218
17.8k
    MD5_STEP_X2(H, c1, d1, a1, b1, c2, d2, a2, b2, 11, 16, 0x6d9d6122)
219
17.8k
    MD5_STEP_X2(H, b1, c1, d1, a1, b2, c2, d2, a2, 14, 23, 0xfde5380c)
220
17.8k
    MD5_STEP_X2(H, a1, b1, c1, d1, a2, b2, c2, d2, 1, 4, 0xa4beea44)
221
17.8k
    MD5_STEP_X2(H, d1, a1, b1, c1, d2, a2, b2, c2, 4, 11, 0x4bdecfa9)
222
17.8k
    MD5_STEP_X2(H, c1, d1, a1, b1, c2, d2, a2, b2, 7, 16, 0xf6bb4b60)
223
17.8k
    MD5_STEP_X2(H, b1, c1, d1, a1, b2, c2, d2, a2, 10, 23, 0xbebfbc70)
224
17.8k
    MD5_STEP_X2(H, a1, b1, c1, d1, a2, b2, c2, d2, 13, 4, 0x289b7ec6)
225
17.8k
    MD5_STEP_X2(H, d1, a1, b1, c1, d2, a2, b2, c2, 0, 11, 0xeaa127fa)
226
17.8k
    MD5_STEP_X2(H, c1, d1, a1, b1, c2, d2, a2, b2, 3, 16, 0xd4ef3085)
227
17.8k
    MD5_STEP_X2(H, b1, c1, d1, a1, b2, c2, d2, a2, 6, 23, 0x04881d05)
228
17.8k
    MD5_STEP_X2(H, a1, b1, c1, d1, a2, b2, c2, d2, 9, 4, 0xd9d4d039)
229
17.8k
    MD5_STEP_X2(H, d1, a1, b1, c1, d2, a2, b2, c2, 12, 11, 0xe6db99e5)
230
17.8k
    MD5_STEP_X2(H, c1, d1, a1, b1, c2, d2, a2, b2, 15, 16, 0x1fa27cf8)
231
17.8k
    MD5_STEP_X2(H, b1, c1, d1, a1, b2, c2, d2, a2, 2, 23, 0xc4ac5665)
232
233
17.8k
    MD5_STEP_X2(I, a1, b1, c1, d1, a2, b2, c2, d2, 0, 6, 0xf4292244)
234
17.8k
    MD5_STEP_X2(I, d1, a1, b1, c1, d2, a2, b2, c2, 7, 10, 0x432aff97)
235
17.8k
    MD5_STEP_X2(I, c1, d1, a1, b1, c2, d2, a2, b2, 14, 15, 0xab9423a7)
236
17.8k
    MD5_STEP_X2(I, b1, c1, d1, a1, b2, c2, d2, a2, 5, 21, 0xfc93a039)
237
17.8k
    MD5_STEP_X2(I, a1, b1, c1, d1, a2, b2, c2, d2, 12, 6, 0x655b59c3)
238
17.8k
    MD5_STEP_X2(I, d1, a1, b1, c1, d2, a2, b2, c2, 3, 10, 0x8f0ccc92)
239
17.8k
    MD5_STEP_X2(I, c1, d1, a1, b1, c2, d2, a2, b2, 10, 15, 0xffeff47d)
240
17.8k
    MD5_STEP_X2(I, b1, c1, d1, a1, b2, c2, d2, a2, 1, 21, 0x85845dd1)
241
17.8k
    MD5_STEP_X2(I, a1, b1, c1, d1, a2, b2, c2, d2, 8, 6, 0x6fa87e4f)
242
17.8k
    MD5_STEP_X2(I, d1, a1, b1, c1, d2, a2, b2, c2, 15, 10, 0xfe2ce6e0)
243
17.8k
    MD5_STEP_X2(I, c1, d1, a1, b1, c2, d2, a2, b2, 6, 15, 0xa3014314)
244
17.8k
    MD5_STEP_X2(I, b1, c1, d1, a1, b2, c2, d2, a2, 13, 21, 0x4e0811a1)
245
17.8k
    MD5_STEP_X2(I, a1, b1, c1, d1, a2, b2, c2, d2, 4, 6, 0xf7537e82)
246
17.8k
    MD5_STEP_X2(I, d1, a1, b1, c1, d2, a2, b2, c2, 11, 10, 0xbd3af235)
247
17.8k
    MD5_STEP_X2(I, c1, d1, a1, b1, c2, d2, a2, b2, 2, 15, 0x2ad7d2bb)
248
17.8k
    MD5_STEP_X2(I, b1, c1, d1, a1, b2, c2, d2, a2, 9, 21, 0xeb86d391)
249
250
17.8k
    return {Ops::add(a1, aa1), Ops::add(b1, bb1), Ops::add(c1, cc1), Ops::add(d1, dd1),
251
17.8k
            Ops::add(a2, aa2), Ops::add(b2, bb2), Ops::add(c2, cc2), Ops::add(d2, dd2)};
252
17.8k
}
253
254
#undef MD5_STEP_X2
255
256
template <typename Ops>
257
2.62k
uint32_t extract_lane(typename Ops::Vec v, size_t lane) {
258
2.62k
    alignas(32) uint32_t values[Ops::LANES];
259
2.62k
    Ops::storeu(values, v);
260
2.62k
    return values[lane];
261
2.62k
}
262
263
template <typename Ops>
264
void md5_multi_buffer_compute(const unsigned char* const inputs[], const size_t lengths[],
265
171
                              unsigned char* outputs, size_t count) {
266
171
    constexpr size_t N = Ops::LANES;
267
171
    using Vec = typename Ops::Vec;
268
171
    size_t count1 = std::min(count, N);
269
171
    size_t count2 = count > N ? count - N : 0;
270
271
171
    size_t num_blocks[2 * N];
272
171
    size_t max_blocks = 0;
273
826
    for (size_t i = 0; i < count; ++i) {
274
655
        num_blocks[i] = md5_num_blocks(lengths[i]);
275
655
        max_blocks = std::max(max_blocks, num_blocks[i]);
276
655
    }
277
2.25k
    for (size_t i = count; i < 2 * N; ++i) {
278
2.08k
        num_blocks[i] = 1;
279
2.08k
    }
280
281
171
    alignas(32) unsigned char final_buf[2 * N][128];
282
171
    size_t final_block_start[2 * N];
283
171
    size_t final_block_count[2 * N];
284
826
    for (size_t i = 0; i < count; ++i) {
285
655
        final_block_start[i] = lengths[i] / 64;
286
655
        final_block_count[i] = md5_pad_final_blocks(inputs[i], lengths[i], final_buf[i]);
287
655
    }
288
2.25k
    for (size_t i = count; i < 2 * N; ++i) {
289
2.08k
        final_block_start[i] = 0;
290
2.08k
        final_block_count[i] = md5_pad_final_blocks(&MD5_DUMMY_INPUT, 0, final_buf[i]);
291
2.08k
    }
292
293
171
    Vec a1 = Ops::set1(MD5_A0);
294
171
    Vec b1 = Ops::set1(MD5_B0);
295
171
    Vec c1 = Ops::set1(MD5_C0);
296
171
    Vec d1 = Ops::set1(MD5_D0);
297
171
    Vec a2 = Ops::set1(MD5_A0);
298
171
    Vec b2 = Ops::set1(MD5_B0);
299
171
    Vec c2 = Ops::set1(MD5_C0);
300
171
    Vec d2 = Ops::set1(MD5_D0);
301
302
18.0k
    for (size_t block = 0; block < max_blocks; ++block) {
303
17.8k
        const unsigned char* block_ptrs[2 * N];
304
303k
        for (size_t i = 0; i < 2 * N; ++i) {
305
286k
            if (block < final_block_start[i]) {
306
17.9k
                block_ptrs[i] = inputs[i] + block * 64;
307
268k
            } else {
308
268k
                size_t final_index = block - final_block_start[i];
309
268k
                block_ptrs[i] = final_index < final_block_count[i] ? final_buf[i] + final_index * 64
310
268k
                                                                   : final_buf[i];
311
268k
            }
312
286k
        }
313
314
17.8k
        Vec msg1[16];
315
17.8k
        Vec msg2[16];
316
17.8k
        Ops::gather_all_message_words(block_ptrs, msg1);
317
17.8k
        Ops::gather_all_message_words(block_ptrs + N, msg2);
318
319
17.8k
        auto st = md5_multi_buffer_block_x2<Ops>(a1, b1, c1, d1, a2, b2, c2, d2, msg1, msg2);
320
17.8k
        a1 = st.a1;
321
17.8k
        b1 = st.b1;
322
17.8k
        c1 = st.c1;
323
17.8k
        d1 = st.d1;
324
17.8k
        a2 = st.a2;
325
17.8k
        b2 = st.b2;
326
17.8k
        c2 = st.c2;
327
17.8k
        d2 = st.d2;
328
329
53.3k
        for (size_t lane = 0; lane < count1; ++lane) {
330
35.5k
            if (block + 1 == num_blocks[lane]) {
331
477
                unsigned char* out = outputs + lane * MD5_DIGEST_LENGTH;
332
477
                LittleEndian::Store32(out, extract_lane<Ops>(a1, lane));
333
477
                LittleEndian::Store32(out + 4, extract_lane<Ops>(b1, lane));
334
477
                LittleEndian::Store32(out + 8, extract_lane<Ops>(c1, lane));
335
477
                LittleEndian::Store32(out + 12, extract_lane<Ops>(d1, lane));
336
477
            }
337
35.5k
        }
338
18.5k
        for (size_t lane = 0; lane < count2; ++lane) {
339
670
            if (block + 1 == num_blocks[N + lane]) {
340
178
                unsigned char* out = outputs + (N + lane) * MD5_DIGEST_LENGTH;
341
178
                LittleEndian::Store32(out, extract_lane<Ops>(a2, lane));
342
178
                LittleEndian::Store32(out + 4, extract_lane<Ops>(b2, lane));
343
178
                LittleEndian::Store32(out + 8, extract_lane<Ops>(c2, lane));
344
178
                LittleEndian::Store32(out + 12, extract_lane<Ops>(d2, lane));
345
178
            }
346
670
        }
347
17.8k
    }
348
171
}
349
350
void md5_binary_batch_avx2(const unsigned char* const inputs[], const size_t lengths[],
351
156
                           unsigned char* outputs, size_t count) {
352
156
    constexpr size_t BATCH = 2 * AVX2MD5Ops::LANES;
353
327
    for (size_t base = 0; base < count; base += BATCH) {
354
171
        size_t batch = std::min(BATCH, count - base);
355
171
        const unsigned char* batch_inputs[BATCH];
356
171
        size_t batch_lengths[BATCH];
357
826
        for (size_t i = 0; i < batch; ++i) {
358
655
            batch_inputs[i] = lengths[base + i] == 0 ? &MD5_DUMMY_INPUT : inputs[base + i];
359
655
            batch_lengths[i] = lengths[base + i];
360
655
        }
361
2.25k
        for (size_t i = batch; i < BATCH; ++i) {
362
2.08k
            batch_inputs[i] = &MD5_DUMMY_INPUT;
363
2.08k
            batch_lengths[i] = 0;
364
2.08k
        }
365
171
        md5_multi_buffer_compute<AVX2MD5Ops>(batch_inputs, batch_lengths,
366
171
                                             outputs + base * MD5_DIGEST_LENGTH, batch);
367
171
    }
368
156
}
369
370
#endif
371
372
} // namespace
373
374
1.00k
Md5Digest::Md5Digest() {
375
1.00k
    MD5_Init(&_md5_ctx);
376
1.00k
}
377
378
2.74k
void Md5Digest::update(const void* data, size_t length) {
379
2.74k
    MD5_Update(&_md5_ctx, data, length);
380
2.74k
}
381
382
1.00k
void Md5Digest::digest() {
383
1.00k
    unsigned char buf[MD5_DIGEST_LENGTH];
384
1.00k
    MD5_Final(buf, &_md5_ctx);
385
386
1.00k
    char hex_buf[MD5_HEX_LENGTH];
387
1.00k
    md5_to_hex(buf, hex_buf);
388
1.00k
    _hex.assign(hex_buf, MD5_HEX_LENGTH);
389
1.00k
}
390
391
void md5_hex_batch(const unsigned char* const inputs[], const size_t lengths[], char* outputs,
392
157
                   size_t count) {
393
157
    if (count == 0) {
394
1
        return;
395
1
    }
396
397
156
#ifdef __AVX2__
398
156
    std::vector<unsigned char> digests(count * MD5_DIGEST_LENGTH);
399
156
    md5_binary_batch_avx2(inputs, lengths, digests.data(), count);
400
811
    for (size_t i = 0; i < count; ++i) {
401
655
        md5_to_hex(digests.data() + i * MD5_DIGEST_LENGTH, outputs + i * MD5_HEX_LENGTH);
402
655
    }
403
#else
404
    for (size_t i = 0; i < count; ++i) {
405
        unsigned char digest[MD5_DIGEST_LENGTH];
406
        MD5(lengths[i] == 0 ? &MD5_DUMMY_INPUT : inputs[i], lengths[i], digest);
407
        md5_to_hex(digest, outputs + i * MD5_HEX_LENGTH);
408
    }
409
#endif
410
156
}
411
412
} // namespace doris