Coverage Report

Created: 2026-03-15 17:28

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/core/memcpy_small.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/MemcpySmall.h
19
// and modified by Doris
20
21
#pragma once
22
23
#include <glog/logging.h>
24
#include <string.h>
25
26
#include <cstdint>
27
#include <memory>
28
29
#if defined(__SSE2__) || defined(__aarch64__)
30
#include "util/sse_util.hpp"
31
32
/** memcpy function could work suboptimal if all the following conditions are met:
33
  * 1. Size of memory region is relatively small (approximately, under 50 bytes).
34
  * 2. Size of memory region is not known at compile-time.
35
  *
36
  * In that case, memcpy works suboptimal by following reasons:
37
  * 1. Function is not inlined.
38
  * 2. Much time/instructions are spend to process "tails" of data.
39
  *
40
  * There are cases when function could be implemented in more optimal way, with help of some assumptions.
41
  * One of that assumptions - ability to read and write some number of bytes after end of passed memory regions.
42
  * Under that assumption, it is possible not to implement difficult code to process tails of data and do copy always by big chunks.
43
  *
44
  * This case is typical, for example, when many small pieces of data are gathered to single contiguous piece of memory in a loop.
45
  * - because each next copy will overwrite excessive data after previous copy.
46
  *
47
  * Assumption that size of memory region is small enough allows us to not unroll the loop.
48
  * This is slower, when size of memory is actually big.
49
  *
50
  * Use with caution.
51
  */
52
53
namespace doris::detail {
54
inline void memcpy_small_allow_read_write_overflow15_impl(char* __restrict dst,
55
1.16M
                                                          const char* __restrict src, ssize_t n) {
56
41.3M
    while (n > 0) {
57
40.1M
        _mm_storeu_si128(reinterpret_cast<__m128i*>(dst),
58
40.1M
                         _mm_loadu_si128(reinterpret_cast<const __m128i*>(src)));
59
60
40.1M
        dst += 16;
61
40.1M
        src += 16;
62
40.1M
        n -= 16;
63
40.1M
    }
64
1.16M
}
65
} // namespace doris::detail
66
67
/** Works under assumption, that it's possible to read up to 15 excessive bytes after end of 'src' region
68
  *  and to write any garbage into up to 15 bytes after end of 'dst' region.
69
  */
70
inline void memcpy_small_allow_read_write_overflow15(void* __restrict dst,
71
1.16M
                                                     const void* __restrict src, size_t n) {
72
1.16M
    doris::detail::memcpy_small_allow_read_write_overflow15_impl(
73
1.16M
            reinterpret_cast<char*>(dst), reinterpret_cast<const char*>(src), n);
74
1.16M
}
75
76
/** NOTE There was also a function, that assumes, that you could read any bytes inside same memory page of src.
77
  * This function was unused, and also it requires special handling for Valgrind and ASan.
78
  */
79
80
#else /// Implementation for other platforms.
81
82
inline void memcpy_small_allow_read_write_overflow15(void* __restrict dst,
83
                                                     const void* __restrict src, size_t n) {
84
    memcpy(dst, src, n);
85
}
86
87
#endif
88
89
// assume input address not aligned by default
90
// hint to compiler that we are copying fixed size data, so it can optimize the copy using SIMD instructions if possible.
91
template <typename T, bool aligned = false>
92
14.5M
void memcpy_fixed(char* lhs, const char* rhs) {
93
14.5M
    if constexpr (aligned) {
94
        // hint aligned address to compiler
95
137k
        memcpy(std::assume_aligned<alignof(T)>(lhs), std::assume_aligned<alignof(T)>(rhs),
96
137k
               sizeof(T));
97
14.3M
    } else {
98
14.3M
        memcpy(lhs, rhs, sizeof(T));
99
14.3M
    }
100
14.5M
}
_Z12memcpy_fixedItLb0EEvPcPKc
Line
Count
Source
92
446
void memcpy_fixed(char* lhs, const char* rhs) {
93
    if constexpr (aligned) {
94
        // hint aligned address to compiler
95
        memcpy(std::assume_aligned<alignof(T)>(lhs), std::assume_aligned<alignof(T)>(rhs),
96
               sizeof(T));
97
446
    } else {
98
446
        memcpy(lhs, rhs, sizeof(T));
99
446
    }
100
446
}
_Z12memcpy_fixedIhLb0EEvPcPKc
Line
Count
Source
92
45.8k
void memcpy_fixed(char* lhs, const char* rhs) {
93
    if constexpr (aligned) {
94
        // hint aligned address to compiler
95
        memcpy(std::assume_aligned<alignof(T)>(lhs), std::assume_aligned<alignof(T)>(rhs),
96
               sizeof(T));
97
45.8k
    } else {
98
45.8k
        memcpy(lhs, rhs, sizeof(T));
99
45.8k
    }
100
45.8k
}
_Z12memcpy_fixedIjLb0EEvPcPKc
Line
Count
Source
92
694k
void memcpy_fixed(char* lhs, const char* rhs) {
93
    if constexpr (aligned) {
94
        // hint aligned address to compiler
95
        memcpy(std::assume_aligned<alignof(T)>(lhs), std::assume_aligned<alignof(T)>(rhs),
96
               sizeof(T));
97
694k
    } else {
98
694k
        memcpy(lhs, rhs, sizeof(T));
99
694k
    }
100
694k
}
_Z12memcpy_fixedIdLb0EEvPcPKc
Line
Count
Source
92
5.50k
void memcpy_fixed(char* lhs, const char* rhs) {
93
    if constexpr (aligned) {
94
        // hint aligned address to compiler
95
        memcpy(std::assume_aligned<alignof(T)>(lhs), std::assume_aligned<alignof(T)>(rhs),
96
               sizeof(T));
97
5.50k
    } else {
98
5.50k
        memcpy(lhs, rhs, sizeof(T));
99
5.50k
    }
100
5.50k
}
_Z12memcpy_fixedIlLb0EEvPcPKc
Line
Count
Source
92
6.76M
void memcpy_fixed(char* lhs, const char* rhs) {
93
    if constexpr (aligned) {
94
        // hint aligned address to compiler
95
        memcpy(std::assume_aligned<alignof(T)>(lhs), std::assume_aligned<alignof(T)>(rhs),
96
               sizeof(T));
97
6.76M
    } else {
98
6.76M
        memcpy(lhs, rhs, sizeof(T));
99
6.76M
    }
100
6.76M
}
_Z12memcpy_fixedIiLb0EEvPcPKc
Line
Count
Source
92
4.08k
void memcpy_fixed(char* lhs, const char* rhs) {
93
    if constexpr (aligned) {
94
        // hint aligned address to compiler
95
        memcpy(std::assume_aligned<alignof(T)>(lhs), std::assume_aligned<alignof(T)>(rhs),
96
               sizeof(T));
97
4.08k
    } else {
98
4.08k
        memcpy(lhs, rhs, sizeof(T));
99
4.08k
    }
100
4.08k
}
_Z12memcpy_fixedIbLb0EEvPcPKc
Line
Count
Source
92
7
void memcpy_fixed(char* lhs, const char* rhs) {
93
    if constexpr (aligned) {
94
        // hint aligned address to compiler
95
        memcpy(std::assume_aligned<alignof(T)>(lhs), std::assume_aligned<alignof(T)>(rhs),
96
               sizeof(T));
97
7
    } else {
98
7
        memcpy(lhs, rhs, sizeof(T));
99
7
    }
100
7
}
_Z12memcpy_fixedIhLb1EEvPcPKc
Line
Count
Source
92
17.7k
void memcpy_fixed(char* lhs, const char* rhs) {
93
17.7k
    if constexpr (aligned) {
94
        // hint aligned address to compiler
95
17.7k
        memcpy(std::assume_aligned<alignof(T)>(lhs), std::assume_aligned<alignof(T)>(rhs),
96
17.7k
               sizeof(T));
97
    } else {
98
        memcpy(lhs, rhs, sizeof(T));
99
    }
100
17.7k
}
_Z12memcpy_fixedItLb1EEvPcPKc
Line
Count
Source
92
8.43k
void memcpy_fixed(char* lhs, const char* rhs) {
93
8.43k
    if constexpr (aligned) {
94
        // hint aligned address to compiler
95
8.43k
        memcpy(std::assume_aligned<alignof(T)>(lhs), std::assume_aligned<alignof(T)>(rhs),
96
8.43k
               sizeof(T));
97
    } else {
98
        memcpy(lhs, rhs, sizeof(T));
99
    }
100
8.43k
}
_Z12memcpy_fixedIjLb1EEvPcPKc
Line
Count
Source
92
32.9k
void memcpy_fixed(char* lhs, const char* rhs) {
93
32.9k
    if constexpr (aligned) {
94
        // hint aligned address to compiler
95
32.9k
        memcpy(std::assume_aligned<alignof(T)>(lhs), std::assume_aligned<alignof(T)>(rhs),
96
32.9k
               sizeof(T));
97
    } else {
98
        memcpy(lhs, rhs, sizeof(T));
99
    }
100
32.9k
}
_Z12memcpy_fixedImLb1EEvPcPKc
Line
Count
Source
92
57.7k
void memcpy_fixed(char* lhs, const char* rhs) {
93
57.7k
    if constexpr (aligned) {
94
        // hint aligned address to compiler
95
57.7k
        memcpy(std::assume_aligned<alignof(T)>(lhs), std::assume_aligned<alignof(T)>(rhs),
96
57.7k
               sizeof(T));
97
    } else {
98
        memcpy(lhs, rhs, sizeof(T));
99
    }
100
57.7k
}
_Z12memcpy_fixedImLb0EEvPcPKc
Line
Count
Source
92
37.1k
void memcpy_fixed(char* lhs, const char* rhs) {
93
    if constexpr (aligned) {
94
        // hint aligned address to compiler
95
        memcpy(std::assume_aligned<alignof(T)>(lhs), std::assume_aligned<alignof(T)>(rhs),
96
               sizeof(T));
97
37.1k
    } else {
98
37.1k
        memcpy(lhs, rhs, sizeof(T));
99
37.1k
    }
100
37.1k
}
_Z12memcpy_fixedIN4wide7integerILm128EjEELb1EEvPcPKc
Line
Count
Source
92
20.6k
void memcpy_fixed(char* lhs, const char* rhs) {
93
20.6k
    if constexpr (aligned) {
94
        // hint aligned address to compiler
95
20.6k
        memcpy(std::assume_aligned<alignof(T)>(lhs), std::assume_aligned<alignof(T)>(rhs),
96
20.6k
               sizeof(T));
97
    } else {
98
        memcpy(lhs, rhs, sizeof(T));
99
    }
100
20.6k
}
_Z12memcpy_fixedIN4wide7integerILm128EjEELb0EEvPcPKc
Line
Count
Source
92
5.99k
void memcpy_fixed(char* lhs, const char* rhs) {
93
    if constexpr (aligned) {
94
        // hint aligned address to compiler
95
        memcpy(std::assume_aligned<alignof(T)>(lhs), std::assume_aligned<alignof(T)>(rhs),
96
               sizeof(T));
97
5.99k
    } else {
98
5.99k
        memcpy(lhs, rhs, sizeof(T));
99
5.99k
    }
100
5.99k
}
_Z12memcpy_fixedIN5doris7DecimalIiEELb0EEvPcPKc
Line
Count
Source
92
1.24k
void memcpy_fixed(char* lhs, const char* rhs) {
93
    if constexpr (aligned) {
94
        // hint aligned address to compiler
95
        memcpy(std::assume_aligned<alignof(T)>(lhs), std::assume_aligned<alignof(T)>(rhs),
96
               sizeof(T));
97
1.24k
    } else {
98
1.24k
        memcpy(lhs, rhs, sizeof(T));
99
1.24k
    }
100
1.24k
}
_Z12memcpy_fixedIN5doris7DecimalIlEELb0EEvPcPKc
Line
Count
Source
92
6.77M
void memcpy_fixed(char* lhs, const char* rhs) {
93
    if constexpr (aligned) {
94
        // hint aligned address to compiler
95
        memcpy(std::assume_aligned<alignof(T)>(lhs), std::assume_aligned<alignof(T)>(rhs),
96
               sizeof(T));
97
6.77M
    } else {
98
6.77M
        memcpy(lhs, rhs, sizeof(T));
99
6.77M
    }
100
6.77M
}
_Z12memcpy_fixedIN5doris14DecimalV2ValueELb0EEvPcPKc
Line
Count
Source
92
6.47k
void memcpy_fixed(char* lhs, const char* rhs) {
93
    if constexpr (aligned) {
94
        // hint aligned address to compiler
95
        memcpy(std::assume_aligned<alignof(T)>(lhs), std::assume_aligned<alignof(T)>(rhs),
96
               sizeof(T));
97
6.47k
    } else {
98
6.47k
        memcpy(lhs, rhs, sizeof(T));
99
6.47k
    }
100
6.47k
}
_Z12memcpy_fixedIN5doris12Decimal128V3ELb0EEvPcPKc
Line
Count
Source
92
5.48k
void memcpy_fixed(char* lhs, const char* rhs) {
93
    if constexpr (aligned) {
94
        // hint aligned address to compiler
95
        memcpy(std::assume_aligned<alignof(T)>(lhs), std::assume_aligned<alignof(T)>(rhs),
96
               sizeof(T));
97
5.48k
    } else {
98
5.48k
        memcpy(lhs, rhs, sizeof(T));
99
5.48k
    }
100
5.48k
}
_Z12memcpy_fixedIN5doris7DecimalIN4wide7integerILm256EiEEEELb0EEvPcPKc
Line
Count
Source
92
4.37k
void memcpy_fixed(char* lhs, const char* rhs) {
93
    if constexpr (aligned) {
94
        // hint aligned address to compiler
95
        memcpy(std::assume_aligned<alignof(T)>(lhs), std::assume_aligned<alignof(T)>(rhs),
96
               sizeof(T));
97
4.37k
    } else {
98
4.37k
        memcpy(lhs, rhs, sizeof(T));
99
4.37k
    }
100
4.37k
}
_Z12memcpy_fixedIaLb0EEvPcPKc
Line
Count
Source
92
3.94k
void memcpy_fixed(char* lhs, const char* rhs) {
93
    if constexpr (aligned) {
94
        // hint aligned address to compiler
95
        memcpy(std::assume_aligned<alignof(T)>(lhs), std::assume_aligned<alignof(T)>(rhs),
96
               sizeof(T));
97
3.94k
    } else {
98
3.94k
        memcpy(lhs, rhs, sizeof(T));
99
3.94k
    }
100
3.94k
}
_Z12memcpy_fixedIsLb0EEvPcPKc
Line
Count
Source
92
3.50k
void memcpy_fixed(char* lhs, const char* rhs) {
93
    if constexpr (aligned) {
94
        // hint aligned address to compiler
95
        memcpy(std::assume_aligned<alignof(T)>(lhs), std::assume_aligned<alignof(T)>(rhs),
96
               sizeof(T));
97
3.50k
    } else {
98
3.50k
        memcpy(lhs, rhs, sizeof(T));
99
3.50k
    }
100
3.50k
}
_Z12memcpy_fixedInLb0EEvPcPKc
Line
Count
Source
92
3.37k
void memcpy_fixed(char* lhs, const char* rhs) {
93
    if constexpr (aligned) {
94
        // hint aligned address to compiler
95
        memcpy(std::assume_aligned<alignof(T)>(lhs), std::assume_aligned<alignof(T)>(rhs),
96
               sizeof(T));
97
3.37k
    } else {
98
3.37k
        memcpy(lhs, rhs, sizeof(T));
99
3.37k
    }
100
3.37k
}
_Z12memcpy_fixedIfLb0EEvPcPKc
Line
Count
Source
92
3.35k
void memcpy_fixed(char* lhs, const char* rhs) {
93
    if constexpr (aligned) {
94
        // hint aligned address to compiler
95
        memcpy(std::assume_aligned<alignof(T)>(lhs), std::assume_aligned<alignof(T)>(rhs),
96
               sizeof(T));
97
3.35k
    } else {
98
3.35k
        memcpy(lhs, rhs, sizeof(T));
99
3.35k
    }
100
3.35k
}
_Z12memcpy_fixedIoLb0EEvPcPKc
Line
Count
Source
92
3.33k
void memcpy_fixed(char* lhs, const char* rhs) {
93
    if constexpr (aligned) {
94
        // hint aligned address to compiler
95
        memcpy(std::assume_aligned<alignof(T)>(lhs), std::assume_aligned<alignof(T)>(rhs),
96
               sizeof(T));
97
3.33k
    } else {
98
3.33k
        memcpy(lhs, rhs, sizeof(T));
99
3.33k
    }
100
3.33k
}
_Z12memcpy_fixedIN5doris16VecDateTimeValueELb0EEvPcPKc
Line
Count
Source
92
4.68k
void memcpy_fixed(char* lhs, const char* rhs) {
93
    if constexpr (aligned) {
94
        // hint aligned address to compiler
95
        memcpy(std::assume_aligned<alignof(T)>(lhs), std::assume_aligned<alignof(T)>(rhs),
96
               sizeof(T));
97
4.68k
    } else {
98
4.68k
        memcpy(lhs, rhs, sizeof(T));
99
4.68k
    }
100
4.68k
}
_Z12memcpy_fixedIN5doris11DateV2ValueINS0_15DateV2ValueTypeEEELb0EEvPcPKc
Line
Count
Source
92
2.24k
void memcpy_fixed(char* lhs, const char* rhs) {
93
    if constexpr (aligned) {
94
        // hint aligned address to compiler
95
        memcpy(std::assume_aligned<alignof(T)>(lhs), std::assume_aligned<alignof(T)>(rhs),
96
               sizeof(T));
97
2.24k
    } else {
98
2.24k
        memcpy(lhs, rhs, sizeof(T));
99
2.24k
    }
100
2.24k
}
_Z12memcpy_fixedIN5doris11DateV2ValueINS0_19DateTimeV2ValueTypeEEELb0EEvPcPKc
Line
Count
Source
92
4.29k
void memcpy_fixed(char* lhs, const char* rhs) {
93
    if constexpr (aligned) {
94
        // hint aligned address to compiler
95
        memcpy(std::assume_aligned<alignof(T)>(lhs), std::assume_aligned<alignof(T)>(rhs),
96
               sizeof(T));
97
4.29k
    } else {
98
4.29k
        memcpy(lhs, rhs, sizeof(T));
99
4.29k
    }
100
4.29k
}
Unexecuted instantiation: _Z12memcpy_fixedIN5doris16TimestampTzValueELb0EEvPcPKc
Unexecuted instantiation: _Z12memcpy_fixedIN5doris10VarMomentsIdLm4EEELb0EEvPcPKc
Unexecuted instantiation: _Z12memcpy_fixedIN5doris10VarMomentsIdLm3EEELb0EEvPcPKc
Unexecuted instantiation: _Z12memcpy_fixedIN5doris7DecimalInEELb0EEvPcPKc
101
102
template <int max_size>
103
67
inline void memcpy_small(char* lhs, const char* rhs, size_t n) {
104
67
    DCHECK_NE(n, 0);
105
67
    if constexpr (max_size >= 4) {
106
67
        if (n >= 4) {
107
59
            memcpy_fixed<uint32_t>(lhs, rhs);
108
59
            lhs += 4;
109
59
            rhs += 4;
110
59
            n -= 4;
111
59
        }
112
67
    }
113
327
    while (n >= 1) {
114
260
        memcpy_fixed<uint8_t>(lhs, rhs);
115
260
        lhs++;
116
260
        rhs++;
117
260
        n--;
118
260
    }
119
67
}
_Z12memcpy_smallILi4EEvPcPKcm
Line
Count
Source
103
10
inline void memcpy_small(char* lhs, const char* rhs, size_t n) {
104
10
    DCHECK_NE(n, 0);
105
10
    if constexpr (max_size >= 4) {
106
10
        if (n >= 4) {
107
2
            memcpy_fixed<uint32_t>(lhs, rhs);
108
2
            lhs += 4;
109
2
            rhs += 4;
110
2
            n -= 4;
111
2
        }
112
10
    }
113
34
    while (n >= 1) {
114
24
        memcpy_fixed<uint8_t>(lhs, rhs);
115
24
        lhs++;
116
24
        rhs++;
117
24
        n--;
118
24
    }
119
10
}
_Z12memcpy_smallILi8EEvPcPKcm
Line
Count
Source
103
17
inline void memcpy_small(char* lhs, const char* rhs, size_t n) {
104
17
    DCHECK_NE(n, 0);
105
17
    if constexpr (max_size >= 4) {
106
17
        if (n >= 4) {
107
17
            memcpy_fixed<uint32_t>(lhs, rhs);
108
17
            lhs += 4;
109
17
            rhs += 4;
110
17
            n -= 4;
111
17
        }
112
17
    }
113
37
    while (n >= 1) {
114
20
        memcpy_fixed<uint8_t>(lhs, rhs);
115
20
        lhs++;
116
20
        rhs++;
117
20
        n--;
118
20
    }
119
17
}
_Z12memcpy_smallILi16EEvPcPKcm
Line
Count
Source
103
40
inline void memcpy_small(char* lhs, const char* rhs, size_t n) {
104
40
    DCHECK_NE(n, 0);
105
40
    if constexpr (max_size >= 4) {
106
40
        if (n >= 4) {
107
40
            memcpy_fixed<uint32_t>(lhs, rhs);
108
40
            lhs += 4;
109
40
            rhs += 4;
110
40
            n -= 4;
111
40
        }
112
40
    }
113
256
    while (n >= 1) {
114
216
        memcpy_fixed<uint8_t>(lhs, rhs);
115
216
        lhs++;
116
216
        rhs++;
117
216
        n--;
118
216
    }
119
40
}
120
121
template <>
122
14.2k
inline void memcpy_small<2>(char* lhs, const char* rhs, size_t n) {
123
14.2k
    DCHECK_NE(n, 0);
124
14.2k
    if (n == 2) {
125
2
        memcpy_fixed<uint16_t>(lhs, rhs);
126
14.2k
    } else {
127
14.2k
        memcpy_fixed<uint8_t>(lhs, rhs);
128
14.2k
    }
129
14.2k
}