Coverage Report

Created: 2026-04-15 14:28

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/core/string_view.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/facebookincubator/velox/blob/main/velox/type/StringView.h
19
// And modified by Doris
20
21
#pragma once
22
23
#include <glog/logging.h>
24
25
#include <cstddef>
26
#include <cstdint>
27
#include <cstring>
28
#include <string>
29
30
#include "core/string_ref.h"
31
32
namespace doris {
33
// Variable length string or binary type for use in vectors. This has
34
// semantics similar to std::string_view or folly::StringPiece and
35
// exposes a subset of the interface. If the string is 12 characters
36
// or less, it is inlined and no reference is held. If it is longer, a
37
// reference to the string is held and the first 4 characters are
38
// cached in the StringView. This allows failing comparisons early and
39
// reduces the CPU cache working set when dealing with short strings.
40
41
class StringView {
42
public:
43
    using value_type = char;
44
    static constexpr size_t kPrefixSize = 4 * sizeof(char);
45
    static constexpr size_t kInlineSize = 12;
46
47
2.55k
    StringView() {
48
2.55k
        static_assert(sizeof(StringView) == 16);
49
2.55k
        memset(this, 0, sizeof(StringView));
50
2.55k
    }
51
52
7.21k
    StringView(const char* data, uint32_t len) : size_(len) {
53
7.21k
        DCHECK_GE(len, 0);
54
7.21k
        DCHECK(data || len == 0);
55
7.21k
        if (isInline()) {
56
            // Zero the inline part.
57
            // this makes sure that inline strings can be compared for equality with 2
58
            // int64 compares.
59
6.33k
            memset(prefix_, 0, kPrefixSize);
60
6.33k
            if (size_ == 0) {
61
2.06k
                return;
62
2.06k
            }
63
            // small string: inlined. Zero the last 8 bytes first to allow for whole
64
            // word comparison.
65
4.26k
            value_.data = nullptr;
66
4.26k
            memcpy(prefix_, data, size_);
67
4.26k
        } else {
68
            // large string: store pointer
69
876
            memcpy(prefix_, data, kPrefixSize);
70
876
            value_.data = data;
71
876
        }
72
7.21k
    }
73
74
    StringView(unsigned char* data, uint32_t len)
75
1
            : StringView(reinterpret_cast<const char*>(data), len) {}
76
77
14.2k
    bool isInline() const { return isInline(size_); }
78
79
14.3k
    // Returns true when a string of `size` bytes is short enough to be inlined,
    // i.e. size <= kInlineSize.
    ALWAYS_INLINE static constexpr bool isInline(uint32_t size) { return size <= kInlineSize; }
80
81
    explicit StringView(std::string&& value) = delete;
82
    explicit StringView(const std::string& value)
83
33
            : StringView(value.data(), cast_set<uint32_t>(value.size())) {}
84
    explicit StringView(std::string_view value)
85
7
            : StringView(value.data(), cast_set<uint32_t>(value.size())) {}
86
    /* implicit */ StringView(const char* data)
87
499
            : StringView(data, cast_set<uint32_t>(strlen(data))) {}
88
2.47k
    // Returns a doris::StringRef built from data() and size().
    doris::StringRef to_string_ref() const { return {data(), size()}; }
89
90
    operator std::string_view() && = delete;
91
1
    // Explicit conversion to a std::string_view spanning data() .. data() + size().
    // Only available on lvalues; the rvalue overload is deleted.
    explicit operator std::string_view() const& { return std::string_view(data(), size()); }
92
6
    // Implicit conversion that copies size() bytes starting at data() into a
    // new std::string.
    operator std::string() const { return std::string(data(), size()); }
93
2
    // Returns an owning std::string copy of the viewed characters, produced by
    // the std::string conversion operator.
    std::string str() const { return static_cast<std::string>(*this); }
94
95
    const char* data() && = delete;
96
6.60k
    // Pointer to the first character: the in-object bytes starting at prefix_
    // for inline strings, otherwise the stored external pointer. Only callable
    // on lvalues; the rvalue overload is deleted.
    const char* data() const& {
        if (isInline()) {
            return prefix_;
        }
        return value_.data;
    }
97
98
7.30k
    // Returns the stored length (size_) of the string.
    uint32_t size() const { return size_; }
99
3
    bool empty() const { return size() == 0; }
100
101
14
    // Overwrites the stored length only; prefix_ and value_ are left untouched.
    // data() picks between the inline bytes and the stored pointer based on
    // size_, so changing the size across kInlineSize switches which storage is
    // read. NOTE(review): callers presumably only shrink within the same
    // representation — confirm.
    void set_size(uint32_t size) { size_ = size; }
102
103
    bool operator==(const StringView& other) const;
104
2
    friend std::ostream& operator<<(std::ostream& os, const StringView& stringView) {
105
2
        os.write(stringView.data(), stringView.size());
106
2
        return os;
107
2
    }
108
8
    // Three-way comparison derived from compare(): a negative result maps to
    // std::strong_ordering::less, a positive one to greater, zero to equal.
    auto operator<=>(const StringView& other) const {
        const int32_t order = compare(other);
        if (order < 0) {
            return std::strong_ordering::less;
        }
        if (order > 0) {
            return std::strong_ordering::greater;
        }
        return std::strong_ordering::equal;
    }
114
115
    // Returns 0, if this == other
116
    //       < 0, if this < other
117
    //       > 0, if this > other
118
    int32_t compare(const StringView& other) const;
119
120
    const char* begin() && = delete;
121
2
    // Start of the character range; returns the same pointer as data().
    const char* begin() const& { return data(); }
122
    const char* end() && = delete;
123
2
    const char* end() const& { return data() + size(); }
124
125
18
    std::string dump_hex() const {
126
18
        static const char* kHex = "0123456789ABCDEF";
127
18
        std::string out;
128
18
        out.reserve(size_ * 2 + 2);
129
18
        out.push_back('0');
130
18
        out.push_back('x');
131
18
        const char* ptr = data();
132
163
        for (uint32_t i = 0; i < size_; ++i) {
133
145
            auto c = static_cast<unsigned char>(ptr[i]);
134
145
            out.push_back(kHex[c >> 4]);
135
145
            out.push_back(kHex[c & 0x0F]);
136
145
        }
137
18
        return out;
138
18
    }
139
140
private:
141
16
    inline int64_t size_and_prefix_as_int64() const {
142
16
        return reinterpret_cast<const int64_t*>(this)[0];
143
16
    }
144
145
4
    // Returns the second 8 bytes of the object (the value_ union) as an
    // int64_t. The bytes are copied with memcpy rather than read through a
    // casted int64_t pointer, which would violate the strict-aliasing rules.
    inline int64_t inlined_as_int64() const {
        int64_t word;
        memcpy(&word, reinterpret_cast<const char*>(this) + sizeof(int64_t), sizeof(word));
        return word;
    }
146
147
2.47k
    // Returns the 4 prefix bytes as an int32_t. The bytes are copied with
    // memcpy rather than read through a casted int32_t pointer, which would
    // violate the strict-aliasing rules.
    int32_t prefix_as_int() const {
        int32_t word;
        memcpy(&word, prefix_, sizeof(word));
        return word;
    }
148
149
    uint32_t size_;
150
    char prefix_[4];
151
    union {
152
        char inlined[8];
153
        const char* data;
154
    } value_;
155
};
156
} // namespace doris