Coverage Report

Created: 2026-03-15 17:28

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/core/string_view.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/facebookincubator/velox/blob/main/velox/type/StringView.h
19
// And modified by Doris
20
21
#pragma once
22
23
#include <glog/logging.h>
24
25
#include <cstddef>
26
#include <cstdint>
27
#include <cstring>
28
#include <string>
29
30
#include "core/string_ref.h"
31
32
namespace doris {
33
// Variable-length string or binary value for use in vectors. The semantics are
// similar to std::string_view or folly::StringPiece, of which a subset of the
// interface is exposed.
//
// Object layout (16 bytes, enforced by static_asserts in the constructors):
//   size_   (4 bytes)  length of the string in bytes
//   prefix_ (4 bytes)  the first (up to) four characters
//   value_  (8 bytes)  the remaining inline characters, or a pointer to the
//                      caller's buffer
//
// Strings of at most kInlineSize (12) bytes are copied into prefix_ + value_
// and no reference to the source buffer is kept. Longer strings keep a
// NON-OWNING pointer to the source buffer and cache its first four characters
// in prefix_. Caching the prefix allows failing comparisons early and reduces
// the CPU cache working set when dealing with short strings.
class StringView {
// NOTE(review): compile_check_begin.h is included inside the class body while
// the matching compile_check_end.h follows the class at namespace scope. This
// presumably works because the header only contains preprocessor directives -
// confirm before moving it.
#include "common/compile_check_begin.h"
public:
    using value_type = char;
    // Number of leading characters cached in prefix_.
    static constexpr size_t kPrefixSize = 4 * sizeof(char);
    // Longest string (in bytes) that is stored inside the object itself.
    static constexpr size_t kInlineSize = 12;

    // Constructs an empty view with all 16 bytes set to zero.
    StringView() {
        static_assert(sizeof(StringView) == 16);
        // Go through void* so compilers do not warn about memset on a class with
        // user-provided constructors.
        memset(static_cast<void*>(this), 0, sizeof(StringView));
    }

    // Constructs a view of the `len` bytes starting at `data`.
    //
    // `data` may be null only when `len` is 0 (checked in debug builds). For
    // `len` > kInlineSize the view keeps pointing at `data`, so that buffer must
    // outlive the view.
    StringView(const char* data, uint32_t len) : size_(len) {
        // The inline copy below writes up to kInlineSize bytes starting at prefix_
        // and therefore relies on value_ following prefix_ without padding.
        static_assert(offsetof(StringView, value_) == offsetof(StringView, prefix_) + kPrefixSize,
                      "value_ must directly follow prefix_");
        static_assert(kInlineSize == kPrefixSize + sizeof(value_),
                      "inline capacity must equal prefix_ plus value_");
        DCHECK(data || len == 0);
        if (isInline()) {
            // Zero all inline bytes first, including for the empty string, so that
            // every inline view has a fully defined representation and the two
            // 64-bit words of the object can be compared directly.
            memset(prefix_, 0, kPrefixSize);
            memset(&value_, 0, sizeof(value_));
            if (size_ != 0) {
                // Small string: copy the characters into prefix_ + value_.
                memcpy(prefix_, data, size_);
            }
        } else {
            // Large string: cache the first characters and keep the pointer.
            memcpy(prefix_, data, kPrefixSize);
            value_.data = data;
        }
    }

    // Same as the (const char*, uint32_t) constructor, for unsigned bytes.
    StringView(const unsigned char* data, uint32_t len)
            : StringView(reinterpret_cast<const char*>(data), len) {}

    // True when the characters of this view are stored inside the object.
    bool isInline() const { return isInline(size_); }

    // True when a string of `size` bytes would be stored inline.
    ALWAYS_INLINE static constexpr bool isInline(uint32_t size) { return size <= kInlineSize; }

    // A temporary std::string would be destroyed while a long view still points
    // into it, so construction from an rvalue string is disabled.
    explicit StringView(std::string&& value) = delete;
    explicit StringView(const std::string& value)
            : StringView(value.data(), cast_set<uint32_t>(value.size())) {}
    explicit StringView(std::string_view value)
            : StringView(value.data(), cast_set<uint32_t>(value.size())) {}
    // `data` must be a non-null, NUL-terminated string (strlen is applied to it).
    /* implicit */ StringView(const char* data)
            : StringView(data, cast_set<uint32_t>(strlen(data))) {}

    // Returns a StringRef built from data() and size().
    doris::StringRef to_string_ref() const { return {data(), size()}; }

    // For an inline view data() points into the object itself, so a
    // std::string_view taken from a temporary StringView would dangle.
    operator std::string_view() && = delete;
    explicit operator std::string_view() const& { return {data(), size()}; }
    // Copies the characters into a new std::string.
    operator std::string() const { return std::string(data(), size()); }
    std::string str() const { return *this; }

    // Pointer to the first character: the inline storage for short strings, the
    // external buffer otherwise. Disabled on temporaries because the inline
    // storage dies with the object.
    const char* data() && = delete;
    const char* data() const& { return isInline() ? prefix_ : value_.data; }

    // Length in bytes.
    uint32_t size() const { return size_; }
    bool empty() const { return size() == 0; }

    // Overwrites the stored length; prefix_ and value_ are left untouched.
    // NOTE(review): data() selects the storage from size_, so a new size on the
    // other side of kInlineSize would make data() read the wrong storage -
    // callers presumably only shrink within the same representation; verify.
    void set_size(uint32_t size) { size_ = size; }

    // Defined out of line.
    bool operator==(const StringView& other) const;

    // Writes exactly size() bytes of the view to `os`.
    friend std::ostream& operator<<(std::ostream& os, const StringView& stringView) {
        os.write(stringView.data(), stringView.size());
        return os;
    }

    // Three-way comparison derived from the sign of compare().
    auto operator<=>(const StringView& other) const {
        const auto cmp = compare(other);
        if (cmp < 0) {
            return std::strong_ordering::less;
        }
        if (cmp > 0) {
            return std::strong_ordering::greater;
        }
        return std::strong_ordering::equal;
    }

    // Returns 0, if this == other
    //       < 0, if this < other
    //       > 0, if this > other
    // Defined out of line.
    int32_t compare(const StringView& other) const;

    const char* begin() && = delete;
    const char* begin() const& { return data(); }
    const char* end() && = delete;
    const char* end() const& { return data() + size(); }

    // Returns the bytes of the view as "0x" followed by two upper-case hex digits
    // per byte; an empty view yields "0x".
    std::string dump_hex() const {
        static constexpr char kHex[] = "0123456789ABCDEF";
        std::string out;
        // Widen before multiplying so the capacity cannot wrap in 32-bit arithmetic.
        out.reserve(static_cast<size_t>(size_) * 2 + 2);
        out.push_back('0');
        out.push_back('x');
        const char* ptr = data();
        for (uint32_t i = 0; i < size_; ++i) {
            const auto c = static_cast<unsigned char>(ptr[i]);
            out.push_back(kHex[c >> 4]);
            out.push_back(kHex[c & 0x0F]);
        }
        return out;
    }

private:
    // First 8 bytes of the object (size_ followed by prefix_) as one integer.
    // memcpy is used instead of a pointer cast to stay within the aliasing rules.
    inline int64_t size_and_prefix_as_int64() const {
        int64_t word;
        memcpy(&word, static_cast<const void*>(this), sizeof(word));
        return word;
    }

    // Last 8 bytes of the object (value_) as one integer.
    inline int64_t inlined_as_int64() const {
        int64_t word;
        memcpy(&word, &value_, sizeof(word));
        return word;
    }

    // The four prefix bytes as one integer, in native byte order.
    int32_t prefix_as_int() const {
        int32_t word;
        memcpy(&word, prefix_, sizeof(word));
        return word;
    }

    // Length of the string in bytes.
    uint32_t size_;
    // First (up to) four characters; zero padded for shorter inline strings.
    char prefix_[4];
    union {
        // Characters 4..11 of an inline string, zero padded.
        char inlined[8];
        // Non-owning pointer to the full string when it is not inline.
        const char* data;
    } value_;
};
157
#include "common/compile_check_end.h"
158
} // namespace doris