Coverage Report

Created: 2024-11-20 21:21

/root/doris/be/src/gutil/strings/charset.h
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2008 Google Inc. All Rights Reserved.
2
3
#pragma once
4
5
#include "gutil/integral_types.h"
6
7
namespace strings {
8
9
// A CharSet is a simple map from (1-byte) characters to Booleans. It simply
10
// exposes the mechanism of checking if a given character is in the set, fairly
11
// efficiently. Useful for string tokenizing routines.
12
//
13
// Run on asherah (2 X 2400 MHz CPUs); 2008/11/10-13:18:03
14
// CPU: Intel Core2 (2 cores) dL1:32KB dL2:4096KB
15
// ***WARNING*** CPU scaling is enabled, the benchmark timings may be noisy,
16
// Benchmark                Time(ns)    CPU(ns) Iterations
17
// -------------------------------------------------------
18
// BM_CharSetTesting/1K           21         21   32563138
19
// BM_CharSetTesting/4K           21         21   31968433
20
// BM_CharSetTesting/32K          21         21   32114953
21
// BM_CharSetTesting/256K         22         22   31679082
22
// BM_CharSetTesting/1M           21         21   32563138
23
//
24
// This class is thread-compatible.
25
//
26
// This class has an implicit constructor.
27
// Style guide exception granted:
28
// http://goto/style-guide-exception-20978288
29
30
class CharSet {
31
public:
32
    // Initialize a CharSet containing no characters or the given set of
33
    // characters, respectively.
34
    CharSet();
35
    // Deliberately an implicit constructor, so anything that takes a CharSet
36
    // can also take an explicit list of characters.
37
    CharSet(const char* characters); // NOLINT(runtime/explicit)
38
    explicit CharSet(const CharSet& other);
39
40
    // Add or remove a character from the set.
41
0
    void Add(unsigned char c) { bits_[Word(c)] |= BitMask(c); }
42
0
    void Remove(unsigned char c) { bits_[Word(c)] &= ~BitMask(c); }
43
44
    // Return true if this character is in the set
45
0
    bool Test(unsigned char c) const { return bits_[Word(c)] & BitMask(c); }
46
47
private:
48
    // The numbers below are optimized for 64-bit hardware. TODO(user): In the
49
    // future, we should change this to use uword_t and do various bits of magic
50
    // to calculate the numbers at compile time.
51
52
    // In general,
53
    // static const int kNumWords = max(32 / sizeof(uword_t), 1);
54
    uint64 bits_[4];
55
56
    // 4 words => the high 2 bits of c are the word number. In general,
57
    // kShiftValue = 8 - log2(kNumWords)
58
0
    static int Word(unsigned char c) { return c >> 6; }
59
60
    // And the value we AND with c is ((1 << shift value) - 1)
61
    // static const int kLowBitsMask = (256 / kNumWords) - 1;
62
0
    static uint64 BitMask(unsigned char c) {
63
0
        uint64 mask = 1;
64
0
        return mask << (c & 0x3f);
65
0
    }
66
};
67
68
} // namespace strings