Coverage Report

Created: 2024-11-18 10:37

/root/doris/be/src/util/block_compression.h
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <gen_cpp/parquet_types.h>
21
22
#include <cstddef>
23
#include <vector>
24
25
#include "common/status.h"
26
#include "util/slice.h"
27
28
namespace doris {
29
class faststring;
30
31
namespace segment_v2 {
32
enum CompressionTypePB : int;
33
} // namespace segment_v2
34
35
// This class is used to encapsulate Compression/Decompression algorithm.
36
// This class is only used to compress a block of data, which means all data
37
// should be given when calling compress or decompress. This class doesn't handle
38
// stream compression.
39
//
40
// NOTICE!! BlockCompressionCodec is NOT thread safe, it should NOT be shared by threads
41
//
42
43
// max compression reuse buffer size
44
// if max_compress_len is bigger than this, don't use faststring in context
45
static constexpr int MAX_COMPRESSION_BUFFER_SIZE_FOR_REUSE = 8 * 1024 * 1024; // 8 MiB
46
class BlockCompressionCodec { // Abstract interface for whole-block (non-streaming) compression codecs.
47
public:
48
6
    virtual ~BlockCompressionCodec() {} // Virtual so derived codecs can be destroyed through a base pointer.
49
50
0
    virtual Status init() { return Status::OK(); } // Default implementation does nothing and reports success.
51
52
    // Compresses the input data into output.
53
    // Output should be preallocated, and its capacity must be large enough
54
    // for the compressed input; the bound can be obtained from max_compressed_len().
55
    // The size of the compressed data will be set in output's size.
56
    virtual Status compress(const Slice& input, faststring* output) = 0;
57
58
    // The default implementation merges the input list into one big buffer and calls
59
    // compress(Slice) to finish compression. If a compression type supports digesting
60
    // slices one by one, it should reimplement this function.
61
    virtual Status compress(const std::vector<Slice>& input, size_t uncompressed_size,
62
                            faststring* output);
63
64
    // Decompresses input data into output; output's capacity should be large enough
65
    // for the decompressed data.
66
    // The size of the decompressed data will be set in output's size.
67
    virtual Status decompress(const Slice& input, Slice* output) = 0;
68
69
    // Returns an upper bound on the compressed length for an input of length len.
70
    virtual size_t max_compressed_len(size_t len) = 0;
71
72
    virtual bool exceed_max_compress_len(size_t uncompressed_size); // NOTE(review): defined out of line; presumably reports whether uncompressed_size is too large for this codec to compress -- confirm against the implementation.
73
};
74
75
// Get a BlockCompressionCodec through type.
76
// Return Status::OK if a valid codec is found. If codec is null, it means it is
77
// NO_COMPRESSION. If codec is not null, user can use it to compress/decompress
78
// data.
79
//
80
// NOTICE!! BlockCompressionCodec is NOT thread safe, it should NOT be shared by threads
81
//
82
// Returns a non-OK status if an error occurs.
83
Status get_block_compression_codec(segment_v2::CompressionTypePB type, // type: segment_v2 compression type that selects the codec
84
                                   BlockCompressionCodec** codec); // codec: out-parameter that receives the codec pointer
85
86
Status get_block_compression_codec(tparquet::CompressionCodec::type parquet_codec, // Overload keyed by a Parquet compression codec value.
87
                                   BlockCompressionCodec** codec); // NOTE(review): presumably the same out-parameter contract as the CompressionTypePB overload -- confirm.
88
89
} // namespace doris