Coverage Report

Created: 2024-11-21 15:53

/root/doris/be/src/util/block_compression.h
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <gen_cpp/PlanNodes_types.h>
21
#include <gen_cpp/parquet_types.h>
22
23
#include <cstddef>
24
#include <vector>
25
26
#include "common/status.h"
27
#include "util/slice.h"
28
29
namespace doris {
30
class faststring;
31
32
namespace segment_v2 {
33
enum CompressionTypePB : int;
34
} // namespace segment_v2
35
36
// This class encapsulates a compression/decompression algorithm.
37
// It only compresses or decompresses a whole block of data, which means all
38
// the data must be given when compress or decompress is called. This class
39
// does not handle stream compression.
40
//
41
// NOTICE!! BlockCompressionCodec is NOT thread safe; it must NOT be shared between threads.
42
//
43
44
// Maximum size of a compression buffer that is kept for reuse.
45
// If max_compress_len is bigger than this, don't use the faststring in the context.
46
static constexpr int MAX_COMPRESSION_BUFFER_SIZE_FOR_REUSE = 8 * 1024 * 1024; // 8 MiB
47
class BlockCompressionCodec {
48
public:
49
6
    virtual ~BlockCompressionCodec() {} // virtual: the class is a polymorphic base with virtual members
50
51
0
    virtual Status init() { return Status::OK(); } // default implementation does nothing and returns OK
52
53
    // Compress the input data into output.
54
    // output should be preallocated, and its capacity must be large enough
55
    // for the compressed input; an upper bound can be obtained from max_compressed_len().
56
    // The size of the compressed data will be set as output's size.
57
    virtual Status compress(const Slice& input, faststring* output) = 0;
58
59
    // The default implementation merges the input list into one big buffer and calls
60
    // compress(Slice) to finish compression. If the compression type supports digesting
61
    // slices one by one, it should override this function.
62
    virtual Status compress(const std::vector<Slice>& input, size_t uncompressed_size,
63
                            faststring* output);
64
65
    // Decompress the input data into output; output's capacity should be large enough
66
    // for the decompressed data.
67
    // The size of the decompressed data will be set as output's size.
68
    virtual Status decompress(const Slice& input, Slice* output) = 0;
69
70
    // Returns an upper bound on the compressed length for an input of length len.
71
    virtual size_t max_compressed_len(size_t len) = 0;
72
73
    virtual bool exceed_max_compress_len(size_t uncompressed_size); // NOTE(review): defined out of line; presumably true when uncompressed_size is too large for this codec - confirm against the implementation
74
};
75
76
// Get a BlockCompressionCodec for the given compression type.
77
// Returns Status::OK if a valid codec is found. If the returned codec is null, it means
78
// the type is NO_COMPRESSION. If it is not null, the caller can use it to compress/decompress
79
// data.
80
//
81
// NOTICE!! BlockCompressionCodec is NOT thread safe; it must NOT be shared between threads.
82
//
83
// Returns a non-OK status if an error happens.
84
Status get_block_compression_codec(segment_v2::CompressionTypePB type,
85
                                   BlockCompressionCodec** codec);
86
87
Status get_block_compression_codec(tparquet::CompressionCodec::type parquet_codec,
88
                                   BlockCompressionCodec** codec); // overload selected by a Parquet compression codec type
89
90
// TODO: refactor this code into CompressionOutputStream and CompressionInputStream
91
Status get_block_compression_codec(TFileCompressType::type type, BlockCompressionCodec** codec); // overload selected by a TFileCompressType value
92
93
} // namespace doris