/root/doris/be/src/util/block_compression.h
| Line | Count | Source | 
| 1 |  | // Licensed to the Apache Software Foundation (ASF) under one | 
| 2 |  | // or more contributor license agreements.  See the NOTICE file | 
| 3 |  | // distributed with this work for additional information | 
| 4 |  | // regarding copyright ownership.  The ASF licenses this file | 
| 5 |  | // to you under the Apache License, Version 2.0 (the | 
| 6 |  | // "License"); you may not use this file except in compliance | 
| 7 |  | // with the License.  You may obtain a copy of the License at | 
| 8 |  | // | 
| 9 |  | //   http://www.apache.org/licenses/LICENSE-2.0 | 
| 10 |  | // | 
| 11 |  | // Unless required by applicable law or agreed to in writing, | 
| 12 |  | // software distributed under the License is distributed on an | 
| 13 |  | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | 
| 14 |  | // KIND, either express or implied.  See the License for the | 
| 15 |  | // specific language governing permissions and limitations | 
| 16 |  | // under the License. | 
| 17 |  |  | 
| 18 |  | #pragma once | 
| 19 |  |  | 
| 20 |  | #include <gen_cpp/PlanNodes_types.h> | 
| 21 |  | #include <gen_cpp/parquet_types.h> | 
| 22 |  |  | 
| 23 |  | #include <cstddef> | 
| 24 |  | #include <vector> | 
| 25 |  |  | 
| 26 |  | #include "common/status.h" | 
| 27 |  | #include "util/slice.h" | 
| 28 |  |  | 
| 29 |  | namespace doris { | 
| 30 |  | class faststring; | 
| 31 |  |  | 
| 32 |  | namespace segment_v2 { | 
| 33 |  | enum CompressionTypePB : int; | 
| 34 |  | } // namespace segment_v2 | 
| 35 |  |  | 
| 36 |  | // This class encapsulates a compression/decompression algorithm. | 
| 37 |  | // It is only used to compress a block of data, which means all data | 
| 38 |  | // must be given when calling compress or decompress. This class doesn't handle | 
| 39 |  | // stream compression. | 
| 40 |  | // | 
| 41 |  | // NOTICE!! BlockCompressionCodec is NOT thread safe, it should NOT be shared by threads | 
| 42 |  | // | 
| 43 |  |  | 
| 44 |  | // Maximum size of a compression buffer that is kept for reuse. | 
| 45 |  | // If max_compress_len is bigger than this, the faststring in the context is not used. | 
| 46 |  | const static int MAX_COMPRESSION_BUFFER_SIZE_FOR_REUSE = 1024 * 1024 * 8; | 
| 47 |  | class BlockCompressionCodec { | 
| 48 |  | public: | 
| 49 | 6 |     virtual ~BlockCompressionCodec() {} | 
| 50 |  |  | 
| 51 | 0 |     virtual Status init() { return Status::OK(); } | 
| 52 |  |  | 
| 53 |  |     // Compresses input into output. | 
| 54 |  |     // output should be preallocated, with a capacity large enough to hold the | 
| 55 |  |     // compressed input; that bound can be obtained from max_compressed_len(). | 
| 56 |  |     // The size of the compressed data is stored in output's size. | 
| 57 |  |     virtual Status compress(const Slice& input, faststring* output) = 0; | 
| 58 |  |  | 
| 59 |  |     // The default implementation merges the input list into one big buffer and calls | 
| 60 |  |     // compress(Slice) to finish compression. If a compression type supports digesting | 
| 61 |  |     // slices one by one, it should override this function. | 
| 62 |  |     virtual Status compress(const std::vector<Slice>& input, size_t uncompressed_size, | 
| 63 |  |                             faststring* output); | 
| 64 |  |  | 
| 65 |  |     // Decompresses input into output. The capacity of output must be large enough | 
| 66 |  |     // to hold the decompressed data. | 
| 67 |  |     // The size of the decompressed data is stored in output's size. | 
| 68 |  |     virtual Status decompress(const Slice& input, Slice* output) = 0; | 
| 69 |  |  | 
| 70 |  |     // Returns an upper bound on the compressed length for an input of length len. | 
| 71 |  |     virtual size_t max_compressed_len(size_t len) = 0; | 
| 72 |  |  | 
| 73 |  |     virtual bool exceed_max_compress_len(size_t uncompressed_size); | 
| 74 |  | }; | 
| 75 |  |  | 
| 76 |  | // Gets a BlockCompressionCodec for the given compression type. | 
| 77 |  | // Returns Status::OK if a valid codec is found. If the returned codec is null, the type | 
| 78 |  | // is NO_COMPRESSION. If it is not null, the caller can use it to compress/decompress | 
| 79 |  | // data. | 
| 80 |  | // | 
| 81 |  | // NOTICE!! BlockCompressionCodec is NOT thread safe; it should NOT be shared between threads. | 
| 82 |  | // | 
| 83 |  | // Returns a non-OK status if an error happens. | 
| 84 |  | Status get_block_compression_codec(segment_v2::CompressionTypePB type, | 
| 85 |  |                                    BlockCompressionCodec** codec); | 
| 86 |  |  | 
| 87 |  | Status get_block_compression_codec(tparquet::CompressionCodec::type parquet_codec, | 
| 88 |  |                                    BlockCompressionCodec** codec); | 
| 89 |  |  | 
| 90 |  | // TODO: refactor this code into CompressionOutputStream and CompressionInputStream | 
| 91 |  | Status get_block_compression_codec(TFileCompressType::type type, BlockCompressionCodec** codec); | 
| 92 |  |  | 
| 93 |  | } // namespace doris |