Coverage Report

Created: 2026-03-12 17:15

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/function_tokenize.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <stddef.h>
21
22
#include <memory>
23
#include <string>
24
#include <vector>
25
26
#include "common/status.h"
27
#include "core/block/column_numbers.h"
28
#include "core/column/column_array.h"
29
#include "core/data_type/data_type.h"
30
#include "core/data_type/data_type_array.h"
31
#include "core/data_type/data_type_string.h"
32
#include "core/types.h"
33
#include "exprs/function/function.h"
34
#include "exprs/function/simple_function_factory.h"
35
#include "exprs/function_context.h"
36
#include "storage/index/inverted/inverted_index_parser.h"
37
#include "storage/index/inverted/inverted_index_reader.h"
38
39
// Forward declaration of Block: in this header it is only used by reference
// (the `Block& block` parameter of FunctionTokenize::execute_impl).
namespace doris {
40
class Block;
41
} // namespace doris
42
43
namespace doris {
44
45
// IFunction implementation published under the name "tokenize".
// As declared here it takes exactly two arguments, both expected to be of a
// string type, and reports a String result type. The tokenizing helpers and
// execute_impl are only declared in this header; their definitions live
// elsewhere.
class FunctionTokenize : public IFunction {
46
public:
47
    // Function name; returned by get_name() and embedded in the DCHECK messages below.
    static constexpr auto name = "tokenize";
48
49
18
    // Factory: builds a new FunctionTokenize owned by a shared pointer.
    static FunctionPtr create() { return std::make_shared<FunctionTokenize>(); }
50
    // Alias for a UInt8 PaddedPODArray. Not referenced anywhere else in this header.
    using NullMapType = PaddedPODArray<UInt8>;
51
52
1
    // Returns the static `name` ("tokenize").
    String get_name() const override { return name; }
53
54
6
    // Fixed arity: the function is not variadic.
    bool is_variadic() const override { return false; }
55
56
5
    // The function always takes two arguments.
    size_t get_number_of_arguments() const override { return 2; }
57
58
5
    // Declares the result type. Both arguments are DCHECKed to be string types;
    // the result is always a DataTypeString and does not depend on the argument
    // types beyond those checks.
    // NOTE(review): DCHECK is presumably compiled out of release builds, in which
    // case a non-string argument would not be rejected here -- confirm.
    // NOTE(review): the array column/type headers are included by this file but
    // the declared result is a plain String -- confirm this matches what
    // execute_impl actually writes.
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
59
5
        DCHECK(is_string_type(arguments[0]->get_primitive_type()))
60
0
                << "first argument for function: " << name << " should be string"
61
0
                << " and arguments[0] is " << arguments[0]->get_name();
62
5
        DCHECK(is_string_type(arguments[1]->get_primitive_type()))
63
0
                << "second argument for function: " << name << " should be string"
64
0
                << " and arguments[1] is " << arguments[1]->get_name();
65
5
        return std::make_shared<DataTypeString>();
66
5
    }
67
    // Declaration only (defined outside this header). Takes the source string
    // column, an inverted-index analyzer context, a support_phrase flag and the
    // destination column.
    // Presumably tokenizes each source row with the given analyzer -- verify
    // against the definition.
    void _do_tokenize(const ColumnString& src_column_string,
68
                      const InvertedIndexAnalyzerCtx& analyzer_ctx, bool support_phrase,
69
                      const MutableColumnPtr& dest_column_ptr) const;
70
    // Declaration only (defined outside this header). Same source/destination
    // parameters as _do_tokenize but without an analyzer context.
    // Presumably the path used when no analyzer/parser applies -- verify against
    // the definition.
    void _do_tokenize_none(const ColumnString& src_column_string,
71
                           const MutableColumnPtr& dest_column_ptr) const;
72
    // IFunction entry point, declared here and defined elsewhere. The `context`
    // and `input_rows_count` parameter names are commented out in this
    // declaration.
    Status execute_impl(FunctionContext* /*context*/, Block& block, const ColumnNumbers& arguments,
73
                        uint32_t result, size_t /*input_rows_count*/) const override;
74
};
75
} // namespace doris