Coverage Report

Created: 2026-03-13 09:37

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/cast/cast_base.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "exprs/function/cast/cast_base.h"
19
20
#include <cstdint>
21
22
#include "util/jsonb_writer.h"
23
namespace doris::CastWrapper {
24
25
/// Casts an arbitrary column to the result type by round-tripping each row through JSON text.
///
/// Every row of the source column (`arguments[0]`) is serialized to JSON text with the
/// source type's SerDe, and that text is then parsed with the SerDe of the result type.
/// A row that fails either step receives a default value and is flagged in the null map.
/// The result position of `block` is replaced with a ColumnNullable wrapping the converted
/// column and that null map.
///
/// @param context           may be null; when it or its state is missing, UTC is used as
///                          the serialization time zone.
/// @param block             block holding both the source and the result position.
/// @param arguments         `arguments[0]` is the position of the source column.
/// @param result            position whose type selects the target SerDe and whose column
///                          is replaced.
/// @param input_rows_count  number of rows to convert.
/// @param null_map          not consulted by this function.
/// @return always Status::OK(); per-row failures surface as NULL rows, not as an error.
Status cast_from_generic_to_jsonb(FunctionContext* context, Block& block,
                                  const ColumnNumbers& arguments, uint32_t result,
                                  size_t input_rows_count, const NullMap::value_type* null_map) {
    auto data_type_to = block.get_by_position(result).type;
    const auto& source = block.get_by_position(arguments[0]);
    const IColumn& col_from = *source.column;

    DataTypeSerDeSPtr from_serde = source.type->get_serde();
    DataTypeSerDeSPtr to_serde = data_type_to->get_serde();
    auto col_to = data_type_to->create_column();

    // Exactly one row is appended to col_to per loop iteration below, so the null map is
    // sized by the same bound to keep both halves of the nullable column the same length.
    ColumnUInt8::MutablePtr col_null_map_to = ColumnUInt8::create(input_rows_count, 0);
    ColumnUInt8::Container& null_flags = col_null_map_to->get_data();

    // Options used when turning a source row into JSON text.
    DataTypeSerDe::FormatOptions serialize_options;
    auto utc_time_zone = cctz::utc_time_zone();
    serialize_options.timezone = (context && context->state())
                                         ? &context->state()->timezone_obj()
                                         : &utc_time_zone;
    serialize_options.escape_char = '\\';

    // Options used when parsing that text into the target column.
    DataTypeSerDe::FormatOptions parse_options;
    parse_options.converted_from_string = true;

    // Scratch column reused for every row; it holds at most one string at a time.
    auto tmp_col = ColumnString::create();
    for (size_t i = 0; i < input_rows_count; i++) {
        // Step 1: serialize row i to text.
        tmp_col->clear();
        VectorBufferWriter write_buffer(*tmp_col.get());
        Status st = from_serde->serialize_column_to_json(col_from, i, i + 1, write_buffer,
                                                         serialize_options);
        if (st.ok()) {
            // Step 2: parse the text with the target SerDe.
            write_buffer.commit();
            auto json_text = tmp_col->get_data_at(0);
            Slice data(json_text.data, json_text.size);
            st = to_serde->deserialize_one_cell_from_json(*col_to, data, parse_options);
        }
        if (!st.ok()) {
            // Either step failed: the row becomes NULL with a placeholder value.
            null_flags[i] = 1;
            col_to->insert_default();
        }
    }

    block.replace_by_position(
            result, ColumnNullable::create(std::move(col_to), std::move(col_null_map_to)));
    return Status::OK();
}
79
80
/// Parses every string of the source column (`arguments[0]`) into the type registered at
/// the result position, producing a nullable column.
///
/// Empty strings insert a default value; they are additionally marked NULL when the target
/// is a complex type. Strings the target SerDe cannot parse become NULL rows with a
/// default value. `context`, `input_rows_count` and `null_map` are not consulted; the row
/// count is taken from the source column itself.
///
/// @return Status::RuntimeError when the source column is not a ColumnString,
///         Status::OK() otherwise.
Status cast_from_string_to_generic(FunctionContext* context, Block& block,
                                   const ColumnNumbers& arguments, uint32_t result,
                                   size_t input_rows_count, const NullMap::value_type* null_map) {
    const IColumn& col_from = *block.get_by_position(arguments[0]).column;
    // the result position must already carry its type
    DCHECK(block.get_by_position(result).type != nullptr);
    auto data_type_to = block.get_by_position(result).type;

    const auto* source_strings = check_and_get_column<ColumnString>(&col_from);
    if (source_strings == nullptr) {
        return Status::RuntimeError(
                "Illegal column {} of first argument of conversion function from string",
                col_from.get_name());
    }

    const size_t row_count = col_from.size();
    auto target = data_type_to->create_column();
    auto serde = data_type_to->get_serde();
    target->reserve(row_count);

    ColumnUInt8::MutablePtr null_column = ColumnUInt8::create(row_count, 0);
    ColumnUInt8::Container& null_flags = null_column->get_data();
    const bool target_is_complex = is_complex_type(data_type_to->get_primitive_type());

    DataTypeSerDe::FormatOptions parse_options;
    parse_options.converted_from_string = true;
    parse_options.escape_char = '\\';

    for (size_t row = 0; row < row_count; ++row) {
        const auto& text = source_strings->get_data_at(row);
        if (text.size == 0) {
            target->insert_default();
            // an empty string ('') is not a valid literal for a complex type, so it is NULL
            if (target_is_complex) {
                null_flags[row] = 1;
            }
            continue;
        }

        Slice text_slice(text.data, text.size);
        Status st = serde->deserialize_one_cell_from_json(*target, text_slice, parse_options);
        // a string that fails to parse yields NULL
        if (!st.ok()) {
            null_flags[row] = 1;
            target->insert_default();
        }
    }

    block.get_by_position(result).column =
            ColumnNullable::create(std::move(target), std::move(null_column));
    return Status::OK();
}
130
131
/// Builds one cast wrapper per element-type pair.
///
/// `from_element_types[i]` is paired with `to_element_types[i]`; both lists are expected to
/// have the same length (checked with DCHECK). The wrappers are returned in the same order
/// as the input types.
ElementWrappers get_element_wrappers(FunctionContext* context, const DataTypes& from_element_types,
                                     const DataTypes& to_element_types) {
    DCHECK(from_element_types.size() == to_element_types.size());

    const size_t element_count = from_element_types.size();
    ElementWrappers wrappers;
    wrappers.reserve(element_count);
    for (size_t idx = 0; idx != element_count; ++idx) {
        wrappers.push_back(prepare_unpack_dictionaries(context, from_element_types[idx],
                                                       to_element_types[idx]));
    }
    return wrappers;
}
144
145
84
/// Builds a cast wrapper that rejects every invocation.
///
/// The returned callable ignores all of its arguments and returns
/// Status::InvalidArgument built from its own copy of `error_msg`.
WrapperType create_unsupport_wrapper(const String error_msg) {
    auto always_fail = [message = error_msg](FunctionContext* /*context*/, Block& /*block*/,
                                             const ColumnNumbers& /*arguments*/,
                                             uint32_t /*result*/, size_t /*input_rows_count*/,
                                             const NullMap::value_type* null_map = nullptr) {
        return Status::InvalidArgument(message);
    };
    return always_fail;
}
152
153
0
/// Convenience overload: builds the rejecting wrapper with a standard
/// "Conversion from X to Y is not supported" message for the given type names.
WrapperType create_unsupport_wrapper(const String from_type_name, const String to_type_name) {
    return create_unsupport_wrapper(
            fmt::format("Conversion from {} to {} is not supported", from_type_name, to_type_name));
}
158
159
75.2k
/// Builds a cast wrapper for the case where no conversion is needed.
///
/// The returned callable assigns the column pointer of the first argument position to the
/// result position and returns Status::OK(). The data type parameter is not used.
WrapperType create_identity_wrapper(const DataTypePtr&) {
    auto pass_through = [](FunctionContext* context, Block& block,
                           const ColumnNumbers& arguments, uint32_t result,
                           size_t /*input_rows_count*/,
                           const NullMap::value_type* null_map = nullptr) {
        const auto& source = block.get_by_position(arguments.front());
        block.get_by_position(result).column = source.column;
        return Status::OK();
    };
    return pass_through;
}
167
168
/// The only difference between these two functions is whether a parse failure produces a NULL row or returns an error.
169
/// Both functions produce nullable result columns.
170
/// Parses a string column into a complex type, turning unparsable rows into NULL.
///
/// The result column is always nullable, whatever the nullability of the result type.
/// Rows flagged in `null_map` and rows whose text the target SerDe rejects are inserted as
/// NULL; successfully parsed rows are appended with a null flag of 0.
///
/// @param context           may be null; when it or its state is missing, UTC is used as
///                          the time zone.
/// @param block             block holding the source column and the result position.
/// @param arguments         `arguments[0]` is the position of the source string column.
/// @param result            position whose type selects the target and whose column is set.
/// @param input_rows_count  number of rows to convert.
/// @param null_map          optional per-row flags; a non-zero entry marks an input NULL.
/// @return Status::RuntimeError when the source column is not a string column,
///         Status::OK() otherwise.
Status cast_from_string_to_complex_type(FunctionContext* context, Block& block,
                                        const ColumnNumbers& arguments, uint32_t result,
                                        size_t input_rows_count,
                                        const NullMap::value_type* null_map) {
    const IColumn& source_column = *block.get_by_position(arguments[0]).column;
    const auto* col_from = check_and_get_column<DataTypeString::ColumnType>(&source_column);
    if (col_from == nullptr) {
        // check_and_get_column yields nullptr on a type mismatch; report it instead of
        // dereferencing a null pointer in the loop below.
        return Status::RuntimeError(
                "Illegal column {} of first argument of conversion function from string",
                source_column.get_name());
    }

    auto to_type = block.get_by_position(result).type;
    auto to_serde = remove_nullable(to_type)->get_serde();

    // string to complex type is always nullable
    MutableColumnPtr to_column = make_nullable(to_type)->create_column();
    auto& nullable_col_to = assert_cast<ColumnNullable&>(*to_column);
    auto& nested_column = nullable_col_to.get_nested_column();

    DataTypeSerDe::FormatOptions options;
    options.converted_from_string = true;
    options.escape_char = '\\';
    // Fall back to UTC when no runtime state is attached to the context.
    auto utc_time_zone = cctz::utc_time_zone();
    options.timezone = (context && context->state()) ? &context->state()->timezone_obj()
                                                     : &utc_time_zone;

    for (size_t row = 0; row < input_rows_count; ++row) {
        if (null_map && null_map[row]) {
            nullable_col_to.insert_default();
            continue;
        }
        auto str = col_from->get_data_at(row);
        Status st = to_serde->from_string(str, nested_column, options);
        if (st.ok()) {
            // from_string appended the value to the nested column; mark it not-null
            nullable_col_to.get_null_map_data().push_back(0);
        } else {
            // fill null if fail
            // NOTE(review): presumably from_string leaves nested_column untouched on
            // failure, otherwise the nested column and null map diverge — confirm.
            nullable_col_to.insert_default();
        }
    }

    block.get_by_position(result).column = std::move(to_column);
    return Status::OK();
}
207
208
/// Parses a string column into a complex type, failing the whole cast on the first
/// unparsable row.
///
/// The result column is always nullable. Rows flagged in `null_map` are inserted as NULL;
/// every other row must parse with the target SerDe's strict-mode parser. On the first
/// parse error that error is returned and the result position of `block` is left
/// unassigned.
///
/// @param context           may be null; when it or its state is missing, UTC is used as
///                          the time zone.
/// @param block             block holding the source column and the result position.
/// @param arguments         `arguments[0]` is the position of the source string column.
/// @param result            position whose type selects the target and whose column is set.
/// @param input_rows_count  number of rows to convert.
/// @param null_map          optional per-row flags; a non-zero entry marks an input NULL.
/// @return Status::RuntimeError when the source column is not a string column, the first
///         parse error if any row is rejected, Status::OK() otherwise.
Status cast_from_string_to_complex_type_strict_mode(FunctionContext* context, Block& block,
                                                    const ColumnNumbers& arguments, uint32_t result,
                                                    size_t input_rows_count,
                                                    const NullMap::value_type* null_map) {
    const IColumn& source_column = *block.get_by_position(arguments[0]).column;
    const auto* col_from = check_and_get_column<DataTypeString::ColumnType>(&source_column);
    if (col_from == nullptr) {
        // check_and_get_column yields nullptr on a type mismatch; report it instead of
        // dereferencing a null pointer in the loop below.
        return Status::RuntimeError(
                "Illegal column {} of first argument of conversion function from string",
                source_column.get_name());
    }

    auto to_type = block.get_by_position(result).type;
    auto to_serde = remove_nullable(to_type)->get_serde();

    // string to complex type is always nullable
    MutableColumnPtr to_column = make_nullable(to_type)->create_column();
    auto& nullable_col_to = assert_cast<ColumnNullable&>(*to_column);
    auto& nested_column = nullable_col_to.get_nested_column();

    DataTypeSerDe::FormatOptions options;
    options.converted_from_string = true;
    options.escape_char = '\\';
    // Fall back to UTC when no runtime state is attached to the context.
    auto utc_time_zone = cctz::utc_time_zone();
    options.timezone = (context && context->state()) ? &context->state()->timezone_obj()
                                                     : &utc_time_zone;

    for (size_t row = 0; row < input_rows_count; ++row) {
        if (null_map && null_map[row]) {
            nullable_col_to.insert_default();
            continue;
        }
        auto str = col_from->get_data_at(row);
        RETURN_IF_ERROR(to_serde->from_string_strict_mode(str, nested_column, options));
        // the value was parsed into the nested column; mark it not-null
        nullable_col_to.get_null_map_data().push_back(0);
    }

    block.get_by_position(result).column = std::move(to_column);
    return Status::OK();
}
241
242
} // namespace doris::CastWrapper