Coverage Report

Created: 2026-03-15 18:33

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/cast/cast_base.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "exprs/function/cast/cast_base.h"
19
20
#include <cstdint>
21
22
#include "util/jsonb_writer.h"
23
namespace doris::CastWrapper {
24
25
/// Casts the first argument column to the result type by round-tripping every row through
/// its JSON text form: the source serde serializes the row into a scratch string column and
/// the result type's serde parses that text back into the destination column.
///
/// @param context           may be null; when it (or its state) is missing, UTC is used as the
///                          time zone for serialization.
/// @param block             holds the argument column and receives the result column.
/// @param arguments         arguments[0] is the position of the source column in `block`.
/// @param result            position of the result column in `block`; its type selects the
///                          destination serde.
/// @param input_rows_count  number of rows to convert.
/// @param null_map          not consulted by this function.
/// @return always Status::OK(); a row that fails to serialize or parse becomes a default
///         value flagged as NULL in the result's null map instead of failing the cast.
Status cast_from_generic_to_jsonb(FunctionContext* context, Block& block,
                                  const ColumnNumbers& arguments, uint32_t result,
                                  size_t input_rows_count, const NullMap::value_type* null_map) {
    auto data_type_to = block.get_by_position(result).type;
    const auto& col_with_type_and_name = block.get_by_position(arguments[0]);
    const IDataType& type = *col_with_type_and_name.type;
    const IColumn& col_from = *col_with_type_and_name.column;

    ColumnUInt8::MutablePtr col_null_map_to = ColumnUInt8::create(col_from.size(), 0);
    ColumnUInt8::Container* vec_null_map_to = &col_null_map_to->get_data();

    // Options used when parsing the intermediate text into the destination type.
    DataTypeSerDe::FormatOptions format_options;
    format_options.converted_from_string = true;
    DataTypeSerDeSPtr from_serde = type.get_serde();
    DataTypeSerDeSPtr to_serde = data_type_to->get_serde();
    auto col_to = data_type_to->create_column();

    // Scratch column reused for every row; it only ever holds the current row's text.
    auto tmp_col = ColumnString::create();

    // Options used when serializing the source value to text.
    DataTypeSerDe::FormatOptions options;
    auto time_zone = cctz::utc_time_zone();
    options.timezone =
            (context && context->state()) ? &context->state()->timezone_obj() : &time_zone;
    options.escape_char = '\\';

    for (size_t i = 0; i < input_rows_count; i++) {
        // convert to string
        tmp_col->clear();
        VectorBufferWriter write_buffer(*tmp_col.get());
        Status st = from_serde->serialize_column_to_json(col_from, i, i + 1, write_buffer, options);
        if (st.ok()) {
            write_buffer.commit();
            // The scratch column was cleared above, so the serialized row is at index 0.
            auto str_ref = tmp_col->get_data_at(0);
            Slice data(str_ref.data, str_ref.size);
            // try to parse the serialized string into the destination type
            st = to_serde->deserialize_one_cell_from_json(*col_to, data, format_options);
        }
        // if serializing or parsing failed, the row becomes null
        (*vec_null_map_to)[i] = !st.ok();
        if (!st.ok()) {
            // keep col_to aligned with the null map by inserting a placeholder value
            col_to->insert_default();
        }
    }

    block.replace_by_position(
            result, ColumnNullable::create(std::move(col_to), std::move(col_null_map_to)));
    return Status::OK();
}
79
80
/// Parses every row of a string column into the result type using that type's serde and
/// stores the outcome in `block` as a nullable column.
///
/// An empty string inserts a default value; it is flagged NULL only when the result type is
/// a complex type. A non-empty string that fails to parse inserts a default value flagged
/// NULL. Returns a RuntimeError when the first argument is not a ColumnString.
/// `context`, `input_rows_count` and `null_map` are not consulted; the row count is taken
/// from the source column.
Status cast_from_string_to_generic(FunctionContext* context, Block& block,
                                   const ColumnNumbers& arguments, uint32_t result,
                                   size_t input_rows_count, const NullMap::value_type* null_map) {
    const IColumn& source_column = *block.get_by_position(arguments[0]).column;
    // result column must set type
    DCHECK(block.get_by_position(result).type != nullptr);
    auto target_type = block.get_by_position(result).type;

    const auto* source_strings = check_and_get_column<ColumnString>(&source_column);
    if (source_strings == nullptr) {
        return Status::RuntimeError(
                "Illegal column {} of first argument of conversion function from string",
                source_column.get_name());
    }

    auto parsed_column = target_type->create_column();
    auto target_serde = target_type->get_serde();
    const size_t row_count = source_column.size();
    parsed_column->reserve(row_count);

    ColumnUInt8::MutablePtr null_flags_column = ColumnUInt8::create(row_count, 0);
    auto& null_flags = null_flags_column->get_data();
    const bool target_is_complex = is_complex_type(target_type->get_primitive_type());

    DataTypeSerDe::FormatOptions parse_options;
    parse_options.converted_from_string = true;
    parse_options.escape_char = '\\';

    for (size_t row = 0; row < row_count; ++row) {
        const auto& cell = source_strings->get_data_at(row);
        if (cell.size != 0) {
            Slice cell_slice(cell.data, cell.size);
            Status parse_status =
                    target_serde->deserialize_one_cell_from_json(*parsed_column, cell_slice,
                                                                 parse_options);
            // a cell that cannot be parsed becomes null
            const bool parse_failed = !parse_status.ok();
            null_flags[row] = parse_failed;
            if (parse_failed) {
                parsed_column->insert_default();
            }
            continue;
        }

        // Note: here we should handle the null element
        parsed_column->insert_default();
        // empty string('') is an invalid format for complex type, set null_map to 1
        if (target_is_complex) {
            null_flags[row] = 1;
        }
    }

    block.get_by_position(result).column =
            ColumnNullable::create(std::move(parsed_column), std::move(null_flags_column));
    return Status::OK();
}
130
131
/// Builds one cast wrapper per element by pairing from_element_types[i] with
/// to_element_types[i] and passing each pair to prepare_unpack_dictionaries.
/// Both lists are expected to have the same length (checked with DCHECK).
ElementWrappers get_element_wrappers(FunctionContext* context, const DataTypes& from_element_types,
                                     const DataTypes& to_element_types) {
    DCHECK(from_element_types.size() == to_element_types.size());

    const size_t element_count = from_element_types.size();
    ElementWrappers wrappers;
    wrappers.reserve(element_count);

    for (size_t idx = 0; idx != element_count; ++idx) {
        wrappers.push_back(prepare_unpack_dictionaries(context, from_element_types[idx],
                                                       to_element_types[idx]));
    }
    return wrappers;
}
144
145
83
/// Returns a wrapper that ignores all of its arguments and always fails with
/// Status::InvalidArgument carrying `error_msg`.
///
/// The message is taken by value and moved into the closure, so it is stored once instead of
/// being copied a second time at capture.
WrapperType create_unsupport_wrapper(String error_msg) {
    return [msg = std::move(error_msg)](FunctionContext* /*context*/, Block& /*block*/,
                                        const ColumnNumbers& /*arguments*/,
                                        uint32_t /*result*/, size_t /*input_rows_count*/,
                                        const NullMap::value_type* /*null_map*/ = nullptr) {
        return Status::InvalidArgument(msg);
    };
}
152
153
0
/// Convenience overload: formats the standard "conversion not supported" message for the
/// given type names and delegates to the single-message overload.
WrapperType create_unsupport_wrapper(const String from_type_name, const String to_type_name) {
    return create_unsupport_wrapper(
            fmt::format("Conversion from {} to {} is not supported", from_type_name, to_type_name));
}
158
159
68.2k
/// Returns a wrapper that performs no conversion: the result slot of the block is pointed at
/// the same column as the first argument, and Status::OK() is returned.
WrapperType create_identity_wrapper(const DataTypePtr&) {
    return [](FunctionContext* context, Block& block, const ColumnNumbers& arguments,
              uint32_t result, size_t /*input_rows_count*/,
              const NullMap::value_type* null_map = nullptr) {
        const auto& source_entry = block.get_by_position(arguments.front());
        auto& result_entry = block.get_by_position(result);
        result_entry.column = source_entry.column;
        return Status::OK();
    };
}
167
168
/// The only difference between these two functions is whether a parse failure yields NULL or returns an error.
169
/// Both functions return a nullable column.
170
/// Non-strict cast from a string column to a complex type: every row is parsed with the
/// result type's serde, and a row that fails to parse becomes NULL instead of failing the cast.
///
/// @param context           may be null; when it (or its state) is missing, UTC is used as the
///                          time zone for parsing.
/// @param block             holds the argument column and receives the result column.
/// @param arguments         arguments[0] is the position of the source string column.
/// @param result            position of the result column in `block`.
/// @param input_rows_count  number of rows to convert.
/// @param null_map          optional; rows with a non-zero entry are not parsed and a default
///                          (null) entry is inserted for them.
/// @return Status::OK(), or a RuntimeError when the first argument is not a string column.
Status cast_from_string_to_complex_type(FunctionContext* context, Block& block,
                                        const ColumnNumbers& arguments, uint32_t result,
                                        size_t input_rows_count,
                                        const NullMap::value_type* null_map) {
    const auto& from_column = block.get_by_position(arguments[0]).column;
    const auto* col_from = check_and_get_column<DataTypeString::ColumnType>(from_column.get());
    if (col_from == nullptr) {
        // Report an unexpected column type instead of dereferencing a null pointer below.
        return Status::RuntimeError(
                "Illegal column {} of first argument of conversion function from string",
                from_column->get_name());
    }

    auto to_type = block.get_by_position(result).type;
    auto to_serde = remove_nullable(to_type)->get_serde();

    // string to complex type is always nullable
    MutableColumnPtr to_column = make_nullable(to_type)->create_column();
    auto& nullable_col_to = assert_cast<ColumnNullable&>(*to_column);
    auto& nested_column = nullable_col_to.get_nested_column();

    DataTypeSerDe::FormatOptions options;
    options.converted_from_string = true;
    options.escape_char = '\\';
    // Fall back to UTC when no runtime state is attached to the context.
    auto utc_time_zone = cctz::utc_time_zone();
    options.timezone =
            (context && context->state()) ? &context->state()->timezone_obj() : &utc_time_zone;

    for (size_t i = 0; i < input_rows_count; ++i) {
        if (null_map && null_map[i]) {
            nullable_col_to.insert_default();
        } else {
            auto str = col_from->get_data_at(i);
            Status st = to_serde->from_string(str, nested_column, options);
            if (st.ok()) {
                // the value was appended to the nested column; mark the row as not null
                nullable_col_to.get_null_map_data().push_back(0);
            } else {
                // NOTE(review): presumably from_string leaves nested_column untouched on
                // failure, otherwise nested data and null map would diverge - confirm.
                nullable_col_to.insert_default(); // fill null if fail
            }
        }
    }

    block.get_by_position(result).column = std::move(to_column);
    return Status::OK();
}
207
208
/// Strict cast from a string column to a complex type: every row is parsed with the result
/// type's serde, and the first row that fails to parse aborts the cast with that error.
///
/// @param context           may be null; when it (or its state) is missing, UTC is used as the
///                          time zone for parsing.
/// @param block             holds the argument column; receives the result column only when
///                          every row parsed successfully.
/// @param arguments         arguments[0] is the position of the source string column.
/// @param result            position of the result column in `block`.
/// @param input_rows_count  number of rows to convert.
/// @param null_map          optional; rows with a non-zero entry are not parsed and a default
///                          (null) entry is inserted for them.
/// @return Status::OK(); the error from from_string_strict_mode for the first failing row; or
///         a RuntimeError when the first argument is not a string column.
Status cast_from_string_to_complex_type_strict_mode(FunctionContext* context, Block& block,
                                                    const ColumnNumbers& arguments, uint32_t result,
                                                    size_t input_rows_count,
                                                    const NullMap::value_type* null_map) {
    const auto& from_column = block.get_by_position(arguments[0]).column;
    const auto* col_from = check_and_get_column<DataTypeString::ColumnType>(from_column.get());
    if (col_from == nullptr) {
        // Report an unexpected column type instead of dereferencing a null pointer below.
        return Status::RuntimeError(
                "Illegal column {} of first argument of conversion function from string",
                from_column->get_name());
    }

    auto to_type = block.get_by_position(result).type;
    auto to_serde = remove_nullable(to_type)->get_serde();

    // string to complex type is always nullable
    MutableColumnPtr to_column = make_nullable(to_type)->create_column();
    auto& nullable_col_to = assert_cast<ColumnNullable&>(*to_column);
    auto& nested_column = nullable_col_to.get_nested_column();

    DataTypeSerDe::FormatOptions options;
    options.converted_from_string = true;
    options.escape_char = '\\';
    // Fall back to UTC when no runtime state is attached to the context.
    auto utc_time_zone = cctz::utc_time_zone();
    options.timezone =
            (context && context->state()) ? &context->state()->timezone_obj() : &utc_time_zone;

    for (size_t i = 0; i < input_rows_count; ++i) {
        if (null_map && null_map[i]) {
            // to_column and nullable_col_to are the same object; this inserts a null row
            nullable_col_to.insert_default();
        } else {
            auto str = col_from->get_data_at(i);
            // strict mode: propagate the parse error to the caller instead of producing null
            RETURN_IF_ERROR(to_serde->from_string_strict_mode(str, nested_column, options));
            // fill not null if success
            nullable_col_to.get_null_map_data().push_back(0);
        }
    }
    block.get_by_position(result).column = std::move(to_column);
    return Status::OK();
}
241
242
} // namespace doris::CastWrapper