Coverage Report

Created: 2026-03-15 08:11

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/cast/cast_base.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "exprs/function/cast/cast_base.h"
19
20
#include <cstdint>
21
22
#include "util/jsonb_writer.h"
23
namespace doris::CastWrapper {
24
25
/// Casts the first argument column to the result type (JSONB, per the function name) by
/// round-tripping every row through its JSON text form.
///
/// Each row is serialized to a JSON string with the source type's serde and then parsed
/// with the result type's serde. A row whose serialization or parsing fails is emitted as
/// NULL (default value + null flag) instead of failing the whole cast.
///
/// @param context          function context; when it or its state is null, UTC is used as
///                         the timezone for serialization
/// @param block            block holding the argument column; receives the result column
/// @param arguments        argument positions; only arguments[0] is read
/// @param result           position of the result column; its type drives the output column
/// @param input_rows_count number of rows to convert
/// @param null_map         not used by this function
/// @return always Status::OK(); per-row failures are reported through the null map
Status cast_from_generic_to_jsonb(FunctionContext* context, Block& block,
                                  const ColumnNumbers& arguments, uint32_t result,
                                  size_t input_rows_count, const NullMap::value_type* null_map) {
    auto data_type_to = block.get_by_position(result).type;
    const auto& col_with_type_and_name = block.get_by_position(arguments[0]);
    const IDataType& type = *col_with_type_and_name.type;
    const IColumn& col_from = *col_with_type_and_name.column;

    // Exactly one row is appended to col_to per loop iteration, so the null map must hold
    // input_rows_count entries to stay aligned with it (and to keep every index in range).
    ColumnUInt8::MutablePtr col_null_map_to = ColumnUInt8::create(input_rows_count, 0);
    ColumnUInt8::Container& null_flags = col_null_map_to->get_data();

    // Options used when parsing the intermediate JSON text into the result column.
    DataTypeSerDe::FormatOptions parse_options;
    parse_options.converted_from_string = true;

    DataTypeSerDeSPtr from_serde = type.get_serde();
    DataTypeSerDeSPtr to_serde = data_type_to->get_serde();
    auto col_to = data_type_to->create_column();

    // Scratch column that holds the JSON text of the row currently being converted.
    auto tmp_col = ColumnString::create();

    // Options used when serializing the source row to JSON text.
    DataTypeSerDe::FormatOptions serialize_options;
    auto time_zone = cctz::utc_time_zone();
    serialize_options.timezone =
            (context && context->state()) ? &context->state()->timezone_obj() : &time_zone;
    serialize_options.escape_char = '\\';

    for (size_t i = 0; i < input_rows_count; i++) {
        // convert to string
        tmp_col->clear();
        VectorBufferWriter write_buffer(*tmp_col.get());
        Status st = from_serde->serialize_column_to_json(col_from, i, i + 1, write_buffer,
                                                         serialize_options);
        // if serialization failed, the row becomes null
        if (!st.ok()) {
            null_flags[i] = 1;
            col_to->insert_default();
            continue;
        }
        write_buffer.commit();

        // tmp_col was cleared above, so the freshly committed text is its only row.
        auto str_ref = tmp_col->get_data_at(0);
        Slice data(str_ref.data, str_ref.size);
        // then try to parse the string
        st = to_serde->deserialize_one_cell_from_json(*col_to, data, parse_options);
        // if parsing failed, the row becomes null
        if (!st.ok()) {
            null_flags[i] = 1;
            col_to->insert_default();
        }
    }

    block.replace_by_position(
            result, ColumnNullable::create(std::move(col_to), std::move(col_null_map_to)));
    return Status::OK();
}
79
80
/// Parses every row of a string column into the result type using that type's serde.
///
/// The result is always a nullable column. A row that fails to parse becomes NULL. An
/// empty string yields the default value, and is additionally flagged NULL when the result
/// type is a complex type.
///
/// @param context          not used by this function
/// @param block            block holding the argument column; receives the result column
/// @param arguments        argument positions; only arguments[0] is read
/// @param result           position of the result column, whose type must already be set
/// @param input_rows_count not used; the row count is taken from the argument column
/// @param null_map         not used by this function
/// @return Status::OK(), or RuntimeError when the argument is not a ColumnString
Status cast_from_string_to_generic(FunctionContext* context, Block& block,
                                   const ColumnNumbers& arguments, uint32_t result,
                                   size_t input_rows_count, const NullMap::value_type* null_map) {
    const IColumn& source_column = *block.get_by_position(arguments[0]).column;
    // result column must set type
    DCHECK(block.get_by_position(result).type != nullptr);
    auto target_type = block.get_by_position(result).type;

    const auto* string_column = check_and_get_column<ColumnString>(&source_column);
    if (string_column == nullptr) {
        return Status::RuntimeError(
                "Illegal column {} of first argument of conversion function from string",
                source_column.get_name());
    }

    auto target_column = target_type->create_column();
    auto target_serde = target_type->get_serde();
    const size_t row_count = source_column.size();
    target_column->reserve(row_count);

    ColumnUInt8::MutablePtr null_flags_column = ColumnUInt8::create(row_count, 0);
    ColumnUInt8::Container& null_flags = null_flags_column->get_data();
    const bool target_is_complex = is_complex_type(target_type->get_primitive_type());

    DataTypeSerDe::FormatOptions parse_options;
    parse_options.converted_from_string = true;
    parse_options.escape_char = '\\';

    for (size_t row = 0; row < row_count; ++row) {
        const auto& cell = string_column->get_data_at(row);
        // Note: here we should handle the null element
        if (cell.size == 0) {
            target_column->insert_default();
            // empty string('') is an invalid format for complex type, set null_map to 1
            if (target_is_complex) {
                null_flags[row] = 1;
            }
            continue;
        }

        Slice cell_slice(cell.data, cell.size);
        const Status parse_status =
                target_serde->deserialize_one_cell_from_json(*target_column, cell_slice,
                                                             parse_options);
        // if parsing failed, will return null
        const bool parse_failed = !parse_status.ok();
        null_flags[row] = parse_failed;
        if (parse_failed) {
            target_column->insert_default();
        }
    }

    block.get_by_position(result).column =
            ColumnNullable::create(std::move(target_column), std::move(null_flags_column));
    return Status::OK();
}
130
131
/// Builds one cast wrapper per element type pair.
///
/// The i-th wrapper is obtained from prepare_unpack_dictionaries() for
/// from_element_types[i] -> to_element_types[i]. Both lists are expected to have the same
/// length (checked with DCHECK only).
///
/// @param context            function context forwarded to prepare_unpack_dictionaries()
/// @param from_element_types source element types
/// @param to_element_types   destination element types, pairwise matching the sources
/// @return the wrappers, in the same order as the input types
ElementWrappers get_element_wrappers(FunctionContext* context, const DataTypes& from_element_types,
                                     const DataTypes& to_element_types) {
    DCHECK(from_element_types.size() == to_element_types.size());

    ElementWrappers wrappers;
    wrappers.reserve(from_element_types.size());

    // Walk both type lists in lockstep.
    auto to_type_it = to_element_types.begin();
    for (const DataTypePtr& from_type : from_element_types) {
        const DataTypePtr& to_type = *to_type_it;
        wrappers.push_back(prepare_unpack_dictionaries(context, from_type, to_type));
        ++to_type_it;
    }
    return wrappers;
}
144
145
0
/// Creates a wrapper for an unsupported conversion.
///
/// The returned wrapper ignores all of its arguments and always returns
/// Status::InvalidArgument carrying a copy of error_msg.
///
/// @param error_msg message reported by the wrapper each time it is invoked
WrapperType create_unsupport_wrapper(const String error_msg) {
    // error_msg is captured by value so the wrapper stays valid after this call returns.
    return [error_msg](FunctionContext*, Block&, const ColumnNumbers&, uint32_t, size_t,
                       const NullMap::value_type* null_map = nullptr) {
        return Status::InvalidArgument(error_msg);
    };
}
152
153
0
/// Creates a wrapper for an unsupported conversion between two named types.
///
/// Formats the standard "not supported" message from the two type names and delegates to
/// the single-argument overload.
///
/// @param from_type_name name of the source type, used in the error message
/// @param to_type_name   name of the destination type, used in the error message
WrapperType create_unsupport_wrapper(const String from_type_name, const String to_type_name) {
    return create_unsupport_wrapper(
            fmt::format("Conversion from {} to {} is not supported", from_type_name, to_type_name));
}
158
159
131
/// Creates a wrapper that performs no conversion.
///
/// The returned wrapper assigns the first argument's column to the result position and
/// returns Status::OK(). The data type argument is ignored.
WrapperType create_identity_wrapper(const DataTypePtr&) {
    return [](FunctionContext* context, Block& block, const ColumnNumbers& arguments,
              uint32_t result, size_t /*input_rows_count*/,
              const NullMap::value_type* null_map = nullptr) {
        const auto& source = block.get_by_position(arguments.front());
        auto& destination = block.get_by_position(result);
        destination.column = source.column;
        return Status::OK();
    };
}
167
168
/// The only difference between these two functions is whether an error is returned when parsing fails.
169
/// the return columns are both nullable columns.
170
/// Parses a string column into a complex type, turning unparsable rows into NULL.
///
/// The result column is always nullable. Rows flagged in null_map, and rows for which the
/// target serde's from_string() fails, are emitted as NULL; all other rows carry the
/// parsed value.
///
/// @param context          function context; when it or its state is null, UTC is used as
///                         the timezone
/// @param block            block holding the argument column; receives the result column
/// @param arguments        argument positions; only arguments[0] is read
/// @param result           position of the result column; its type drives the output column
/// @param input_rows_count number of rows to convert
/// @param null_map         optional per-row null flags of the input; may be nullptr
/// @return Status::OK(), or RuntimeError when the argument is not a string column
Status cast_from_string_to_complex_type(FunctionContext* context, Block& block,
                                        const ColumnNumbers& arguments, uint32_t result,
                                        size_t input_rows_count,
                                        const NullMap::value_type* null_map) {
    const auto& from_column = block.get_by_position(arguments[0]).column;
    const auto* col_from = check_and_get_column<DataTypeString::ColumnType>(from_column.get());
    // check_and_get_column yields nullptr for any other column kind; report it instead of
    // dereferencing a null pointer in the loop below.
    if (col_from == nullptr) {
        return Status::RuntimeError(
                "Illegal column {} of first argument of conversion function from string",
                from_column->get_name());
    }

    auto to_type = block.get_by_position(result).type;
    auto to_serde = remove_nullable(to_type)->get_serde();

    // string to complex type is always nullable
    MutableColumnPtr to_column = make_nullable(to_type)->create_column();
    auto& nullable_col_to = assert_cast<ColumnNullable&>(*to_column);
    auto& nested_column = nullable_col_to.get_nested_column();

    DataTypeSerDe::FormatOptions options;
    options.converted_from_string = true;
    options.escape_char = '\\';
    // Fall back to UTC when no runtime state is attached to the context.
    auto time_zone = cctz::utc_time_zone();
    options.timezone =
            (context && context->state()) ? &context->state()->timezone_obj() : &time_zone;

    for (size_t i = 0; i < input_rows_count; ++i) {
        if (null_map && null_map[i]) {
            nullable_col_to.insert_default();
            continue;
        }

        auto str = col_from->get_data_at(i);
        Status st = to_serde->from_string(str, nested_column, options);
        if (st.ok()) {
            // from_string appended to the nested column only; add the matching null flag.
            nullable_col_to.get_null_map_data().push_back(0);
        } else {
            nullable_col_to.insert_default(); // fill null if fail
        }
    }

    block.get_by_position(result).column = std::move(to_column);
    return Status::OK();
}
207
208
/// Parses a string column into a complex type, failing the whole cast on the first
/// unparsable row.
///
/// The result column is always nullable. Rows flagged in null_map are emitted as NULL;
/// every other row must be accepted by the target serde's from_string_strict_mode(),
/// otherwise that error status is returned and the block is left without a new result
/// column.
///
/// @param context          function context; when it or its state is null, UTC is used as
///                         the timezone
/// @param block            block holding the argument column; receives the result column
/// @param arguments        argument positions; only arguments[0] is read
/// @param result           position of the result column; its type drives the output column
/// @param input_rows_count number of rows to convert
/// @param null_map         optional per-row null flags of the input; may be nullptr
/// @return Status::OK(), RuntimeError when the argument is not a string column, or the
///         first parse error
Status cast_from_string_to_complex_type_strict_mode(FunctionContext* context, Block& block,
                                                    const ColumnNumbers& arguments, uint32_t result,
                                                    size_t input_rows_count,
                                                    const NullMap::value_type* null_map) {
    const auto& from_column = block.get_by_position(arguments[0]).column;
    const auto* col_from = check_and_get_column<DataTypeString::ColumnType>(from_column.get());
    // check_and_get_column yields nullptr for any other column kind; report it instead of
    // dereferencing a null pointer in the loop below.
    if (col_from == nullptr) {
        return Status::RuntimeError(
                "Illegal column {} of first argument of conversion function from string",
                from_column->get_name());
    }

    auto to_type = block.get_by_position(result).type;
    auto to_serde = remove_nullable(to_type)->get_serde();

    // string to complex type is always nullable
    MutableColumnPtr to_column = make_nullable(to_type)->create_column();
    auto& nullable_col_to = assert_cast<ColumnNullable&>(*to_column);
    auto& nested_column = nullable_col_to.get_nested_column();

    DataTypeSerDe::FormatOptions options;
    options.converted_from_string = true;
    options.escape_char = '\\';
    // Fall back to UTC when no runtime state is attached to the context.
    auto time_zone = cctz::utc_time_zone();
    options.timezone =
            (context && context->state()) ? &context->state()->timezone_obj() : &time_zone;

    for (size_t i = 0; i < input_rows_count; ++i) {
        if (null_map && null_map[i]) {
            to_column->insert_default();
            continue;
        }

        auto str = col_from->get_data_at(i);
        RETURN_IF_ERROR(to_serde->from_string_strict_mode(str, nested_column, options));
        // fill not null if success
        nullable_col_to.get_null_map_data().push_back(0);
    }

    block.get_by_position(result).column = std::move(to_column);
    return Status::OK();
}
241
242
} // namespace doris::CastWrapper