Coverage Report

Created: 2026-03-14 04:23

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/cast/cast_to_jsonb.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "core/assert_cast.h"
19
#include "core/data_type/data_type_jsonb.h"
20
#include "core/data_type/data_type_nullable.h"
21
#include "core/data_type/primitive_type.h"
22
#include "core/data_type_serde/data_type_serde.h"
23
#include "core/string_ref.h"
24
#include "core/value/jsonb_value.h"
25
#include "exprs/function/cast/cast_base.h"
26
#include "exprs/function/cast/cast_to_string.h"
27
#include "util/io_helper.h"
28
#include "util/jsonb_utils.h"
29
#include "util/jsonb_writer.h"
30
31
namespace doris::CastWrapper {
32
#include "common/compile_check_begin.h"
33
34
struct ConvertImplGenericFromJsonb {
35
    static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
36
                          uint32_t result, size_t input_rows_count,
37
1
                          const NullMap::value_type* null_map = nullptr) {
38
1
        auto data_type_to = block.get_by_position(result).type;
39
1
        auto data_type_serde_to = data_type_to->get_serde();
40
41
1
        DataTypeSerDe::FormatOptions options;
42
1
        options.converted_from_string = true;
43
1
        options.escape_char = '\\';
44
1
        options.timezone = &context->state()->timezone_obj();
45
46
1
        const auto& col_with_type_and_name = block.get_by_position(arguments[0]);
47
1
        const IColumn& col_from = *col_with_type_and_name.column;
48
1
        if (const ColumnString* col_from_string = check_and_get_column<ColumnString>(&col_from)) {
49
1
            auto col_to = data_type_to->create_column();
50
51
1
            size_t size = col_from.size();
52
1
            col_to->reserve(size);
53
54
1
            ColumnUInt8::MutablePtr col_null_map_to = ColumnUInt8::create(size, 0);
55
1
            ColumnUInt8::Container* vec_null_map_to = &col_null_map_to->get_data();
56
1
            const bool is_complex = is_complex_type(data_type_to->get_primitive_type());
57
1
            const bool is_dst_string = is_string_type(data_type_to->get_primitive_type());
58
2
            for (size_t i = 0; i < size; ++i) {
59
1
                const auto& val = col_from_string->get_data_at(i);
60
1
                const JsonbDocument* doc = nullptr;
61
1
                auto st = JsonbDocument::checkAndCreateDocument(val.data, val.size, &doc);
62
1
                if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
63
1
                    (*vec_null_map_to)[i] = 1;
64
1
                    col_to->insert_default();
65
1
                    continue;
66
1
                }
67
68
                // value is NOT necessary to be deleted since JsonbValue will not allocate memory
69
0
                const JsonbValue* value = doc->getValue();
70
0
                if (UNLIKELY(!value)) {
71
0
                    (*vec_null_map_to)[i] = 1;
72
0
                    col_to->insert_default();
73
0
                    continue;
74
0
                }
75
                // Note: here we should handle the null element
76
0
                if (val.size == 0) {
77
0
                    col_to->insert_default();
78
                    // empty string('') is an invalid format for complex type, set null_map to 1
79
0
                    if (is_complex) {
80
0
                        (*vec_null_map_to)[i] = 1;
81
0
                    }
82
0
                    continue;
83
0
                }
84
                // add string to string column
85
0
                if (context->jsonb_string_as_string() && is_dst_string && value->isString()) {
86
0
                    const auto* blob = value->unpack<JsonbBinaryVal>();
87
0
                    assert_cast<ColumnString&, TypeCheckOnRelease::DISABLE>(*col_to).insert_data(
88
0
                            blob->getBlob(), blob->getBlobLen());
89
0
                    (*vec_null_map_to)[i] = 0;
90
0
                    continue;
91
0
                }
92
0
                std::string input_str;
93
0
                if (context->jsonb_string_as_string() && value->isString()) {
94
0
                    const auto* blob = value->unpack<JsonbBinaryVal>();
95
0
                    input_str = std::string(blob->getBlob(), blob->getBlobLen());
96
0
                } else {
97
0
                    input_str = JsonbToJson::jsonb_to_json_string(val.data, val.size);
98
0
                }
99
0
                if (input_str.empty()) {
100
0
                    col_to->insert_default();
101
0
                    (*vec_null_map_to)[i] = 1;
102
0
                    continue;
103
0
                }
104
0
                StringRef read_buffer((char*)(input_str.data()), input_str.size());
105
0
                st = data_type_serde_to->from_string(read_buffer, *col_to, options);
106
                // if parsing failed, will return null
107
0
                (*vec_null_map_to)[i] = !st.ok();
108
0
                if (!st.ok()) {
109
0
                    col_to->insert_default();
110
0
                }
111
0
            }
112
1
            block.get_by_position(result).column =
113
1
                    ColumnNullable::create(std::move(col_to), std::move(col_null_map_to));
114
1
        } else {
115
0
            return Status::RuntimeError(
116
0
                    "Illegal column {} of first argument of conversion function from string",
117
0
                    col_from.get_name());
118
0
        }
119
1
        return Status::OK();
120
1
    }
121
};
122
123
0
/// Returns true when `pt` is an integer/boolean, float/double, string or
/// decimal primitive type, or is `TYPE_ARRAY` / `TYPE_STRUCT`.
inline bool can_cast_json_type(PrimitiveType pt) {
    const bool is_scalar_candidate = is_int_or_bool(pt) || is_float_or_double(pt) ||
                                     is_string_type(pt) || is_decimal(pt);
    if (is_scalar_candidate) {
        return true;
    }
    return pt == TYPE_ARRAY || pt == TYPE_STRUCT;
}
127
128
// check jsonb value type and get to_type value
129
WrapperType create_cast_from_jsonb_wrapper(const DataTypeJsonb& from_type,
130
                                           const DataTypePtr& to_type,
131
113
                                           bool jsonb_string_as_string) {
132
113
    if (is_string_type(to_type->get_primitive_type()) && jsonb_string_as_string) {
133
1
        return ConvertImplGenericFromJsonb::execute;
134
1
    }
135
136
112
    return [](FunctionContext* context, Block& block, const ColumnNumbers& arguments,
137
112
              uint32_t result, size_t input_rows_count, const NullMap::value_type*) {
138
112
        CastParameters params;
139
112
        params.is_strict = context->enable_strict_mode();
140
141
112
        auto data_type_to = remove_nullable(block.get_by_position(result).type);
142
112
        auto serde_to = data_type_to->get_serde();
143
144
112
        const auto& col_from_json =
145
112
                assert_cast<const ColumnString&>(*block.get_by_position(arguments[0]).column);
146
147
112
        auto column_to = make_nullable(data_type_to)->create_column();
148
112
        auto& column_to_nullable = assert_cast<ColumnNullable&>(*column_to);
149
150
112
        RETURN_IF_ERROR(serde_to->deserialize_column_from_jsonb_vector(column_to_nullable,
151
112
                                                                       col_from_json, params));
152
153
112
        block.get_by_position(result).column = std::move(column_to);
154
112
        return Status::OK();
155
112
    };
156
113
}
157
158
struct ParseJsonbFromString {
159
4
    static Status parse_json(const StringRef& str, ColumnString& column_string) {
160
4
        if (str.empty()) {
161
0
            return Status::InvalidArgument("Empty string cannot be parsed as jsonb");
162
0
        }
163
4
        JsonBinaryValue value;
164
4
        auto st = (value.from_json_string(str.data, str.size));
165
4
        if (!st.ok()) {
166
2
            return Status::InvalidArgument("Failed to parse json string: {}, error: {}",
167
2
                                           str.to_string(), st.msg());
168
2
        }
169
2
        column_string.insert_data(value.value(), value.size());
170
2
        return Status::OK();
171
4
    }
172
173
    static Status execute_non_strict(const ColumnString& col_from, size_t size,
174
4
                                     ColumnPtr& column_result) {
175
4
        auto col_to = ColumnString::create();
176
4
        auto col_null = ColumnBool::create(size, 0);
177
4
        auto& vec_null_map_to = col_null->get_data();
178
179
8
        for (size_t i = 0; i < size; ++i) {
180
4
            Status st = parse_json(col_from.get_data_at(i), *col_to);
181
4
            vec_null_map_to[i] = !st.ok();
182
4
            if (!st.ok()) [[unlikely]] {
183
2
                col_to->insert_default();
184
2
            }
185
4
        }
186
4
        column_result = ColumnNullable::create(std::move(col_to), std::move(col_null));
187
4
        return Status::OK();
188
4
    }
189
190
    // in both strict or non-strict mode, the return type is nullable column
191
    static Status execute_strict(const ColumnString& col_from, const NullMap::value_type* null_map,
192
0
                                 size_t size, ColumnPtr& column_result) {
193
0
        auto col_to = ColumnString::create();
194
0
        for (size_t i = 0; i < size; ++i) {
195
0
            if (null_map && null_map[i]) {
196
0
                col_to->insert_default();
197
0
                continue;
198
0
            }
199
0
            RETURN_IF_ERROR(parse_json(col_from.get_data_at(i), *col_to));
200
0
        }
201
0
        column_result = ColumnNullable::create(std::move(col_to), ColumnBool::create(size, 0));
202
0
        return Status::OK();
203
0
    }
204
205
    static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
206
                          uint32_t result, size_t input_rows_count,
207
4
                          const NullMap::value_type* null_map) {
208
4
        const auto& col_from =
209
4
                assert_cast<const ColumnString&>(*block.get_by_position(arguments[0]).column);
210
4
        const auto size = col_from.size();
211
212
4
        ColumnPtr column_result;
213
4
        if (context->enable_strict_mode()) {
214
0
            RETURN_IF_ERROR(execute_strict(col_from, null_map, size, column_result));
215
216
4
        } else {
217
4
            RETURN_IF_ERROR(execute_non_strict(col_from, size, column_result));
218
4
        }
219
4
        block.get_by_position(result).column = std::move(column_result);
220
221
4
        return Status::OK();
222
4
    }
223
};
224
225
// create corresponding jsonb value with type to_type
226
// use jsonb writer to create jsonb value
227
WrapperType create_cast_to_jsonb_wrapper(const DataTypePtr& from_type, const DataTypeJsonb& to_type,
228
48.1k
                                         bool string_as_jsonb_string) {
229
    // parse string as jsonb
230
48.1k
    if (is_string_type(from_type->get_primitive_type()) && !string_as_jsonb_string) {
231
4
        return ParseJsonbFromString::execute;
232
4
    }
233
234
48.1k
    return [](FunctionContext* context, Block& block, const ColumnNumbers& arguments,
235
48.1k
              uint32_t result, size_t input_rows_count, const NullMap::value_type*) {
236
        // same as to_json function
237
48.1k
        auto to_column = ColumnString::create();
238
48.1k
        auto from_type_serde = block.get_by_position(arguments[0]).type->get_serde();
239
48.1k
        auto from_column = block.get_by_position(arguments[0]).column;
240
48.1k
        RETURN_IF_ERROR(
241
48.1k
                from_type_serde->serialize_column_to_jsonb_vector(*from_column, *to_column));
242
48.1k
        block.get_by_position(result).column = std::move(to_column);
243
48.1k
        return Status::OK();
244
48.1k
    };
245
48.1k
}
246
#include "common/compile_check_end.h"
247
} // namespace doris::CastWrapper