Coverage Report

Created: 2026-04-03 05:44

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exec/es/es_scroll_parser.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "exec/es/es_scroll_parser.h"
19
20
#include <absl/strings/substitute.h>
21
#include <cctz/time_zone.h>
22
#include <glog/logging.h>
23
#include <rapidjson/allocators.h>
24
#include <rapidjson/encodings.h>
25
#include <stdint.h>
26
#include <string.h>
27
28
// IWYU pragma: no_include <bits/chrono.h>
29
#include <chrono> // IWYU pragma: keep
30
#include <cstdlib>
31
#include <ostream>
32
#include <string>
33
34
#include "common/status.h"
35
#include "core/binary_cast.hpp"
36
#include "core/column/column.h"
37
#include "core/column/column_nullable.h"
38
#include "core/data_type/data_type_array.h"
39
#include "core/data_type/data_type_nullable.h"
40
#include "core/data_type/define_primitive_type.h"
41
#include "core/data_type/primitive_type.h"
42
#include "core/field.h"
43
#include "core/value/decimalv2_value.h"
44
#include "core/value/jsonb_value.h"
45
#include "core/value/vdatetime_value.h"
46
#include "exprs/function/cast/cast_to_date_or_datetime_impl.hpp"
47
#include "exprs/function/cast/cast_to_datetimev2_impl.hpp"
48
#include "exprs/function/cast/cast_to_datev2_impl.hpp"
49
#include "rapidjson/document.h"
50
#include "rapidjson/rapidjson.h"
51
#include "rapidjson/stringbuffer.h"
52
#include "rapidjson/writer.h"
53
#include "runtime/descriptors.h"
54
#include "util/string_parser.hpp"
55
56
namespace doris {
57
#include "common/compile_check_begin.h"
58
59
static const char* FIELD_SCROLL_ID = "_scroll_id";
60
static const char* FIELD_HITS = "hits";
61
static const char* FIELD_INNER_HITS = "hits";
62
static const char* FIELD_SOURCE = "_source";
63
static const char* FIELD_ID = "_id";
64
65
// get the original json data type
66
0
std::string json_type_to_string(rapidjson::Type type) {
67
0
    switch (type) {
68
0
    case rapidjson::kNumberType:
69
0
        return "Number";
70
0
    case rapidjson::kStringType:
71
0
        return "Varchar/Char";
72
0
    case rapidjson::kArrayType:
73
0
        return "Array";
74
0
    case rapidjson::kObjectType:
75
0
        return "Object";
76
0
    case rapidjson::kNullType:
77
0
        return "Null Type";
78
0
    case rapidjson::kFalseType:
79
0
    case rapidjson::kTrueType:
80
0
        return "True/False";
81
0
    default:
82
0
        return "Unknown Type";
83
0
    }
84
0
}
85
86
// transfer rapidjson::Value to string representation
87
1.50k
std::string json_value_to_string(const rapidjson::Value& value) {
88
1.50k
    rapidjson::StringBuffer scratch_buffer;
89
1.50k
    rapidjson::Writer<rapidjson::StringBuffer> temp_writer(scratch_buffer);
90
1.50k
    value.Accept(temp_writer);
91
1.50k
    return scratch_buffer.GetString();
92
1.50k
}
93
94
static const std::string ERROR_INVALID_COL_DATA =
95
        "Data source returned inconsistent column data. "
96
        "Expected value of type {} based on column metadata. This likely indicates a "
97
        "problem with the data source library.";
98
static const std::string ERROR_MEM_LIMIT_EXCEEDED =
99
        "DataSourceScanNode::$0() failed to allocate "
100
        "$1 bytes for $2.";
101
static const std::string ERROR_COL_DATA_IS_ARRAY =
102
        "Data source returned an array for the type $0"
103
        "based on column metadata.";
104
static const std::string INVALID_NULL_VALUE =
105
        "Invalid null value occurs: Non-null column `$0` contains NULL";
106
107
#define RETURN_ERROR_IF_COL_IS_ARRAY(col, type, is_array)                    \
108
4.02k
    do {                                                                     \
109
4.02k
        if (col.IsArray() == is_array) {                                     \
110
0
            std::stringstream ss;                                            \
111
0
            ss << "Expected value of type: " << type_to_string(type)         \
112
0
               << "; but found type: " << json_type_to_string(col.GetType()) \
113
0
               << "; Document slice is : " << json_value_to_string(col);     \
114
0
            return Status::RuntimeError(ss.str());                           \
115
0
        }                                                                    \
116
4.02k
    } while (false)
117
118
#define RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type)                            \
119
4.02k
    do {                                                                        \
120
4.02k
        if (!col.IsString()) {                                                  \
121
0
            std::stringstream ss;                                               \
122
0
            ss << "Expected value of type: " << type_to_string(type)            \
123
0
               << "; but found type: " << json_type_to_string(col.GetType())    \
124
0
               << "; Document source slice is : " << json_value_to_string(col); \
125
0
            return Status::RuntimeError(ss.str());                              \
126
0
        }                                                                       \
127
4.02k
    } while (false)
128
129
#define RETURN_ERROR_IF_COL_IS_NOT_NUMBER(col, type)                         \
130
0
    do {                                                                     \
131
0
        if (!col.IsNumber()) {                                               \
132
0
            std::stringstream ss;                                            \
133
0
            ss << "Expected value of type: " << type_to_string(type)         \
134
0
               << "; but found type: " << json_type_to_string(col.GetType()) \
135
0
               << "; Document value is: " << json_value_to_string(col);      \
136
0
            return Status::RuntimeError(ss.str());                           \
137
0
        }                                                                    \
138
0
    } while (false)
139
140
#define RETURN_ERROR_IF_PARSING_FAILED(result, col, type)                       \
141
220
    do {                                                                        \
142
220
        if (result != StringParser::PARSE_SUCCESS) {                            \
143
0
            std::stringstream ss;                                               \
144
0
            ss << "Expected value of type: " << type_to_string(type)            \
145
0
               << "; but found type: " << json_type_to_string(col.GetType())    \
146
0
               << "; Document source slice is : " << json_value_to_string(col); \
147
0
            return Status::RuntimeError(ss.str());                              \
148
0
        }                                                                       \
149
220
    } while (false)
150
151
#define RETURN_ERROR_IF_CAST_FORMAT_ERROR(col, type)                     \
152
0
    do {                                                                 \
153
0
        std::stringstream ss;                                            \
154
0
        ss << "Expected value of type: " << type_to_string(type)         \
155
0
           << "; but found type: " << json_type_to_string(col.GetType()) \
156
0
           << "; Document slice is : " << json_value_to_string(col);     \
157
0
        return Status::RuntimeError(ss.str());                           \
158
0
    } while (false)
159
160
template <typename T>
161
Status get_int_value(const rapidjson::Value& col, PrimitiveType type, void* slot,
162
13.3k
                     bool pure_doc_value) {
163
13.3k
    if (col.IsNumber()) {
164
13.3k
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) < 8 ? col.GetInt() : col.GetInt64());
165
13.3k
        return Status::OK();
166
13.3k
    }
167
168
0
    if (pure_doc_value && col.IsArray() && !col.Empty()) {
169
0
        RETURN_ERROR_IF_COL_IS_NOT_NUMBER(col[0], type);
170
0
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) < 8 ? col[0].GetInt() : col[0].GetInt64());
171
0
        return Status::OK();
172
0
    }
173
174
0
    RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
175
0
    RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
176
177
0
    StringParser::ParseResult result;
178
0
    const std::string& val = col.GetString();
179
0
    size_t len = col.GetStringLength();
180
0
    T v = StringParser::string_to_int<T>(val.c_str(), len, &result);
181
0
    RETURN_ERROR_IF_PARSING_FAILED(result, col, type);
182
183
0
    if (sizeof(T) < 16) {
184
0
        *reinterpret_cast<T*>(slot) = v;
185
0
    } else {
186
0
        DCHECK(sizeof(T) == 16);
187
0
        memcpy(slot, &v, sizeof(v));
188
0
    }
189
190
0
    return Status::OK();
191
0
}
_ZN5doris13get_int_valueIaEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPvb
Line
Count
Source
162
2.60k
                     bool pure_doc_value) {
163
2.60k
    if (col.IsNumber()) {
164
2.60k
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) < 8 ? col.GetInt() : col.GetInt64());
165
2.60k
        return Status::OK();
166
2.60k
    }
167
168
0
    if (pure_doc_value && col.IsArray() && !col.Empty()) {
169
0
        RETURN_ERROR_IF_COL_IS_NOT_NUMBER(col[0], type);
170
0
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) < 8 ? col[0].GetInt() : col[0].GetInt64());
171
0
        return Status::OK();
172
0
    }
173
174
0
    RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
175
0
    RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
176
177
0
    StringParser::ParseResult result;
178
0
    const std::string& val = col.GetString();
179
0
    size_t len = col.GetStringLength();
180
0
    T v = StringParser::string_to_int<T>(val.c_str(), len, &result);
181
0
    RETURN_ERROR_IF_PARSING_FAILED(result, col, type);
182
183
0
    if (sizeof(T) < 16) {
184
0
        *reinterpret_cast<T*>(slot) = v;
185
0
    } else {
186
0
        DCHECK(sizeof(T) == 16);
187
0
        memcpy(slot, &v, sizeof(v));
188
0
    }
189
190
0
    return Status::OK();
191
0
}
_ZN5doris13get_int_valueIsEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPvb
Line
Count
Source
162
2.60k
                     bool pure_doc_value) {
163
2.60k
    if (col.IsNumber()) {
164
2.60k
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) < 8 ? col.GetInt() : col.GetInt64());
165
2.60k
        return Status::OK();
166
2.60k
    }
167
168
0
    if (pure_doc_value && col.IsArray() && !col.Empty()) {
169
0
        RETURN_ERROR_IF_COL_IS_NOT_NUMBER(col[0], type);
170
0
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) < 8 ? col[0].GetInt() : col[0].GetInt64());
171
0
        return Status::OK();
172
0
    }
173
174
0
    RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
175
0
    RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
176
177
0
    StringParser::ParseResult result;
178
0
    const std::string& val = col.GetString();
179
0
    size_t len = col.GetStringLength();
180
0
    T v = StringParser::string_to_int<T>(val.c_str(), len, &result);
181
0
    RETURN_ERROR_IF_PARSING_FAILED(result, col, type);
182
183
0
    if (sizeof(T) < 16) {
184
0
        *reinterpret_cast<T*>(slot) = v;
185
0
    } else {
186
0
        DCHECK(sizeof(T) == 16);
187
0
        memcpy(slot, &v, sizeof(v));
188
0
    }
189
190
0
    return Status::OK();
191
0
}
_ZN5doris13get_int_valueIiEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPvb
Line
Count
Source
162
2.90k
                     bool pure_doc_value) {
163
2.90k
    if (col.IsNumber()) {
164
2.90k
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) < 8 ? col.GetInt() : col.GetInt64());
165
2.90k
        return Status::OK();
166
2.90k
    }
167
168
0
    if (pure_doc_value && col.IsArray() && !col.Empty()) {
169
0
        RETURN_ERROR_IF_COL_IS_NOT_NUMBER(col[0], type);
170
0
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) < 8 ? col[0].GetInt() : col[0].GetInt64());
171
0
        return Status::OK();
172
0
    }
173
174
0
    RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
175
0
    RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
176
177
0
    StringParser::ParseResult result;
178
0
    const std::string& val = col.GetString();
179
0
    size_t len = col.GetStringLength();
180
0
    T v = StringParser::string_to_int<T>(val.c_str(), len, &result);
181
0
    RETURN_ERROR_IF_PARSING_FAILED(result, col, type);
182
183
0
    if (sizeof(T) < 16) {
184
0
        *reinterpret_cast<T*>(slot) = v;
185
0
    } else {
186
0
        DCHECK(sizeof(T) == 16);
187
0
        memcpy(slot, &v, sizeof(v));
188
0
    }
189
190
0
    return Status::OK();
191
0
}
_ZN5doris13get_int_valueIlEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPvb
Line
Count
Source
162
3.53k
                     bool pure_doc_value) {
163
3.53k
    if (col.IsNumber()) {
164
3.53k
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) < 8 ? col.GetInt() : col.GetInt64());
165
3.53k
        return Status::OK();
166
3.53k
    }
167
168
0
    if (pure_doc_value && col.IsArray() && !col.Empty()) {
169
0
        RETURN_ERROR_IF_COL_IS_NOT_NUMBER(col[0], type);
170
0
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) < 8 ? col[0].GetInt() : col[0].GetInt64());
171
0
        return Status::OK();
172
0
    }
173
174
0
    RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
175
0
    RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
176
177
0
    StringParser::ParseResult result;
178
0
    const std::string& val = col.GetString();
179
0
    size_t len = col.GetStringLength();
180
0
    T v = StringParser::string_to_int<T>(val.c_str(), len, &result);
181
0
    RETURN_ERROR_IF_PARSING_FAILED(result, col, type);
182
183
0
    if (sizeof(T) < 16) {
184
0
        *reinterpret_cast<T*>(slot) = v;
185
0
    } else {
186
0
        DCHECK(sizeof(T) == 16);
187
0
        memcpy(slot, &v, sizeof(v));
188
0
    }
189
190
0
    return Status::OK();
191
0
}
_ZN5doris13get_int_valueInEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPvb
Line
Count
Source
162
1.66k
                     bool pure_doc_value) {
163
1.66k
    if (col.IsNumber()) {
164
1.66k
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) < 8 ? col.GetInt() : col.GetInt64());
165
1.66k
        return Status::OK();
166
1.66k
    }
167
168
0
    if (pure_doc_value && col.IsArray() && !col.Empty()) {
169
0
        RETURN_ERROR_IF_COL_IS_NOT_NUMBER(col[0], type);
170
0
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) < 8 ? col[0].GetInt() : col[0].GetInt64());
171
0
        return Status::OK();
172
0
    }
173
174
0
    RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
175
0
    RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
176
177
0
    StringParser::ParseResult result;
178
0
    const std::string& val = col.GetString();
179
0
    size_t len = col.GetStringLength();
180
0
    T v = StringParser::string_to_int<T>(val.c_str(), len, &result);
181
0
    RETURN_ERROR_IF_PARSING_FAILED(result, col, type);
182
183
0
    if (sizeof(T) < 16) {
184
0
        *reinterpret_cast<T*>(slot) = v;
185
0
    } else {
186
0
        DCHECK(sizeof(T) == 16);
187
0
        memcpy(slot, &v, sizeof(v));
188
0
    }
189
190
0
    return Status::OK();
191
0
}
192
193
template <PrimitiveType T>
194
Status get_date_value_int(const rapidjson::Value& col, PrimitiveType type, bool is_date_str,
195
                          typename PrimitiveTypeTraits<T>::CppType* slot,
196
4.38k
                          const cctz::time_zone& time_zone) {
197
4.38k
    constexpr bool is_datetime_v1 = T == TYPE_DATE || T == TYPE_DATETIME;
198
4.38k
    typename PrimitiveTypeTraits<T>::CppType dt_val;
199
4.38k
    if (is_date_str) {
200
4.15k
        const std::string str_date = col.GetString();
201
4.15k
        int str_length = col.GetStringLength();
202
4.15k
        bool success = false;
203
4.15k
        if (str_length > 19) {
204
602
            std::chrono::system_clock::time_point tp;
205
            // time_zone suffix pattern
206
            // Z/+08:00/-04:30
207
602
            RE2 time_zone_pattern(R"([+-]\d{2}:?\d{2}|Z)");
208
602
            bool ok = false;
209
602
            std::string fmt;
210
602
            re2::StringPiece value;
211
602
            if (time_zone_pattern.Match(str_date, 0, str_date.size(), RE2::UNANCHORED, &value, 1)) {
212
                // with time_zone info
213
                // YYYY-MM-DDTHH:MM:SSZ or YYYY-MM-DDTHH:MM:SS+08:00
214
                // or 2022-08-08T12:10:10.000Z or YYYY-MM-DDTHH:MM:SS-08:00
215
486
                fmt = "%Y-%m-%dT%H:%M:%E*S%Ez";
216
486
                cctz::time_zone ctz;
217
                // find time_zone by time_zone suffix string
218
486
                TimezoneUtils::find_cctz_time_zone(value.as_string(), ctz);
219
486
                ok = cctz::parse(fmt, str_date, ctz, &tp);
220
486
            } else {
221
                // without time_zone info
222
                // 2022-08-08T12:10:10.000
223
116
                fmt = "%Y-%m-%dT%H:%M:%E*S";
224
                // If the time without time_zone info, ES will assume it is UTC time.
225
                // So we parse it in Doris with UTC time zone.
226
116
                ok = cctz::parse(fmt, str_date, cctz::utc_time_zone(), &tp);
227
116
            }
228
602
            if (ok) {
229
                // The local time zone can change by session variable `time_zone`
230
                // We should use the user specified time zone, not the actual system local time zone.
231
602
                success = true;
232
602
                dt_val.from_unixtime(std::chrono::system_clock::to_time_t(tp), time_zone);
233
602
            }
234
3.54k
        } else if (str_length == 19) {
235
            // YYYY-MM-DDTHH:MM:SS
236
1.90k
            if (*(str_date.c_str() + 10) == 'T') {
237
40
                std::chrono::system_clock::time_point tp;
238
40
                const bool ok =
239
40
                        cctz::parse("%Y-%m-%dT%H:%M:%S", str_date, cctz::utc_time_zone(), &tp);
240
40
                if (ok) {
241
40
                    success = true;
242
40
                    dt_val.from_unixtime(std::chrono::system_clock::to_time_t(tp), time_zone);
243
40
                }
244
1.86k
            } else {
245
                // YYYY-MM-DD HH:MM:SS
246
1.86k
                CastParameters params;
247
1.86k
                if constexpr (is_datetime_v1) {
248
0
                    success = CastToDateOrDatetime::from_string_non_strict_mode<
249
0
                            DatelikeTargetType::DATE_TIME>({str_date.c_str(), (size_t)str_length},
250
0
                                                           dt_val, nullptr, params);
251
0
                } else if constexpr (T == TYPE_DATEV2) {
252
0
                    success = CastToDateV2::from_string_non_strict_mode(
253
0
                            {str_date.c_str(), (size_t)str_length}, dt_val, nullptr, params);
254
1.86k
                } else {
255
1.86k
                    success = CastToDatetimeV2::from_string_non_strict_mode(
256
1.86k
                            {str_date.c_str(), (size_t)str_length}, dt_val, nullptr, -1, params);
257
1.86k
                }
258
1.86k
            }
259
260
1.90k
        } else if (str_length == 13) {
261
            // string long like "1677895728000"
262
0
            int64_t time_long = std::atol(str_date.c_str());
263
0
            if (time_long > 0) {
264
0
                success = true;
265
0
                dt_val.from_unixtime(time_long / 1000, time_zone);
266
0
            }
267
1.64k
        } else {
268
            // YYYY-MM-DD or others
269
1.64k
            CastParameters params;
270
1.64k
            if constexpr (is_datetime_v1) {
271
0
                success = CastToDateOrDatetime::from_string_non_strict_mode<
272
0
                        DatelikeTargetType::DATE_TIME>({str_date.c_str(), (size_t)str_length},
273
0
                                                       dt_val, nullptr, params);
274
1.30k
            } else if constexpr (T == TYPE_DATEV2) {
275
1.30k
                success = CastToDateV2::from_string_non_strict_mode(
276
1.30k
                        {str_date.c_str(), (size_t)str_length}, dt_val, nullptr, params);
277
1.30k
            } else {
278
344
                success = CastToDatetimeV2::from_string_non_strict_mode(
279
344
                        {str_date.c_str(), (size_t)str_length}, dt_val, nullptr, -1, params);
280
344
            }
281
1.64k
        }
282
283
4.15k
        if (!success) {
284
0
            RETURN_ERROR_IF_CAST_FORMAT_ERROR(col, type);
285
0
        }
286
287
4.15k
    } else {
288
236
        dt_val.from_unixtime(col.GetInt64() / 1000, time_zone);
289
236
    }
290
4.38k
    if constexpr (is_datetime_v1) {
291
0
        if (type == TYPE_DATE) {
292
0
            dt_val.cast_to_date();
293
0
        } else {
294
0
            dt_val.to_datetime();
295
0
        }
296
0
    }
297
298
4.38k
    *slot = *reinterpret_cast<typename PrimitiveTypeTraits<T>::CppType*>(&dt_val);
299
4.38k
    return Status::OK();
300
4.38k
}
_ZN5doris18get_date_value_intILNS_13PrimitiveTypeE25EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bPNS_19PrimitiveTypeTraitsIXT_EE7CppTypeERKN4cctz9time_zoneE
Line
Count
Source
196
1.30k
                          const cctz::time_zone& time_zone) {
197
1.30k
    constexpr bool is_datetime_v1 = T == TYPE_DATE || T == TYPE_DATETIME;
198
1.30k
    typename PrimitiveTypeTraits<T>::CppType dt_val;
199
1.30k
    if (is_date_str) {
200
1.30k
        const std::string str_date = col.GetString();
201
1.30k
        int str_length = col.GetStringLength();
202
1.30k
        bool success = false;
203
1.30k
        if (str_length > 19) {
204
0
            std::chrono::system_clock::time_point tp;
205
            // time_zone suffix pattern
206
            // Z/+08:00/-04:30
207
0
            RE2 time_zone_pattern(R"([+-]\d{2}:?\d{2}|Z)");
208
0
            bool ok = false;
209
0
            std::string fmt;
210
0
            re2::StringPiece value;
211
0
            if (time_zone_pattern.Match(str_date, 0, str_date.size(), RE2::UNANCHORED, &value, 1)) {
212
                // with time_zone info
213
                // YYYY-MM-DDTHH:MM:SSZ or YYYY-MM-DDTHH:MM:SS+08:00
214
                // or 2022-08-08T12:10:10.000Z or YYYY-MM-DDTHH:MM:SS-08:00
215
0
                fmt = "%Y-%m-%dT%H:%M:%E*S%Ez";
216
0
                cctz::time_zone ctz;
217
                // find time_zone by time_zone suffix string
218
0
                TimezoneUtils::find_cctz_time_zone(value.as_string(), ctz);
219
0
                ok = cctz::parse(fmt, str_date, ctz, &tp);
220
0
            } else {
221
                // without time_zone info
222
                // 2022-08-08T12:10:10.000
223
0
                fmt = "%Y-%m-%dT%H:%M:%E*S";
224
                // If the time without time_zone info, ES will assume it is UTC time.
225
                // So we parse it in Doris with UTC time zone.
226
0
                ok = cctz::parse(fmt, str_date, cctz::utc_time_zone(), &tp);
227
0
            }
228
0
            if (ok) {
229
                // The local time zone can change by session variable `time_zone`
230
                // We should use the user specified time zone, not the actual system local time zone.
231
0
                success = true;
232
0
                dt_val.from_unixtime(std::chrono::system_clock::to_time_t(tp), time_zone);
233
0
            }
234
1.30k
        } else if (str_length == 19) {
235
            // YYYY-MM-DDTHH:MM:SS
236
0
            if (*(str_date.c_str() + 10) == 'T') {
237
0
                std::chrono::system_clock::time_point tp;
238
0
                const bool ok =
239
0
                        cctz::parse("%Y-%m-%dT%H:%M:%S", str_date, cctz::utc_time_zone(), &tp);
240
0
                if (ok) {
241
0
                    success = true;
242
0
                    dt_val.from_unixtime(std::chrono::system_clock::to_time_t(tp), time_zone);
243
0
                }
244
0
            } else {
245
                // YYYY-MM-DD HH:MM:SS
246
0
                CastParameters params;
247
                if constexpr (is_datetime_v1) {
248
                    success = CastToDateOrDatetime::from_string_non_strict_mode<
249
                            DatelikeTargetType::DATE_TIME>({str_date.c_str(), (size_t)str_length},
250
                                                           dt_val, nullptr, params);
251
0
                } else if constexpr (T == TYPE_DATEV2) {
252
0
                    success = CastToDateV2::from_string_non_strict_mode(
253
0
                            {str_date.c_str(), (size_t)str_length}, dt_val, nullptr, params);
254
                } else {
255
                    success = CastToDatetimeV2::from_string_non_strict_mode(
256
                            {str_date.c_str(), (size_t)str_length}, dt_val, nullptr, -1, params);
257
                }
258
0
            }
259
260
1.30k
        } else if (str_length == 13) {
261
            // string long like "1677895728000"
262
0
            int64_t time_long = std::atol(str_date.c_str());
263
0
            if (time_long > 0) {
264
0
                success = true;
265
0
                dt_val.from_unixtime(time_long / 1000, time_zone);
266
0
            }
267
1.30k
        } else {
268
            // YYYY-MM-DD or others
269
1.30k
            CastParameters params;
270
            if constexpr (is_datetime_v1) {
271
                success = CastToDateOrDatetime::from_string_non_strict_mode<
272
                        DatelikeTargetType::DATE_TIME>({str_date.c_str(), (size_t)str_length},
273
                                                       dt_val, nullptr, params);
274
1.30k
            } else if constexpr (T == TYPE_DATEV2) {
275
1.30k
                success = CastToDateV2::from_string_non_strict_mode(
276
1.30k
                        {str_date.c_str(), (size_t)str_length}, dt_val, nullptr, params);
277
            } else {
278
                success = CastToDatetimeV2::from_string_non_strict_mode(
279
                        {str_date.c_str(), (size_t)str_length}, dt_val, nullptr, -1, params);
280
            }
281
1.30k
        }
282
283
1.30k
        if (!success) {
284
0
            RETURN_ERROR_IF_CAST_FORMAT_ERROR(col, type);
285
0
        }
286
287
1.30k
    } else {
288
0
        dt_val.from_unixtime(col.GetInt64() / 1000, time_zone);
289
0
    }
290
    if constexpr (is_datetime_v1) {
291
        if (type == TYPE_DATE) {
292
            dt_val.cast_to_date();
293
        } else {
294
            dt_val.to_datetime();
295
        }
296
    }
297
298
1.30k
    *slot = *reinterpret_cast<typename PrimitiveTypeTraits<T>::CppType*>(&dt_val);
299
1.30k
    return Status::OK();
300
1.30k
}
_ZN5doris18get_date_value_intILNS_13PrimitiveTypeE26EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bPNS_19PrimitiveTypeTraitsIXT_EE7CppTypeERKN4cctz9time_zoneE
Line
Count
Source
196
3.08k
                          const cctz::time_zone& time_zone) {
197
3.08k
    constexpr bool is_datetime_v1 = T == TYPE_DATE || T == TYPE_DATETIME;
198
3.08k
    typename PrimitiveTypeTraits<T>::CppType dt_val;
199
3.08k
    if (is_date_str) {
200
2.85k
        const std::string str_date = col.GetString();
201
2.85k
        int str_length = col.GetStringLength();
202
2.85k
        bool success = false;
203
2.85k
        if (str_length > 19) {
204
602
            std::chrono::system_clock::time_point tp;
205
            // time_zone suffix pattern
206
            // Z/+08:00/-04:30
207
602
            RE2 time_zone_pattern(R"([+-]\d{2}:?\d{2}|Z)");
208
602
            bool ok = false;
209
602
            std::string fmt;
210
602
            re2::StringPiece value;
211
602
            if (time_zone_pattern.Match(str_date, 0, str_date.size(), RE2::UNANCHORED, &value, 1)) {
212
                // with time_zone info
213
                // YYYY-MM-DDTHH:MM:SSZ or YYYY-MM-DDTHH:MM:SS+08:00
214
                // or 2022-08-08T12:10:10.000Z or YYYY-MM-DDTHH:MM:SS-08:00
215
486
                fmt = "%Y-%m-%dT%H:%M:%E*S%Ez";
216
486
                cctz::time_zone ctz;
217
                // find time_zone by time_zone suffix string
218
486
                TimezoneUtils::find_cctz_time_zone(value.as_string(), ctz);
219
486
                ok = cctz::parse(fmt, str_date, ctz, &tp);
220
486
            } else {
221
                // without time_zone info
222
                // 2022-08-08T12:10:10.000
223
116
                fmt = "%Y-%m-%dT%H:%M:%E*S";
224
                // If the time without time_zone info, ES will assume it is UTC time.
225
                // So we parse it in Doris with UTC time zone.
226
116
                ok = cctz::parse(fmt, str_date, cctz::utc_time_zone(), &tp);
227
116
            }
228
602
            if (ok) {
229
                // The local time zone can change by session variable `time_zone`
230
                // We should use the user specified time zone, not the actual system local time zone.
231
602
                success = true;
232
602
                dt_val.from_unixtime(std::chrono::system_clock::to_time_t(tp), time_zone);
233
602
            }
234
2.24k
        } else if (str_length == 19) {
235
            // YYYY-MM-DDTHH:MM:SS
236
1.90k
            if (*(str_date.c_str() + 10) == 'T') {
237
40
                std::chrono::system_clock::time_point tp;
238
40
                const bool ok =
239
40
                        cctz::parse("%Y-%m-%dT%H:%M:%S", str_date, cctz::utc_time_zone(), &tp);
240
40
                if (ok) {
241
40
                    success = true;
242
40
                    dt_val.from_unixtime(std::chrono::system_clock::to_time_t(tp), time_zone);
243
40
                }
244
1.86k
            } else {
245
                // YYYY-MM-DD HH:MM:SS
246
1.86k
                CastParameters params;
247
                if constexpr (is_datetime_v1) {
248
                    success = CastToDateOrDatetime::from_string_non_strict_mode<
249
                            DatelikeTargetType::DATE_TIME>({str_date.c_str(), (size_t)str_length},
250
                                                           dt_val, nullptr, params);
251
                } else if constexpr (T == TYPE_DATEV2) {
252
                    success = CastToDateV2::from_string_non_strict_mode(
253
                            {str_date.c_str(), (size_t)str_length}, dt_val, nullptr, params);
254
1.86k
                } else {
255
1.86k
                    success = CastToDatetimeV2::from_string_non_strict_mode(
256
1.86k
                            {str_date.c_str(), (size_t)str_length}, dt_val, nullptr, -1, params);
257
1.86k
                }
258
1.86k
            }
259
260
1.90k
        } else if (str_length == 13) {
261
            // string long like "1677895728000"
262
0
            int64_t time_long = std::atol(str_date.c_str());
263
0
            if (time_long > 0) {
264
0
                success = true;
265
0
                dt_val.from_unixtime(time_long / 1000, time_zone);
266
0
            }
267
344
        } else {
268
            // YYYY-MM-DD or others
269
344
            CastParameters params;
270
            if constexpr (is_datetime_v1) {
271
                success = CastToDateOrDatetime::from_string_non_strict_mode<
272
                        DatelikeTargetType::DATE_TIME>({str_date.c_str(), (size_t)str_length},
273
                                                       dt_val, nullptr, params);
274
            } else if constexpr (T == TYPE_DATEV2) {
275
                success = CastToDateV2::from_string_non_strict_mode(
276
                        {str_date.c_str(), (size_t)str_length}, dt_val, nullptr, params);
277
344
            } else {
278
344
                success = CastToDatetimeV2::from_string_non_strict_mode(
279
344
                        {str_date.c_str(), (size_t)str_length}, dt_val, nullptr, -1, params);
280
344
            }
281
344
        }
282
283
2.85k
        if (!success) {
284
0
            RETURN_ERROR_IF_CAST_FORMAT_ERROR(col, type);
285
0
        }
286
287
2.85k
    } else {
288
236
        dt_val.from_unixtime(col.GetInt64() / 1000, time_zone);
289
236
    }
290
    if constexpr (is_datetime_v1) {
291
        if (type == TYPE_DATE) {
292
            dt_val.cast_to_date();
293
        } else {
294
            dt_val.to_datetime();
295
        }
296
    }
297
298
3.08k
    *slot = *reinterpret_cast<typename PrimitiveTypeTraits<T>::CppType*>(&dt_val);
299
3.08k
    return Status::OK();
300
3.08k
}
Unexecuted instantiation: _ZN5doris18get_date_value_intILNS_13PrimitiveTypeE11EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bPNS_19PrimitiveTypeTraitsIXT_EE7CppTypeERKN4cctz9time_zoneE
Unexecuted instantiation: _ZN5doris18get_date_value_intILNS_13PrimitiveTypeE12EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bPNS_19PrimitiveTypeTraitsIXT_EE7CppTypeERKN4cctz9time_zoneE
301
302
template <PrimitiveType T>
303
Status get_date_int(const rapidjson::Value& col, PrimitiveType type, bool pure_doc_value,
304
                    typename PrimitiveTypeTraits<T>::CppType* slot,
305
4.38k
                    const cctz::time_zone& time_zone) {
306
    // this happens only when `enable_docvalue_scan = false` and the field holds a timestamp-format date from _source
307
4.38k
    if (col.IsNumber()) {
308
        // ES uses millisecond timestamps for date/datetime fields in the index and in docvalues.
309
        // When a number is encountered for a date-type field, Doris On ES always treats it as milliseconds
310
        // to stay consistent with ES, so the value is divided by 1000 because from_unixtime takes seconds.
311
148
        return get_date_value_int<T>(col, type, false, slot, time_zone);
312
4.23k
    } else if (col.IsArray() && pure_doc_value && !col.Empty()) {
313
        // this happens only when `enable_docvalue_scan = true`
314
        // Since ES 6.4, ES applies a default format to every field; if no format is provided for a `date` field, ES imposes
315
        // a standard date format such as `2020-06-16T00:00:00.000Z`
316
        // At present we only process this string-format date. Once the related PRs are merged into Doris, `epoch_millis` will be imposed for
317
        // the date field's docvalue
318
432
        if (col[0].IsString()) {
319
344
            return get_date_value_int<T>(col[0], type, true, slot, time_zone);
320
344
        }
321
        // ES returns a millisecond timestamp for the date field; it is divided by 1000 because from_unixtime takes seconds
322
88
        return get_date_value_int<T>(col[0], type, false, slot, time_zone);
323
3.80k
    } else {
324
        // this happens only when `enable_docvalue_scan = false` and the field holds a string-format date from _source
325
3.80k
        RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
326
3.80k
        RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
327
3.80k
        return get_date_value_int<T>(col, type, true, slot, time_zone);
328
3.80k
    }
329
4.38k
}
_ZN5doris12get_date_intILNS_13PrimitiveTypeE25EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bPNS_19PrimitiveTypeTraitsIXT_EE7CppTypeERKN4cctz9time_zoneE
Line
Count
Source
305
1.30k
                    const cctz::time_zone& time_zone) {
306
    // this happens only when `enable_docvalue_scan = false` and the field holds a timestamp-format date from _source
307
1.30k
    if (col.IsNumber()) {
308
        // ES uses millisecond timestamps for date/datetime fields in the index and in docvalues.
309
        // When a number is encountered for a date-type field, Doris On ES always treats it as milliseconds
310
        // to stay consistent with ES, so the value is divided by 1000 because from_unixtime takes seconds.
311
0
        return get_date_value_int<T>(col, type, false, slot, time_zone);
312
1.30k
    } else if (col.IsArray() && pure_doc_value && !col.Empty()) {
313
        // this happens only when `enable_docvalue_scan = true`
314
        // Since ES 6.4, ES applies a default format to every field; if no format is provided for a `date` field, ES imposes
315
        // a standard date format such as `2020-06-16T00:00:00.000Z`
316
        // At present we only process this string-format date. Once the related PRs are merged into Doris, `epoch_millis` will be imposed for
317
        // the date field's docvalue
318
0
        if (col[0].IsString()) {
319
0
            return get_date_value_int<T>(col[0], type, true, slot, time_zone);
320
0
        }
321
        // ES returns a millisecond timestamp for the date field; it is divided by 1000 because from_unixtime takes seconds
322
0
        return get_date_value_int<T>(col[0], type, false, slot, time_zone);
323
1.30k
    } else {
324
        // this happens only when `enable_docvalue_scan = false` and the field holds a string-format date from _source
325
1.30k
        RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
326
1.30k
        RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
327
1.30k
        return get_date_value_int<T>(col, type, true, slot, time_zone);
328
1.30k
    }
329
1.30k
}
_ZN5doris12get_date_intILNS_13PrimitiveTypeE26EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bPNS_19PrimitiveTypeTraitsIXT_EE7CppTypeERKN4cctz9time_zoneE
Line
Count
Source
305
3.08k
                    const cctz::time_zone& time_zone) {
306
    // this happens only when `enable_docvalue_scan = false` and the field holds a timestamp-format date from _source
307
3.08k
    if (col.IsNumber()) {
308
        // ES uses millisecond timestamps for date/datetime fields in the index and in docvalues.
309
        // When a number is encountered for a date-type field, Doris On ES always treats it as milliseconds
310
        // to stay consistent with ES, so the value is divided by 1000 because from_unixtime takes seconds.
311
148
        return get_date_value_int<T>(col, type, false, slot, time_zone);
312
2.93k
    } else if (col.IsArray() && pure_doc_value && !col.Empty()) {
313
        // this happens only when `enable_docvalue_scan = true`
314
        // Since ES 6.4, ES applies a default format to every field; if no format is provided for a `date` field, ES imposes
315
        // a standard date format such as `2020-06-16T00:00:00.000Z`
316
        // At present we only process this string-format date. Once the related PRs are merged into Doris, `epoch_millis` will be imposed for
317
        // the date field's docvalue
318
432
        if (col[0].IsString()) {
319
344
            return get_date_value_int<T>(col[0], type, true, slot, time_zone);
320
344
        }
321
        // ES returns a millisecond timestamp for the date field; it is divided by 1000 because from_unixtime takes seconds
322
88
        return get_date_value_int<T>(col[0], type, false, slot, time_zone);
323
2.50k
    } else {
324
        // this happens only when `enable_docvalue_scan = false` and the field holds a string-format date from _source
325
2.50k
        RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
326
2.50k
        RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
327
2.50k
        return get_date_value_int<T>(col, type, true, slot, time_zone);
328
2.50k
    }
329
3.08k
}
Unexecuted instantiation: _ZN5doris12get_date_intILNS_13PrimitiveTypeE11EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bPNS_19PrimitiveTypeTraitsIXT_EE7CppTypeERKN4cctz9time_zoneE
Unexecuted instantiation: _ZN5doris12get_date_intILNS_13PrimitiveTypeE12EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bPNS_19PrimitiveTypeTraitsIXT_EE7CppTypeERKN4cctz9time_zoneE
330
template <PrimitiveType T>
331
Status fill_date_int(const rapidjson::Value& col, PrimitiveType type, bool pure_doc_value,
332
1.78k
                     IColumn* col_ptr, const cctz::time_zone& time_zone) {
333
1.78k
    typename PrimitiveTypeTraits<T>::CppType data;
334
1.78k
    RETURN_IF_ERROR((get_date_int<T>(col, type, pure_doc_value, &data, time_zone)));
335
1.78k
    col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&data)), 0);
336
1.78k
    return Status::OK();
337
1.78k
}
Unexecuted instantiation: _ZN5doris13fill_date_intILNS_13PrimitiveTypeE11EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bPNS_7IColumnERKN4cctz9time_zoneE
Unexecuted instantiation: _ZN5doris13fill_date_intILNS_13PrimitiveTypeE12EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bPNS_7IColumnERKN4cctz9time_zoneE
Unexecuted instantiation: _ZN5doris13fill_date_intILNS_13PrimitiveTypeE25EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bPNS_7IColumnERKN4cctz9time_zoneE
_ZN5doris13fill_date_intILNS_13PrimitiveTypeE26EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bPNS_7IColumnERKN4cctz9time_zoneE
Line
Count
Source
332
1.78k
                     IColumn* col_ptr, const cctz::time_zone& time_zone) {
333
1.78k
    typename PrimitiveTypeTraits<T>::CppType data;
334
1.78k
    RETURN_IF_ERROR((get_date_int<T>(col, type, pure_doc_value, &data, time_zone)));
335
1.78k
    col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&data)), 0);
336
1.78k
    return Status::OK();
337
1.78k
}
338
339
template <typename T>
340
Status get_float_value(const rapidjson::Value& col, PrimitiveType type, void* slot,
341
10.4k
                       bool pure_doc_value) {
342
10.4k
    static_assert(sizeof(T) == 4 || sizeof(T) == 8);
343
10.4k
    if (col.IsNumber()) {
344
10.4k
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) == 4 ? col.GetFloat() : col.GetDouble());
345
10.4k
        return Status::OK();
346
10.4k
    }
347
348
0
    if (pure_doc_value && col.IsArray() && !col.Empty()) {
349
0
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) == 4 ? col[0].GetFloat() : col[0].GetDouble());
350
0
        return Status::OK();
351
0
    }
352
353
0
    RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
354
0
    RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
355
356
0
    StringParser::ParseResult result;
357
0
    const std::string& val = col.GetString();
358
0
    size_t len = col.GetStringLength();
359
0
    T v = StringParser::string_to_float<T>(val.c_str(), len, &result);
360
0
    RETURN_ERROR_IF_PARSING_FAILED(result, col, type);
361
0
    *reinterpret_cast<T*>(slot) = v;
362
363
0
    return Status::OK();
364
0
}
_ZN5doris15get_float_valueIfEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPvb
Line
Count
Source
341
5.20k
                       bool pure_doc_value) {
342
5.20k
    static_assert(sizeof(T) == 4 || sizeof(T) == 8);
343
5.20k
    if (col.IsNumber()) {
344
5.20k
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) == 4 ? col.GetFloat() : col.GetDouble());
345
5.20k
        return Status::OK();
346
5.20k
    }
347
348
0
    if (pure_doc_value && col.IsArray() && !col.Empty()) {
349
0
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) == 4 ? col[0].GetFloat() : col[0].GetDouble());
350
0
        return Status::OK();
351
0
    }
352
353
0
    RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
354
0
    RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
355
356
0
    StringParser::ParseResult result;
357
0
    const std::string& val = col.GetString();
358
0
    size_t len = col.GetStringLength();
359
0
    T v = StringParser::string_to_float<T>(val.c_str(), len, &result);
360
0
    RETURN_ERROR_IF_PARSING_FAILED(result, col, type);
361
0
    *reinterpret_cast<T*>(slot) = v;
362
363
0
    return Status::OK();
364
0
}
_ZN5doris15get_float_valueIdEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPvb
Line
Count
Source
341
5.20k
                       bool pure_doc_value) {
342
5.20k
    static_assert(sizeof(T) == 4 || sizeof(T) == 8);
343
5.20k
    if (col.IsNumber()) {
344
5.20k
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) == 4 ? col.GetFloat() : col.GetDouble());
345
5.20k
        return Status::OK();
346
5.20k
    }
347
348
0
    if (pure_doc_value && col.IsArray() && !col.Empty()) {
349
0
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) == 4 ? col[0].GetFloat() : col[0].GetDouble());
350
0
        return Status::OK();
351
0
    }
352
353
0
    RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
354
0
    RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
355
356
0
    StringParser::ParseResult result;
357
0
    const std::string& val = col.GetString();
358
0
    size_t len = col.GetStringLength();
359
0
    T v = StringParser::string_to_float<T>(val.c_str(), len, &result);
360
0
    RETURN_ERROR_IF_PARSING_FAILED(result, col, type);
361
0
    *reinterpret_cast<T*>(slot) = v;
362
363
0
    return Status::OK();
364
0
}
365
366
template <typename T>
367
Status insert_float_value(const rapidjson::Value& col, PrimitiveType type, IColumn* col_ptr,
368
344
                          bool pure_doc_value, bool nullable) {
369
344
    static_assert(sizeof(T) == 4 || sizeof(T) == 8);
370
344
    if (col.IsNumber() && nullable) {
371
344
        T value = (T)(sizeof(T) == 4 ? col.GetFloat() : col.GetDouble());
372
344
        col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&value)), 0);
373
344
        return Status::OK();
374
344
    }
375
376
0
    if (pure_doc_value && col.IsArray() && !col.Empty() && nullable) {
377
0
        T value = (T)(sizeof(T) == 4 ? col[0].GetFloat() : col[0].GetDouble());
378
0
        col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&value)), 0);
379
0
        return Status::OK();
380
0
    }
381
382
0
    RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
383
0
    RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
384
385
0
    StringParser::ParseResult result;
386
0
    const std::string& val = col.GetString();
387
0
    size_t len = col.GetStringLength();
388
0
    T v = StringParser::string_to_float<T>(val.c_str(), len, &result);
389
0
    RETURN_ERROR_IF_PARSING_FAILED(result, col, type);
390
391
0
    col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&v)), 0);
392
393
0
    return Status::OK();
394
0
}
_ZN5doris18insert_float_valueIdEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPNS_7IColumnEbb
Line
Count
Source
368
344
                          bool pure_doc_value, bool nullable) {
369
344
    static_assert(sizeof(T) == 4 || sizeof(T) == 8);
370
344
    if (col.IsNumber() && nullable) {
371
344
        T value = (T)(sizeof(T) == 4 ? col.GetFloat() : col.GetDouble());
372
344
        col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&value)), 0);
373
344
        return Status::OK();
374
344
    }
375
376
0
    if (pure_doc_value && col.IsArray() && !col.Empty() && nullable) {
377
0
        T value = (T)(sizeof(T) == 4 ? col[0].GetFloat() : col[0].GetDouble());
378
0
        col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&value)), 0);
379
0
        return Status::OK();
380
0
    }
381
382
0
    RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
383
0
    RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
384
385
0
    StringParser::ParseResult result;
386
0
    const std::string& val = col.GetString();
387
0
    size_t len = col.GetStringLength();
388
0
    T v = StringParser::string_to_float<T>(val.c_str(), len, &result);
389
0
    RETURN_ERROR_IF_PARSING_FAILED(result, col, type);
390
391
0
    col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&v)), 0);
392
393
0
    return Status::OK();
394
0
}
Unexecuted instantiation: _ZN5doris18insert_float_valueIfEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPNS_7IColumnEbb
395
396
template <typename T>
397
Status insert_int_value(const rapidjson::Value& col, PrimitiveType type, IColumn* col_ptr,
398
964
                        bool pure_doc_value, bool nullable) {
399
964
    if (col.IsNumber()) {
400
572
        T value;
401
        // ES allows inserting float and double in int/long types.
402
        // To parse these numbers in Doris, we cast them directly to int types.
403
572
        if (col.IsDouble()) {
404
152
            value = static_cast<T>(col.GetDouble());
405
420
        } else if (col.IsFloat()) {
406
0
            value = static_cast<T>(col.GetFloat());
407
420
        } else {
408
420
            value = (T)(sizeof(T) < 8 ? col.GetInt() : col.GetInt64());
409
420
        }
410
572
        col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&value)), 0);
411
572
        return Status::OK();
412
572
    }
413
414
392
    auto parse_and_insert_data = [&](const rapidjson::Value& col_value) -> Status {
415
220
        StringParser::ParseResult result;
416
220
        std::string val = col_value.GetString();
417
        // ES allows inserting numbers and characters containing decimals in numeric types.
418
        // To parse these numbers in Doris, we remove the decimals here.
419
220
        size_t pos = val.find('.');
420
220
        if (pos != std::string::npos) {
421
84
            val = val.substr(0, pos);
422
84
        }
423
220
        size_t len = val.length();
424
220
        T v = StringParser::string_to_int<T>(val.c_str(), len, &result);
425
220
        RETURN_ERROR_IF_PARSING_FAILED(result, col_value, type);
426
427
220
        col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&v)), 0);
428
220
        return Status::OK();
429
220
    };
Unexecuted instantiation: _ZZN5doris16insert_int_valueIaEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPNS_7IColumnEbbENKUlSB_E_clESB_
Unexecuted instantiation: _ZZN5doris16insert_int_valueIsEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPNS_7IColumnEbbENKUlSB_E_clESB_
Unexecuted instantiation: _ZZN5doris16insert_int_valueIiEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPNS_7IColumnEbbENKUlSB_E_clESB_
_ZZN5doris16insert_int_valueIlEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPNS_7IColumnEbbENKUlSB_E_clESB_
Line
Count
Source
414
220
    auto parse_and_insert_data = [&](const rapidjson::Value& col_value) -> Status {
415
220
        StringParser::ParseResult result;
416
220
        std::string val = col_value.GetString();
417
        // ES allows inserting numbers and characters containing decimals in numeric types.
418
        // To parse these numbers in Doris, we remove the decimals here.
419
220
        size_t pos = val.find('.');
420
220
        if (pos != std::string::npos) {
421
84
            val = val.substr(0, pos);
422
84
        }
423
220
        size_t len = val.length();
424
220
        T v = StringParser::string_to_int<T>(val.c_str(), len, &result);
425
220
        RETURN_ERROR_IF_PARSING_FAILED(result, col_value, type);
426
427
220
        col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&v)), 0);
428
220
        return Status::OK();
429
220
    };
Unexecuted instantiation: _ZZN5doris16insert_int_valueInEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPNS_7IColumnEbbENKUlSB_E_clESB_
430
431
392
    if (pure_doc_value && col.IsArray() && !col.Empty()) {
432
274
        if (col[0].IsNumber()) {
433
172
            T value = (T)(sizeof(T) < 8 ? col[0].GetInt() : col[0].GetInt64());
434
172
            col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&value)), 0);
435
172
            return Status::OK();
436
172
        } else {
437
102
            RETURN_ERROR_IF_COL_IS_ARRAY(col[0], type, true);
438
102
            RETURN_ERROR_IF_COL_IS_NOT_STRING(col[0], type);
439
102
            return parse_and_insert_data(col[0]);
440
102
        }
441
274
    }
442
443
118
    RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
444
118
    RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
445
118
    return parse_and_insert_data(col);
446
118
}
Unexecuted instantiation: _ZN5doris16insert_int_valueIaEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPNS_7IColumnEbb
Unexecuted instantiation: _ZN5doris16insert_int_valueIsEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPNS_7IColumnEbb
_ZN5doris16insert_int_valueIiEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPNS_7IColumnEbb
Line
Count
Source
398
44
                        bool pure_doc_value, bool nullable) {
399
44
    if (col.IsNumber()) {
400
44
        T value;
401
        // ES allows inserting float and double in int/long types.
402
        // To parse these numbers in Doris, we cast them directly to int types.
403
44
        if (col.IsDouble()) {
404
0
            value = static_cast<T>(col.GetDouble());
405
44
        } else if (col.IsFloat()) {
406
0
            value = static_cast<T>(col.GetFloat());
407
44
        } else {
408
44
            value = (T)(sizeof(T) < 8 ? col.GetInt() : col.GetInt64());
409
44
        }
410
44
        col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&value)), 0);
411
44
        return Status::OK();
412
44
    }
413
414
0
    auto parse_and_insert_data = [&](const rapidjson::Value& col_value) -> Status {
415
0
        StringParser::ParseResult result;
416
0
        std::string val = col_value.GetString();
417
        // ES allows inserting numbers and characters containing decimals in numeric types.
418
        // To parse these numbers in Doris, we remove the decimals here.
419
0
        size_t pos = val.find('.');
420
0
        if (pos != std::string::npos) {
421
0
            val = val.substr(0, pos);
422
0
        }
423
0
        size_t len = val.length();
424
0
        T v = StringParser::string_to_int<T>(val.c_str(), len, &result);
425
0
        RETURN_ERROR_IF_PARSING_FAILED(result, col_value, type);
426
427
0
        col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&v)), 0);
428
0
        return Status::OK();
429
0
    };
430
431
0
    if (pure_doc_value && col.IsArray() && !col.Empty()) {
432
0
        if (col[0].IsNumber()) {
433
0
            T value = (T)(sizeof(T) < 8 ? col[0].GetInt() : col[0].GetInt64());
434
0
            col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&value)), 0);
435
0
            return Status::OK();
436
0
        } else {
437
0
            RETURN_ERROR_IF_COL_IS_ARRAY(col[0], type, true);
438
0
            RETURN_ERROR_IF_COL_IS_NOT_STRING(col[0], type);
439
0
            return parse_and_insert_data(col[0]);
440
0
        }
441
0
    }
442
443
0
    RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
444
0
    RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
445
0
    return parse_and_insert_data(col);
446
0
}
_ZN5doris16insert_int_valueIlEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPNS_7IColumnEbb
Line
Count
Source
398
920
                        bool pure_doc_value, bool nullable) {
399
920
    if (col.IsNumber()) {
400
528
        T value;
401
        // ES allows inserting float and double in int/long types.
402
        // To parse these numbers in Doris, we cast them directly to int types.
403
528
        if (col.IsDouble()) {
404
152
            value = static_cast<T>(col.GetDouble());
405
376
        } else if (col.IsFloat()) {
406
0
            value = static_cast<T>(col.GetFloat());
407
376
        } else {
408
376
            value = (T)(sizeof(T) < 8 ? col.GetInt() : col.GetInt64());
409
376
        }
410
528
        col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&value)), 0);
411
528
        return Status::OK();
412
528
    }
413
414
392
    auto parse_and_insert_data = [&](const rapidjson::Value& col_value) -> Status {
415
392
        StringParser::ParseResult result;
416
392
        std::string val = col_value.GetString();
417
        // ES allows inserting numbers and characters containing decimals in numeric types.
418
        // To parse these numbers in Doris, we remove the decimals here.
419
392
        size_t pos = val.find('.');
420
392
        if (pos != std::string::npos) {
421
392
            val = val.substr(0, pos);
422
392
        }
423
392
        size_t len = val.length();
424
392
        T v = StringParser::string_to_int<T>(val.c_str(), len, &result);
425
392
        RETURN_ERROR_IF_PARSING_FAILED(result, col_value, type);
426
427
392
        col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&v)), 0);
428
392
        return Status::OK();
429
392
    };
430
431
392
    if (pure_doc_value && col.IsArray() && !col.Empty()) {
432
274
        if (col[0].IsNumber()) {
433
172
            T value = (T)(sizeof(T) < 8 ? col[0].GetInt() : col[0].GetInt64());
434
172
            col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&value)), 0);
435
172
            return Status::OK();
436
172
        } else {
437
102
            RETURN_ERROR_IF_COL_IS_ARRAY(col[0], type, true);
438
102
            RETURN_ERROR_IF_COL_IS_NOT_STRING(col[0], type);
439
102
            return parse_and_insert_data(col[0]);
440
102
        }
441
274
    }
442
443
118
    RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
444
118
    RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
445
118
    return parse_and_insert_data(col);
446
118
}
Unexecuted instantiation: _ZN5doris16insert_int_valueInEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPNS_7IColumnEbb
447
448
template <PrimitiveType T>
449
Status handle_value(const rapidjson::Value& col, PrimitiveType sub_type, bool pure_doc_value,
450
31.8k
                    typename PrimitiveTypeTraits<T>::CppType& val) {
451
    if constexpr (T == TYPE_TINYINT || T == TYPE_SMALLINT || T == TYPE_INT || T == TYPE_BIGINT ||
452
13.3k
                  T == TYPE_LARGEINT) {
453
13.3k
        RETURN_IF_ERROR(get_int_value<typename PrimitiveTypeTraits<T>::CppType>(col, sub_type, &val,
454
13.3k
                                                                                pure_doc_value));
455
13.3k
        return Status::OK();
456
13.3k
    }
457
5.20k
    if constexpr (T == TYPE_FLOAT) {
458
5.20k
        RETURN_IF_ERROR(get_float_value<float>(col, sub_type, &val, pure_doc_value));
459
5.20k
        return Status::OK();
460
5.20k
    }
461
5.20k
    if constexpr (T == TYPE_DOUBLE) {
462
5.20k
        RETURN_IF_ERROR(get_float_value<double>(col, sub_type, &val, pure_doc_value));
463
5.20k
        return Status::OK();
464
5.20k
    }
465
5.50k
    if constexpr (T == TYPE_STRING || T == TYPE_CHAR || T == TYPE_VARCHAR) {
466
        // When ES mapping is keyword/text but actual data is an array,
467
        // serialize the array to JSON string instead of throwing an error.
468
        // This is valid in ES since any field can hold array values.
469
5.50k
        if (col.IsArray()) {
470
0
            val = json_value_to_string(col);
471
5.50k
        } else if (!col.IsString()) {
472
0
            val = json_value_to_string(col);
473
5.50k
        } else {
474
5.50k
            val = col.GetString();
475
5.50k
        }
476
5.50k
        return Status::OK();
477
5.50k
    }
478
2.60k
    if constexpr (T == TYPE_BOOLEAN) {
479
2.60k
        if (col.IsBool()) {
480
2.60k
            val = col.GetBool();
481
2.60k
            return Status::OK();
482
2.60k
        }
483
484
0
        if (col.IsNumber()) {
485
0
            val = static_cast<typename PrimitiveTypeTraits<T>::CppType>(col.GetInt());
486
0
            return Status::OK();
487
0
        }
488
489
0
        bool is_nested_str = false;
490
0
        if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsBool()) {
491
0
            val = col[0].GetBool();
492
0
            return Status::OK();
493
0
        } else if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsString()) {
494
0
            is_nested_str = true;
495
0
        } else if (pure_doc_value && col.IsArray()) {
496
0
            return Status::InternalError(ERROR_INVALID_COL_DATA, "BOOLEAN");
497
0
        }
498
499
0
        const rapidjson::Value& str_col = is_nested_str ? col[0] : col;
500
0
        const std::string& str_val = str_col.GetString();
501
0
        size_t val_size = str_col.GetStringLength();
502
0
        StringParser::ParseResult result;
503
0
        val = StringParser::string_to_bool(str_val.c_str(), val_size, &result);
504
0
        RETURN_ERROR_IF_PARSING_FAILED(result, str_col, sub_type);
505
0
        return Status::OK();
506
0
    }
507
0
    throw Exception(ErrorCode::INTERNAL_ERROR, "Un-supported type: {}", type_to_string(T));
508
31.8k
}
_ZN5doris12handle_valueILNS_13PrimitiveTypeE23EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
450
5.50k
                    typename PrimitiveTypeTraits<T>::CppType& val) {
451
    if constexpr (T == TYPE_TINYINT || T == TYPE_SMALLINT || T == TYPE_INT || T == TYPE_BIGINT ||
452
                  T == TYPE_LARGEINT) {
453
        RETURN_IF_ERROR(get_int_value<typename PrimitiveTypeTraits<T>::CppType>(col, sub_type, &val,
454
                                                                                pure_doc_value));
455
        return Status::OK();
456
    }
457
    if constexpr (T == TYPE_FLOAT) {
458
        RETURN_IF_ERROR(get_float_value<float>(col, sub_type, &val, pure_doc_value));
459
        return Status::OK();
460
    }
461
    if constexpr (T == TYPE_DOUBLE) {
462
        RETURN_IF_ERROR(get_float_value<double>(col, sub_type, &val, pure_doc_value));
463
        return Status::OK();
464
    }
465
5.50k
    if constexpr (T == TYPE_STRING || T == TYPE_CHAR || T == TYPE_VARCHAR) {
466
        // When ES mapping is keyword/text but actual data is an array,
467
        // serialize the array to JSON string instead of throwing an error.
468
        // This is valid in ES since any field can hold array values.
469
5.50k
        if (col.IsArray()) {
470
0
            val = json_value_to_string(col);
471
5.50k
        } else if (!col.IsString()) {
472
0
            val = json_value_to_string(col);
473
5.50k
        } else {
474
5.50k
            val = col.GetString();
475
5.50k
        }
476
5.50k
        return Status::OK();
477
5.50k
    }
478
    if constexpr (T == TYPE_BOOLEAN) {
479
        if (col.IsBool()) {
480
            val = col.GetBool();
481
            return Status::OK();
482
        }
483
484
        if (col.IsNumber()) {
485
            val = static_cast<typename PrimitiveTypeTraits<T>::CppType>(col.GetInt());
486
            return Status::OK();
487
        }
488
489
        bool is_nested_str = false;
490
        if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsBool()) {
491
            val = col[0].GetBool();
492
            return Status::OK();
493
        } else if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsString()) {
494
            is_nested_str = true;
495
        } else if (pure_doc_value && col.IsArray()) {
496
            return Status::InternalError(ERROR_INVALID_COL_DATA, "BOOLEAN");
497
        }
498
499
        const rapidjson::Value& str_col = is_nested_str ? col[0] : col;
500
        const std::string& str_val = str_col.GetString();
501
        size_t val_size = str_col.GetStringLength();
502
        StringParser::ParseResult result;
503
        val = StringParser::string_to_bool(str_val.c_str(), val_size, &result);
504
        RETURN_ERROR_IF_PARSING_FAILED(result, str_col, sub_type);
505
        return Status::OK();
506
    }
507
5.50k
    throw Exception(ErrorCode::INTERNAL_ERROR, "Un-supported type: {}", type_to_string(T));
508
5.50k
}
_ZN5doris12handle_valueILNS_13PrimitiveTypeE3EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
450
2.60k
                    typename PrimitiveTypeTraits<T>::CppType& val) {
451
    if constexpr (T == TYPE_TINYINT || T == TYPE_SMALLINT || T == TYPE_INT || T == TYPE_BIGINT ||
452
2.60k
                  T == TYPE_LARGEINT) {
453
2.60k
        RETURN_IF_ERROR(get_int_value<typename PrimitiveTypeTraits<T>::CppType>(col, sub_type, &val,
454
2.60k
                                                                                pure_doc_value));
455
2.60k
        return Status::OK();
456
2.60k
    }
457
    if constexpr (T == TYPE_FLOAT) {
458
        RETURN_IF_ERROR(get_float_value<float>(col, sub_type, &val, pure_doc_value));
459
        return Status::OK();
460
    }
461
    if constexpr (T == TYPE_DOUBLE) {
462
        RETURN_IF_ERROR(get_float_value<double>(col, sub_type, &val, pure_doc_value));
463
        return Status::OK();
464
    }
465
    if constexpr (T == TYPE_STRING || T == TYPE_CHAR || T == TYPE_VARCHAR) {
466
        // When ES mapping is keyword/text but actual data is an array,
467
        // serialize the array to JSON string instead of throwing an error.
468
        // This is valid in ES since any field can hold array values.
469
        if (col.IsArray()) {
470
            val = json_value_to_string(col);
471
        } else if (!col.IsString()) {
472
            val = json_value_to_string(col);
473
        } else {
474
            val = col.GetString();
475
        }
476
        return Status::OK();
477
    }
478
    if constexpr (T == TYPE_BOOLEAN) {
479
        if (col.IsBool()) {
480
            val = col.GetBool();
481
            return Status::OK();
482
        }
483
484
        if (col.IsNumber()) {
485
            val = static_cast<typename PrimitiveTypeTraits<T>::CppType>(col.GetInt());
486
            return Status::OK();
487
        }
488
489
        bool is_nested_str = false;
490
        if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsBool()) {
491
            val = col[0].GetBool();
492
            return Status::OK();
493
        } else if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsString()) {
494
            is_nested_str = true;
495
        } else if (pure_doc_value && col.IsArray()) {
496
            return Status::InternalError(ERROR_INVALID_COL_DATA, "BOOLEAN");
497
        }
498
499
        const rapidjson::Value& str_col = is_nested_str ? col[0] : col;
500
        const std::string& str_val = str_col.GetString();
501
        size_t val_size = str_col.GetStringLength();
502
        StringParser::ParseResult result;
503
        val = StringParser::string_to_bool(str_val.c_str(), val_size, &result);
504
        RETURN_ERROR_IF_PARSING_FAILED(result, str_col, sub_type);
505
        return Status::OK();
506
    }
507
2.60k
    throw Exception(ErrorCode::INTERNAL_ERROR, "Un-supported type: {}", type_to_string(T));
508
2.60k
}
_ZN5doris12handle_valueILNS_13PrimitiveTypeE4EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
450
2.60k
                    typename PrimitiveTypeTraits<T>::CppType& val) {
451
    if constexpr (T == TYPE_TINYINT || T == TYPE_SMALLINT || T == TYPE_INT || T == TYPE_BIGINT ||
452
2.60k
                  T == TYPE_LARGEINT) {
453
2.60k
        RETURN_IF_ERROR(get_int_value<typename PrimitiveTypeTraits<T>::CppType>(col, sub_type, &val,
454
2.60k
                                                                                pure_doc_value));
455
2.60k
        return Status::OK();
456
2.60k
    }
457
    if constexpr (T == TYPE_FLOAT) {
458
        RETURN_IF_ERROR(get_float_value<float>(col, sub_type, &val, pure_doc_value));
459
        return Status::OK();
460
    }
461
    if constexpr (T == TYPE_DOUBLE) {
462
        RETURN_IF_ERROR(get_float_value<double>(col, sub_type, &val, pure_doc_value));
463
        return Status::OK();
464
    }
465
    if constexpr (T == TYPE_STRING || T == TYPE_CHAR || T == TYPE_VARCHAR) {
466
        // When ES mapping is keyword/text but actual data is an array,
467
        // serialize the array to JSON string instead of throwing an error.
468
        // This is valid in ES since any field can hold array values.
469
        if (col.IsArray()) {
470
            val = json_value_to_string(col);
471
        } else if (!col.IsString()) {
472
            val = json_value_to_string(col);
473
        } else {
474
            val = col.GetString();
475
        }
476
        return Status::OK();
477
    }
478
    if constexpr (T == TYPE_BOOLEAN) {
479
        if (col.IsBool()) {
480
            val = col.GetBool();
481
            return Status::OK();
482
        }
483
484
        if (col.IsNumber()) {
485
            val = static_cast<typename PrimitiveTypeTraits<T>::CppType>(col.GetInt());
486
            return Status::OK();
487
        }
488
489
        bool is_nested_str = false;
490
        if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsBool()) {
491
            val = col[0].GetBool();
492
            return Status::OK();
493
        } else if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsString()) {
494
            is_nested_str = true;
495
        } else if (pure_doc_value && col.IsArray()) {
496
            return Status::InternalError(ERROR_INVALID_COL_DATA, "BOOLEAN");
497
        }
498
499
        const rapidjson::Value& str_col = is_nested_str ? col[0] : col;
500
        const std::string& str_val = str_col.GetString();
501
        size_t val_size = str_col.GetStringLength();
502
        StringParser::ParseResult result;
503
        val = StringParser::string_to_bool(str_val.c_str(), val_size, &result);
504
        RETURN_ERROR_IF_PARSING_FAILED(result, str_col, sub_type);
505
        return Status::OK();
506
    }
507
2.60k
    throw Exception(ErrorCode::INTERNAL_ERROR, "Un-supported type: {}", type_to_string(T));
508
2.60k
}
_ZN5doris12handle_valueILNS_13PrimitiveTypeE5EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
450
2.90k
                    typename PrimitiveTypeTraits<T>::CppType& val) {
451
    if constexpr (T == TYPE_TINYINT || T == TYPE_SMALLINT || T == TYPE_INT || T == TYPE_BIGINT ||
452
2.90k
                  T == TYPE_LARGEINT) {
453
2.90k
        RETURN_IF_ERROR(get_int_value<typename PrimitiveTypeTraits<T>::CppType>(col, sub_type, &val,
454
2.90k
                                                                                pure_doc_value));
455
2.90k
        return Status::OK();
456
2.90k
    }
457
    if constexpr (T == TYPE_FLOAT) {
458
        RETURN_IF_ERROR(get_float_value<float>(col, sub_type, &val, pure_doc_value));
459
        return Status::OK();
460
    }
461
    if constexpr (T == TYPE_DOUBLE) {
462
        RETURN_IF_ERROR(get_float_value<double>(col, sub_type, &val, pure_doc_value));
463
        return Status::OK();
464
    }
465
    if constexpr (T == TYPE_STRING || T == TYPE_CHAR || T == TYPE_VARCHAR) {
466
        // When ES mapping is keyword/text but actual data is an array,
467
        // serialize the array to JSON string instead of throwing an error.
468
        // This is valid in ES since any field can hold array values.
469
        if (col.IsArray()) {
470
            val = json_value_to_string(col);
471
        } else if (!col.IsString()) {
472
            val = json_value_to_string(col);
473
        } else {
474
            val = col.GetString();
475
        }
476
        return Status::OK();
477
    }
478
    if constexpr (T == TYPE_BOOLEAN) {
479
        if (col.IsBool()) {
480
            val = col.GetBool();
481
            return Status::OK();
482
        }
483
484
        if (col.IsNumber()) {
485
            val = static_cast<typename PrimitiveTypeTraits<T>::CppType>(col.GetInt());
486
            return Status::OK();
487
        }
488
489
        bool is_nested_str = false;
490
        if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsBool()) {
491
            val = col[0].GetBool();
492
            return Status::OK();
493
        } else if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsString()) {
494
            is_nested_str = true;
495
        } else if (pure_doc_value && col.IsArray()) {
496
            return Status::InternalError(ERROR_INVALID_COL_DATA, "BOOLEAN");
497
        }
498
499
        const rapidjson::Value& str_col = is_nested_str ? col[0] : col;
500
        const std::string& str_val = str_col.GetString();
501
        size_t val_size = str_col.GetStringLength();
502
        StringParser::ParseResult result;
503
        val = StringParser::string_to_bool(str_val.c_str(), val_size, &result);
504
        RETURN_ERROR_IF_PARSING_FAILED(result, str_col, sub_type);
505
        return Status::OK();
506
    }
507
2.90k
    throw Exception(ErrorCode::INTERNAL_ERROR, "Un-supported type: {}", type_to_string(T));
508
2.90k
}
_ZN5doris12handle_valueILNS_13PrimitiveTypeE6EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
450
3.53k
                    typename PrimitiveTypeTraits<T>::CppType& val) {
451
    if constexpr (T == TYPE_TINYINT || T == TYPE_SMALLINT || T == TYPE_INT || T == TYPE_BIGINT ||
452
3.53k
                  T == TYPE_LARGEINT) {
453
3.53k
        RETURN_IF_ERROR(get_int_value<typename PrimitiveTypeTraits<T>::CppType>(col, sub_type, &val,
454
3.53k
                                                                                pure_doc_value));
455
3.53k
        return Status::OK();
456
3.53k
    }
457
    if constexpr (T == TYPE_FLOAT) {
458
        RETURN_IF_ERROR(get_float_value<float>(col, sub_type, &val, pure_doc_value));
459
        return Status::OK();
460
    }
461
    if constexpr (T == TYPE_DOUBLE) {
462
        RETURN_IF_ERROR(get_float_value<double>(col, sub_type, &val, pure_doc_value));
463
        return Status::OK();
464
    }
465
    if constexpr (T == TYPE_STRING || T == TYPE_CHAR || T == TYPE_VARCHAR) {
466
        // When ES mapping is keyword/text but actual data is an array,
467
        // serialize the array to JSON string instead of throwing an error.
468
        // This is valid in ES since any field can hold array values.
469
        if (col.IsArray()) {
470
            val = json_value_to_string(col);
471
        } else if (!col.IsString()) {
472
            val = json_value_to_string(col);
473
        } else {
474
            val = col.GetString();
475
        }
476
        return Status::OK();
477
    }
478
    if constexpr (T == TYPE_BOOLEAN) {
479
        if (col.IsBool()) {
480
            val = col.GetBool();
481
            return Status::OK();
482
        }
483
484
        if (col.IsNumber()) {
485
            val = static_cast<typename PrimitiveTypeTraits<T>::CppType>(col.GetInt());
486
            return Status::OK();
487
        }
488
489
        bool is_nested_str = false;
490
        if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsBool()) {
491
            val = col[0].GetBool();
492
            return Status::OK();
493
        } else if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsString()) {
494
            is_nested_str = true;
495
        } else if (pure_doc_value && col.IsArray()) {
496
            return Status::InternalError(ERROR_INVALID_COL_DATA, "BOOLEAN");
497
        }
498
499
        const rapidjson::Value& str_col = is_nested_str ? col[0] : col;
500
        const std::string& str_val = str_col.GetString();
501
        size_t val_size = str_col.GetStringLength();
502
        StringParser::ParseResult result;
503
        val = StringParser::string_to_bool(str_val.c_str(), val_size, &result);
504
        RETURN_ERROR_IF_PARSING_FAILED(result, str_col, sub_type);
505
        return Status::OK();
506
    }
507
3.53k
    throw Exception(ErrorCode::INTERNAL_ERROR, "Un-supported type: {}", type_to_string(T));
508
3.53k
}
_ZN5doris12handle_valueILNS_13PrimitiveTypeE7EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
450
1.66k
                    typename PrimitiveTypeTraits<T>::CppType& val) {
451
    if constexpr (T == TYPE_TINYINT || T == TYPE_SMALLINT || T == TYPE_INT || T == TYPE_BIGINT ||
452
1.66k
                  T == TYPE_LARGEINT) {
453
1.66k
        RETURN_IF_ERROR(get_int_value<typename PrimitiveTypeTraits<T>::CppType>(col, sub_type, &val,
454
1.66k
                                                                                pure_doc_value));
455
1.66k
        return Status::OK();
456
1.66k
    }
457
    if constexpr (T == TYPE_FLOAT) {
458
        RETURN_IF_ERROR(get_float_value<float>(col, sub_type, &val, pure_doc_value));
459
        return Status::OK();
460
    }
461
    if constexpr (T == TYPE_DOUBLE) {
462
        RETURN_IF_ERROR(get_float_value<double>(col, sub_type, &val, pure_doc_value));
463
        return Status::OK();
464
    }
465
    if constexpr (T == TYPE_STRING || T == TYPE_CHAR || T == TYPE_VARCHAR) {
466
        // When ES mapping is keyword/text but actual data is an array,
467
        // serialize the array to JSON string instead of throwing an error.
468
        // This is valid in ES since any field can hold array values.
469
        if (col.IsArray()) {
470
            val = json_value_to_string(col);
471
        } else if (!col.IsString()) {
472
            val = json_value_to_string(col);
473
        } else {
474
            val = col.GetString();
475
        }
476
        return Status::OK();
477
    }
478
    if constexpr (T == TYPE_BOOLEAN) {
479
        if (col.IsBool()) {
480
            val = col.GetBool();
481
            return Status::OK();
482
        }
483
484
        if (col.IsNumber()) {
485
            val = static_cast<typename PrimitiveTypeTraits<T>::CppType>(col.GetInt());
486
            return Status::OK();
487
        }
488
489
        bool is_nested_str = false;
490
        if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsBool()) {
491
            val = col[0].GetBool();
492
            return Status::OK();
493
        } else if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsString()) {
494
            is_nested_str = true;
495
        } else if (pure_doc_value && col.IsArray()) {
496
            return Status::InternalError(ERROR_INVALID_COL_DATA, "BOOLEAN");
497
        }
498
499
        const rapidjson::Value& str_col = is_nested_str ? col[0] : col;
500
        const std::string& str_val = str_col.GetString();
501
        size_t val_size = str_col.GetStringLength();
502
        StringParser::ParseResult result;
503
        val = StringParser::string_to_bool(str_val.c_str(), val_size, &result);
504
        RETURN_ERROR_IF_PARSING_FAILED(result, str_col, sub_type);
505
        return Status::OK();
506
    }
507
1.66k
    throw Exception(ErrorCode::INTERNAL_ERROR, "Un-supported type: {}", type_to_string(T));
508
1.66k
}
_ZN5doris12handle_valueILNS_13PrimitiveTypeE8EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
450
5.20k
                    typename PrimitiveTypeTraits<T>::CppType& val) {
451
    if constexpr (T == TYPE_TINYINT || T == TYPE_SMALLINT || T == TYPE_INT || T == TYPE_BIGINT ||
452
                  T == TYPE_LARGEINT) {
453
        RETURN_IF_ERROR(get_int_value<typename PrimitiveTypeTraits<T>::CppType>(col, sub_type, &val,
454
                                                                                pure_doc_value));
455
        return Status::OK();
456
    }
457
5.20k
    if constexpr (T == TYPE_FLOAT) {
458
5.20k
        RETURN_IF_ERROR(get_float_value<float>(col, sub_type, &val, pure_doc_value));
459
5.20k
        return Status::OK();
460
5.20k
    }
461
    if constexpr (T == TYPE_DOUBLE) {
462
        RETURN_IF_ERROR(get_float_value<double>(col, sub_type, &val, pure_doc_value));
463
        return Status::OK();
464
    }
465
    if constexpr (T == TYPE_STRING || T == TYPE_CHAR || T == TYPE_VARCHAR) {
466
        // When ES mapping is keyword/text but actual data is an array,
467
        // serialize the array to JSON string instead of throwing an error.
468
        // This is valid in ES since any field can hold array values.
469
        if (col.IsArray()) {
470
            val = json_value_to_string(col);
471
        } else if (!col.IsString()) {
472
            val = json_value_to_string(col);
473
        } else {
474
            val = col.GetString();
475
        }
476
        return Status::OK();
477
    }
478
    if constexpr (T == TYPE_BOOLEAN) {
479
        if (col.IsBool()) {
480
            val = col.GetBool();
481
            return Status::OK();
482
        }
483
484
        if (col.IsNumber()) {
485
            val = static_cast<typename PrimitiveTypeTraits<T>::CppType>(col.GetInt());
486
            return Status::OK();
487
        }
488
489
        bool is_nested_str = false;
490
        if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsBool()) {
491
            val = col[0].GetBool();
492
            return Status::OK();
493
        } else if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsString()) {
494
            is_nested_str = true;
495
        } else if (pure_doc_value && col.IsArray()) {
496
            return Status::InternalError(ERROR_INVALID_COL_DATA, "BOOLEAN");
497
        }
498
499
        const rapidjson::Value& str_col = is_nested_str ? col[0] : col;
500
        const std::string& str_val = str_col.GetString();
501
        size_t val_size = str_col.GetStringLength();
502
        StringParser::ParseResult result;
503
        val = StringParser::string_to_bool(str_val.c_str(), val_size, &result);
504
        RETURN_ERROR_IF_PARSING_FAILED(result, str_col, sub_type);
505
        return Status::OK();
506
    }
507
5.20k
    throw Exception(ErrorCode::INTERNAL_ERROR, "Un-supported type: {}", type_to_string(T));
508
5.20k
}
_ZN5doris12handle_valueILNS_13PrimitiveTypeE9EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
450
5.20k
                    typename PrimitiveTypeTraits<T>::CppType& val) {
451
    if constexpr (T == TYPE_TINYINT || T == TYPE_SMALLINT || T == TYPE_INT || T == TYPE_BIGINT ||
452
                  T == TYPE_LARGEINT) {
453
        RETURN_IF_ERROR(get_int_value<typename PrimitiveTypeTraits<T>::CppType>(col, sub_type, &val,
454
                                                                                pure_doc_value));
455
        return Status::OK();
456
    }
457
    if constexpr (T == TYPE_FLOAT) {
458
        RETURN_IF_ERROR(get_float_value<float>(col, sub_type, &val, pure_doc_value));
459
        return Status::OK();
460
    }
461
5.20k
    if constexpr (T == TYPE_DOUBLE) {
462
5.20k
        RETURN_IF_ERROR(get_float_value<double>(col, sub_type, &val, pure_doc_value));
463
5.20k
        return Status::OK();
464
5.20k
    }
465
    if constexpr (T == TYPE_STRING || T == TYPE_CHAR || T == TYPE_VARCHAR) {
466
        // When ES mapping is keyword/text but actual data is an array,
467
        // serialize the array to JSON string instead of throwing an error.
468
        // This is valid in ES since any field can hold array values.
469
        if (col.IsArray()) {
470
            val = json_value_to_string(col);
471
        } else if (!col.IsString()) {
472
            val = json_value_to_string(col);
473
        } else {
474
            val = col.GetString();
475
        }
476
        return Status::OK();
477
    }
478
    if constexpr (T == TYPE_BOOLEAN) {
479
        if (col.IsBool()) {
480
            val = col.GetBool();
481
            return Status::OK();
482
        }
483
484
        if (col.IsNumber()) {
485
            val = static_cast<typename PrimitiveTypeTraits<T>::CppType>(col.GetInt());
486
            return Status::OK();
487
        }
488
489
        bool is_nested_str = false;
490
        if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsBool()) {
491
            val = col[0].GetBool();
492
            return Status::OK();
493
        } else if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsString()) {
494
            is_nested_str = true;
495
        } else if (pure_doc_value && col.IsArray()) {
496
            return Status::InternalError(ERROR_INVALID_COL_DATA, "BOOLEAN");
497
        }
498
499
        const rapidjson::Value& str_col = is_nested_str ? col[0] : col;
500
        const std::string& str_val = str_col.GetString();
501
        size_t val_size = str_col.GetStringLength();
502
        StringParser::ParseResult result;
503
        val = StringParser::string_to_bool(str_val.c_str(), val_size, &result);
504
        RETURN_ERROR_IF_PARSING_FAILED(result, str_col, sub_type);
505
        return Status::OK();
506
    }
507
5.20k
    throw Exception(ErrorCode::INTERNAL_ERROR, "Un-supported type: {}", type_to_string(T));
508
5.20k
}
_ZN5doris12handle_valueILNS_13PrimitiveTypeE2EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
450
2.60k
                    typename PrimitiveTypeTraits<T>::CppType& val) {
451
    if constexpr (T == TYPE_TINYINT || T == TYPE_SMALLINT || T == TYPE_INT || T == TYPE_BIGINT ||
452
                  T == TYPE_LARGEINT) {
453
        RETURN_IF_ERROR(get_int_value<typename PrimitiveTypeTraits<T>::CppType>(col, sub_type, &val,
454
                                                                                pure_doc_value));
455
        return Status::OK();
456
    }
457
    if constexpr (T == TYPE_FLOAT) {
458
        RETURN_IF_ERROR(get_float_value<float>(col, sub_type, &val, pure_doc_value));
459
        return Status::OK();
460
    }
461
    if constexpr (T == TYPE_DOUBLE) {
462
        RETURN_IF_ERROR(get_float_value<double>(col, sub_type, &val, pure_doc_value));
463
        return Status::OK();
464
    }
465
    if constexpr (T == TYPE_STRING || T == TYPE_CHAR || T == TYPE_VARCHAR) {
466
        // When ES mapping is keyword/text but actual data is an array,
467
        // serialize the array to JSON string instead of throwing an error.
468
        // This is valid in ES since any field can hold array values.
469
        if (col.IsArray()) {
470
            val = json_value_to_string(col);
471
        } else if (!col.IsString()) {
472
            val = json_value_to_string(col);
473
        } else {
474
            val = col.GetString();
475
        }
476
        return Status::OK();
477
    }
478
2.60k
    if constexpr (T == TYPE_BOOLEAN) {
479
2.60k
        if (col.IsBool()) {
480
2.60k
            val = col.GetBool();
481
2.60k
            return Status::OK();
482
2.60k
        }
483
484
0
        if (col.IsNumber()) {
485
0
            val = static_cast<typename PrimitiveTypeTraits<T>::CppType>(col.GetInt());
486
0
            return Status::OK();
487
0
        }
488
489
0
        bool is_nested_str = false;
490
0
        if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsBool()) {
491
0
            val = col[0].GetBool();
492
0
            return Status::OK();
493
0
        } else if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsString()) {
494
0
            is_nested_str = true;
495
0
        } else if (pure_doc_value && col.IsArray()) {
496
0
            return Status::InternalError(ERROR_INVALID_COL_DATA, "BOOLEAN");
497
0
        }
498
499
0
        const rapidjson::Value& str_col = is_nested_str ? col[0] : col;
500
0
        const std::string& str_val = str_col.GetString();
501
0
        size_t val_size = str_col.GetStringLength();
502
0
        StringParser::ParseResult result;
503
0
        val = StringParser::string_to_bool(str_val.c_str(), val_size, &result);
504
0
        RETURN_ERROR_IF_PARSING_FAILED(result, str_col, sub_type);
505
0
        return Status::OK();
506
0
    }
507
0
    throw Exception(ErrorCode::INTERNAL_ERROR, "Un-supported type: {}", type_to_string(T));
508
2.60k
}
509
510
template <PrimitiveType T>
511
Status process_single_column(const rapidjson::Value& col, PrimitiveType sub_type,
512
31.8k
                             bool pure_doc_value, Array& array) {
513
31.8k
    typename PrimitiveTypeTraits<T>::CppType val;
514
31.8k
    RETURN_IF_ERROR(handle_value<T>(col, sub_type, pure_doc_value, val));
515
31.8k
    array.push_back(Field::create_field<T>(val));
516
31.8k
    return Status::OK();
517
31.8k
}
_ZN5doris21process_single_columnILNS_13PrimitiveTypeE23EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
512
5.50k
                             bool pure_doc_value, Array& array) {
513
5.50k
    typename PrimitiveTypeTraits<T>::CppType val;
514
5.50k
    RETURN_IF_ERROR(handle_value<T>(col, sub_type, pure_doc_value, val));
515
5.50k
    array.push_back(Field::create_field<T>(val));
516
5.50k
    return Status::OK();
517
5.50k
}
_ZN5doris21process_single_columnILNS_13PrimitiveTypeE3EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
512
2.60k
                             bool pure_doc_value, Array& array) {
513
2.60k
    typename PrimitiveTypeTraits<T>::CppType val;
514
2.60k
    RETURN_IF_ERROR(handle_value<T>(col, sub_type, pure_doc_value, val));
515
2.60k
    array.push_back(Field::create_field<T>(val));
516
2.60k
    return Status::OK();
517
2.60k
}
_ZN5doris21process_single_columnILNS_13PrimitiveTypeE4EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
512
2.60k
                             bool pure_doc_value, Array& array) {
513
2.60k
    typename PrimitiveTypeTraits<T>::CppType val;
514
2.60k
    RETURN_IF_ERROR(handle_value<T>(col, sub_type, pure_doc_value, val));
515
2.60k
    array.push_back(Field::create_field<T>(val));
516
2.60k
    return Status::OK();
517
2.60k
}
_ZN5doris21process_single_columnILNS_13PrimitiveTypeE5EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
512
2.90k
                             bool pure_doc_value, Array& array) {
513
2.90k
    typename PrimitiveTypeTraits<T>::CppType val;
514
2.90k
    RETURN_IF_ERROR(handle_value<T>(col, sub_type, pure_doc_value, val));
515
2.90k
    array.push_back(Field::create_field<T>(val));
516
2.90k
    return Status::OK();
517
2.90k
}
_ZN5doris21process_single_columnILNS_13PrimitiveTypeE6EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
512
3.53k
                             bool pure_doc_value, Array& array) {
513
3.53k
    typename PrimitiveTypeTraits<T>::CppType val;
514
3.53k
    RETURN_IF_ERROR(handle_value<T>(col, sub_type, pure_doc_value, val));
515
3.53k
    array.push_back(Field::create_field<T>(val));
516
3.53k
    return Status::OK();
517
3.53k
}
_ZN5doris21process_single_columnILNS_13PrimitiveTypeE7EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
512
1.66k
                             bool pure_doc_value, Array& array) {
513
1.66k
    typename PrimitiveTypeTraits<T>::CppType val;
514
1.66k
    RETURN_IF_ERROR(handle_value<T>(col, sub_type, pure_doc_value, val));
515
1.66k
    array.push_back(Field::create_field<T>(val));
516
1.66k
    return Status::OK();
517
1.66k
}
_ZN5doris21process_single_columnILNS_13PrimitiveTypeE8EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
512
5.20k
                             bool pure_doc_value, Array& array) {
513
5.20k
    typename PrimitiveTypeTraits<T>::CppType val;
514
5.20k
    RETURN_IF_ERROR(handle_value<T>(col, sub_type, pure_doc_value, val));
515
5.20k
    array.push_back(Field::create_field<T>(val));
516
5.20k
    return Status::OK();
517
5.20k
}
_ZN5doris21process_single_columnILNS_13PrimitiveTypeE9EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
512
5.20k
                             bool pure_doc_value, Array& array) {
513
5.20k
    typename PrimitiveTypeTraits<T>::CppType val;
514
5.20k
    RETURN_IF_ERROR(handle_value<T>(col, sub_type, pure_doc_value, val));
515
5.20k
    array.push_back(Field::create_field<T>(val));
516
5.20k
    return Status::OK();
517
5.20k
}
_ZN5doris21process_single_columnILNS_13PrimitiveTypeE2EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
512
2.60k
                             bool pure_doc_value, Array& array) {
513
2.60k
    typename PrimitiveTypeTraits<T>::CppType val;
514
2.60k
    RETURN_IF_ERROR(handle_value<T>(col, sub_type, pure_doc_value, val));
515
2.60k
    array.push_back(Field::create_field<T>(val));
516
2.60k
    return Status::OK();
517
2.60k
}
518
519
template <PrimitiveType T>
520
Status process_column_array(const rapidjson::Value& col, PrimitiveType sub_type,
521
8.64k
                            bool pure_doc_value, Array& array) {
522
31.7k
    for (const auto& sub_col : col.GetArray()) {
523
31.7k
        RETURN_IF_ERROR(process_single_column<T>(sub_col, sub_type, pure_doc_value, array));
524
31.7k
    }
525
8.64k
    return Status::OK();
526
8.64k
}
_ZN5doris20process_column_arrayILNS_13PrimitiveTypeE23EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
521
2.04k
                            bool pure_doc_value, Array& array) {
522
5.45k
    for (const auto& sub_col : col.GetArray()) {
523
5.45k
        RETURN_IF_ERROR(process_single_column<T>(sub_col, sub_type, pure_doc_value, array));
524
5.45k
    }
525
2.04k
    return Status::OK();
526
2.04k
}
_ZN5doris20process_column_arrayILNS_13PrimitiveTypeE3EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
521
650
                            bool pure_doc_value, Array& array) {
522
2.60k
    for (const auto& sub_col : col.GetArray()) {
523
2.60k
        RETURN_IF_ERROR(process_single_column<T>(sub_col, sub_type, pure_doc_value, array));
524
2.60k
    }
525
650
    return Status::OK();
526
650
}
_ZN5doris20process_column_arrayILNS_13PrimitiveTypeE4EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
521
650
                            bool pure_doc_value, Array& array) {
522
2.60k
    for (const auto& sub_col : col.GetArray()) {
523
2.60k
        RETURN_IF_ERROR(process_single_column<T>(sub_col, sub_type, pure_doc_value, array));
524
2.60k
    }
525
650
    return Status::OK();
526
650
}
_ZN5doris20process_column_arrayILNS_13PrimitiveTypeE5EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
521
746
                            bool pure_doc_value, Array& array) {
522
2.85k
    for (const auto& sub_col : col.GetArray()) {
523
2.85k
        RETURN_IF_ERROR(process_single_column<T>(sub_col, sub_type, pure_doc_value, array));
524
2.85k
    }
525
746
    return Status::OK();
526
746
}
_ZN5doris20process_column_arrayILNS_13PrimitiveTypeE6EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
521
884
                            bool pure_doc_value, Array& array) {
522
3.53k
    for (const auto& sub_col : col.GetArray()) {
523
3.53k
        RETURN_IF_ERROR(process_single_column<T>(sub_col, sub_type, pure_doc_value, array));
524
3.53k
    }
525
884
    return Status::OK();
526
884
}
_ZN5doris20process_column_arrayILNS_13PrimitiveTypeE7EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
521
416
                            bool pure_doc_value, Array& array) {
522
1.66k
    for (const auto& sub_col : col.GetArray()) {
523
1.66k
        RETURN_IF_ERROR(process_single_column<T>(sub_col, sub_type, pure_doc_value, array));
524
1.66k
    }
525
416
    return Status::OK();
526
416
}
_ZN5doris20process_column_arrayILNS_13PrimitiveTypeE8EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
521
1.30k
                            bool pure_doc_value, Array& array) {
522
5.20k
    for (const auto& sub_col : col.GetArray()) {
523
5.20k
        RETURN_IF_ERROR(process_single_column<T>(sub_col, sub_type, pure_doc_value, array));
524
5.20k
    }
525
1.30k
    return Status::OK();
526
1.30k
}
_ZN5doris20process_column_arrayILNS_13PrimitiveTypeE9EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
521
1.30k
                            bool pure_doc_value, Array& array) {
522
5.20k
    for (const auto& sub_col : col.GetArray()) {
523
5.20k
        RETURN_IF_ERROR(process_single_column<T>(sub_col, sub_type, pure_doc_value, array));
524
5.20k
    }
525
1.30k
    return Status::OK();
526
1.30k
}
_ZN5doris20process_column_arrayILNS_13PrimitiveTypeE2EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
521
650
                            bool pure_doc_value, Array& array) {
522
2.60k
    for (const auto& sub_col : col.GetArray()) {
523
2.60k
        RETURN_IF_ERROR(process_single_column<T>(sub_col, sub_type, pure_doc_value, array));
524
2.60k
    }
525
650
    return Status::OK();
526
650
}
527
528
template <PrimitiveType T>
529
Status process_column(const rapidjson::Value& col, PrimitiveType sub_type, bool pure_doc_value,
530
8.73k
                      Array& array) {
531
8.73k
    if (!col.IsArray()) {
532
96
        return process_single_column<T>(col, sub_type, pure_doc_value, array);
533
8.64k
    } else {
534
8.64k
        return process_column_array<T>(col, sub_type, pure_doc_value, array);
535
8.64k
    }
536
8.73k
}
_ZN5doris14process_columnILNS_13PrimitiveTypeE23EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
530
2.09k
                      Array& array) {
531
2.09k
    if (!col.IsArray()) {
532
48
        return process_single_column<T>(col, sub_type, pure_doc_value, array);
533
2.04k
    } else {
534
2.04k
        return process_column_array<T>(col, sub_type, pure_doc_value, array);
535
2.04k
    }
536
2.09k
}
_ZN5doris14process_columnILNS_13PrimitiveTypeE3EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
530
650
                      Array& array) {
531
650
    if (!col.IsArray()) {
532
0
        return process_single_column<T>(col, sub_type, pure_doc_value, array);
533
650
    } else {
534
650
        return process_column_array<T>(col, sub_type, pure_doc_value, array);
535
650
    }
536
650
}
_ZN5doris14process_columnILNS_13PrimitiveTypeE4EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
530
650
                      Array& array) {
531
650
    if (!col.IsArray()) {
532
0
        return process_single_column<T>(col, sub_type, pure_doc_value, array);
533
650
    } else {
534
650
        return process_column_array<T>(col, sub_type, pure_doc_value, array);
535
650
    }
536
650
}
_ZN5doris14process_columnILNS_13PrimitiveTypeE5EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
530
794
                      Array& array) {
531
794
    if (!col.IsArray()) {
532
48
        return process_single_column<T>(col, sub_type, pure_doc_value, array);
533
746
    } else {
534
746
        return process_column_array<T>(col, sub_type, pure_doc_value, array);
535
746
    }
536
794
}
_ZN5doris14process_columnILNS_13PrimitiveTypeE6EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
530
884
                      Array& array) {
531
884
    if (!col.IsArray()) {
532
0
        return process_single_column<T>(col, sub_type, pure_doc_value, array);
533
884
    } else {
534
884
        return process_column_array<T>(col, sub_type, pure_doc_value, array);
535
884
    }
536
884
}
_ZN5doris14process_columnILNS_13PrimitiveTypeE7EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
530
416
                      Array& array) {
531
416
    if (!col.IsArray()) {
532
0
        return process_single_column<T>(col, sub_type, pure_doc_value, array);
533
416
    } else {
534
416
        return process_column_array<T>(col, sub_type, pure_doc_value, array);
535
416
    }
536
416
}
_ZN5doris14process_columnILNS_13PrimitiveTypeE8EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
530
1.30k
                      Array& array) {
531
1.30k
    if (!col.IsArray()) {
532
0
        return process_single_column<T>(col, sub_type, pure_doc_value, array);
533
1.30k
    } else {
534
1.30k
        return process_column_array<T>(col, sub_type, pure_doc_value, array);
535
1.30k
    }
536
1.30k
}
_ZN5doris14process_columnILNS_13PrimitiveTypeE9EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
530
1.30k
                      Array& array) {
531
1.30k
    if (!col.IsArray()) {
532
0
        return process_single_column<T>(col, sub_type, pure_doc_value, array);
533
1.30k
    } else {
534
1.30k
        return process_column_array<T>(col, sub_type, pure_doc_value, array);
535
1.30k
    }
536
1.30k
}
_ZN5doris14process_columnILNS_13PrimitiveTypeE2EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
530
650
                      Array& array) {
531
650
    if (!col.IsArray()) {
532
0
        return process_single_column<T>(col, sub_type, pure_doc_value, array);
533
650
    } else {
534
650
        return process_column_array<T>(col, sub_type, pure_doc_value, array);
535
650
    }
536
650
}
537
538
template <PrimitiveType T>
539
Status process_date_column(const rapidjson::Value& col, PrimitiveType sub_type, bool pure_doc_value,
540
1.30k
                           Array& array, const cctz::time_zone& time_zone) {
541
1.30k
    if (!col.IsArray()) {
542
0
        typename PrimitiveTypeTraits<T>::CppType data;
543
0
        RETURN_IF_ERROR((get_date_int<T>(col, sub_type, pure_doc_value, &data, time_zone)));
544
0
        array.push_back(Field::create_field<T>(data));
545
1.30k
    } else {
546
2.60k
        for (const auto& sub_col : col.GetArray()) {
547
2.60k
            typename PrimitiveTypeTraits<T>::CppType data;
548
2.60k
            RETURN_IF_ERROR((get_date_int<T>(sub_col, sub_type, pure_doc_value, &data, time_zone)));
549
2.60k
            array.push_back(Field::create_field<T>(data));
550
2.60k
        }
551
1.30k
    }
552
1.30k
    return Status::OK();
553
1.30k
}
_ZN5doris19process_date_columnILNS_13PrimitiveTypeE25EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayERKN4cctz9time_zoneE
Line
Count
Source
540
650
                           Array& array, const cctz::time_zone& time_zone) {
541
650
    if (!col.IsArray()) {
542
0
        typename PrimitiveTypeTraits<T>::CppType data;
543
0
        RETURN_IF_ERROR((get_date_int<T>(col, sub_type, pure_doc_value, &data, time_zone)));
544
0
        array.push_back(Field::create_field<T>(data));
545
650
    } else {
546
1.30k
        for (const auto& sub_col : col.GetArray()) {
547
1.30k
            typename PrimitiveTypeTraits<T>::CppType data;
548
1.30k
            RETURN_IF_ERROR((get_date_int<T>(sub_col, sub_type, pure_doc_value, &data, time_zone)));
549
1.30k
            array.push_back(Field::create_field<T>(data));
550
1.30k
        }
551
650
    }
552
650
    return Status::OK();
553
650
}
_ZN5doris19process_date_columnILNS_13PrimitiveTypeE26EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayERKN4cctz9time_zoneE
Line
Count
Source
540
650
                           Array& array, const cctz::time_zone& time_zone) {
541
650
    if (!col.IsArray()) {
542
0
        typename PrimitiveTypeTraits<T>::CppType data;
543
0
        RETURN_IF_ERROR((get_date_int<T>(col, sub_type, pure_doc_value, &data, time_zone)));
544
0
        array.push_back(Field::create_field<T>(data));
545
650
    } else {
546
1.30k
        for (const auto& sub_col : col.GetArray()) {
547
1.30k
            typename PrimitiveTypeTraits<T>::CppType data;
548
1.30k
            RETURN_IF_ERROR((get_date_int<T>(sub_col, sub_type, pure_doc_value, &data, time_zone)));
549
1.30k
            array.push_back(Field::create_field<T>(data));
550
1.30k
        }
551
650
    }
552
650
    return Status::OK();
553
650
}
554
555
// Fills `array` with JSONB Fields built from `col`.
//
// `col` may be a single JSON value or a JSON array; every value is serialized back to a JSON
// string with json_value_to_string and re-encoded through JsonBinaryValue. The first encoding
// error aborts processing and is returned.
//
// `sub_type` and `pure_doc_value` are not used; they are kept so the signature matches the
// other process_*_column helpers.
Status process_jsonb_column(const rapidjson::Value& col, PrimitiveType sub_type,
                            bool pure_doc_value, Array& array) {
    // One conversion routine for both the scalar and the array case, so both paths move the
    // JsonbField into the Field instead of one of them copying it.
    const auto append_jsonb = [&array](const rapidjson::Value& item) -> Status {
        JsonBinaryValue jsonb_value;
        RETURN_IF_ERROR(jsonb_value.from_json_string(json_value_to_string(item)));
        JsonbField json(jsonb_value.value(), jsonb_value.size());
        array.push_back(Field::create_field<TYPE_JSONB>(std::move(json)));
        return Status::OK();
    };

    if (!col.IsArray()) {
        return append_jsonb(col);
    }
    for (const auto& sub_col : col.GetArray()) {
        RETURN_IF_ERROR(append_jsonb(sub_col));
    }
    return Status::OK();
}
572
573
// Converts the JSON value `col` into the elements of an ARRAY column whose nested primitive
// type is `sub_type`, appending the produced Fields to `array`.
//
// Dispatches to the typed process_* helper for `sub_type`. Nested types without a helper are
// logged and rejected with an InternalError.
Status ScrollParser::parse_column(const rapidjson::Value& col, PrimitiveType sub_type,
                                  bool pure_doc_value, Array& array,
                                  const cctz::time_zone& time_zone) {
    switch (sub_type) {
    // Every character type is materialized through the TYPE_STRING conversion.
    case TYPE_STRING:
    case TYPE_VARCHAR:
    case TYPE_CHAR:
        return process_column<TYPE_STRING>(col, sub_type, pure_doc_value, array);

    case TYPE_BOOLEAN:
        return process_column<TYPE_BOOLEAN>(col, sub_type, pure_doc_value, array);

    case TYPE_TINYINT:
        return process_column<TYPE_TINYINT>(col, sub_type, pure_doc_value, array);
    case TYPE_SMALLINT:
        return process_column<TYPE_SMALLINT>(col, sub_type, pure_doc_value, array);
    case TYPE_INT:
        return process_column<TYPE_INT>(col, sub_type, pure_doc_value, array);
    case TYPE_BIGINT:
        return process_column<TYPE_BIGINT>(col, sub_type, pure_doc_value, array);
    case TYPE_LARGEINT:
        return process_column<TYPE_LARGEINT>(col, sub_type, pure_doc_value, array);

    case TYPE_FLOAT:
        return process_column<TYPE_FLOAT>(col, sub_type, pure_doc_value, array);
    case TYPE_DOUBLE:
        return process_column<TYPE_DOUBLE>(col, sub_type, pure_doc_value, array);

    // date/datetime v2 is the default type for catalog tables
    // (see https://github.com/apache/doris/pull/16304), so the v1 date and datetime types
    // are intentionally not handled here.
    case TYPE_DATEV2:
        return process_date_column<TYPE_DATEV2>(col, sub_type, pure_doc_value, array, time_zone);
    case TYPE_DATETIMEV2:
        return process_date_column<TYPE_DATETIMEV2>(col, sub_type, pure_doc_value, array,
                                                    time_zone);

    case TYPE_JSONB:
        return process_jsonb_column(col, sub_type, pure_doc_value, array);

    default:
        break;
    }

    LOG(ERROR) << "Do not support Array type: " << sub_type;
    return Status::InternalError("Unsupported type");
}
615
616
640
// Initializes the batch size (`_size`) and the read cursor (`_line_index`) to zero.
// NOTE(review): `doc_value_mode` is accepted but not used by this constructor — confirm
// whether callers still need to pass it.
ScrollParser::ScrollParser(bool doc_value_mode) : _size(0), _line_index(0) {}
617
618
640
// Uses the compiler-generated destructor.
ScrollParser::~ScrollParser() = default;
619
620
640
// Parses one Elasticsearch search/scroll response and buffers its hits.
//
// Expected shape: { "_scroll_id": "...", "hits": { "total": 2, "hits": [ {}, {}, {} ] } }
//
// @param scroll_result  raw JSON text of the response.
// @param exactly_once   when false, the response must carry a string scroll id, which is
//                       stored for get_scroll_id(); when true the scroll id is not read.
// @return OK when the response was understood, including the cases where it carries no
//         documents (`_size` stays 0); InternalError when the text is not valid JSON, is not
//         a JSON object, lacks the required scroll id, or lacks the outer hits object.
Status ScrollParser::parse(const std::string& scroll_result, bool exactly_once) {
    // rely on `_size !=0 ` to determine whether scroll ends
    _size = 0;
    _document_node.Parse(scroll_result.c_str(), scroll_result.length());
    if (_document_node.HasParseError()) {
        return Status::InternalError("Parsing json error, json is: {}", scroll_result);
    }
    // Member lookups below are only valid on a JSON object; valid JSON such as `[]` or `1`
    // must be rejected explicitly instead of tripping a rapidjson assertion.
    if (!_document_node.IsObject()) {
        return Status::InternalError("Scroll response is not a json object, json is: {}",
                                     scroll_result);
    }

    if (!exactly_once) {
        const auto scroll_it = _document_node.FindMember(FIELD_SCROLL_ID);
        // A scroll id that is present but not a string is as unusable as a missing one.
        if (scroll_it == _document_node.MemberEnd() || !scroll_it->value.IsString()) {
            LOG(WARNING) << "Document has not a scroll id field scroll response:" << scroll_result;
            return Status::InternalError("Document has not a scroll id field");
        }
        _scroll_id = scroll_it->value.GetString();
    }

    // { hits: { total : 2, "hits" : [ {}, {}, {} ]}}
    const auto outer_hits_it = _document_node.FindMember(FIELD_HITS);
    if (outer_hits_it == _document_node.MemberEnd() || !outer_hits_it->value.IsObject()) {
        LOG(WARNING) << "Document has not a hits field scroll response:" << scroll_result;
        return Status::InternalError("Document has not a hits field");
    }
    const rapidjson::Value& outer_hits_node = outer_hits_it->value;

    // if has no inner hits, there has no data in this index
    const auto inner_hits_it = outer_hits_node.FindMember(FIELD_INNER_HITS);
    if (inner_hits_it == outer_hits_node.MemberEnd()) {
        return Status::OK();
    }
    // this happened just the end of scrolling
    if (!inner_hits_it->value.IsArray()) {
        return Status::OK();
    }

    _inner_hits_node.CopyFrom(inner_hits_it->value, _document_node.GetAllocator());
    // how many documents contains in this batch
    _size = _inner_hits_node.Size();
    return Status::OK();
}
653
654
1.27k
// Returns `_size`: parse() resets it to 0 and, when the response contains an inner hits
// array, sets it to the number of documents in that array.
int ScrollParser::get_size() const {
655
1.27k
    return _size;
656
1.27k
}
657
658
640
// Returns the stored scroll id. parse() only assigns it when called with
// `exactly_once == false`.
const std::string& ScrollParser::get_scroll_id() {
659
640
    return _scroll_id;
660
640
}
661
662
// Materializes the next buffered hit into `columns`, appending one value per slot of
// `tuple_desc` (slot i is written to columns[i]).
//
// @param tuple_desc        slots describing the destination columns.
// @param columns           destination columns, indexed like tuple_desc->slots().
// @param line_eof          set to true when the current batch has no hit left (nothing is
//                          appended), set to false after a row has been produced.
// @param docvalue_context  maps a column name to the field name used in doc_values mode;
//                          consulted with at(), so a missing entry throws std::out_of_range.
// @param time_zone         time zone forwarded to the date/datetime conversions.
// @return OK on success or batch end; an error Status when a value cannot be converted,
//         a NOT NULL slot has no value, or the slot type is not supported.
//
// Every code path that finishes a slot appends exactly one entry to its column. An empty
// doc_values array and an unsupported slot type are therefore turned into NULL / an error
// instead of silently skipping the column, which would leave the columns with different
// row counts.
Status ScrollParser::fill_columns(const TupleDescriptor* tuple_desc,
                                  std::vector<MutableColumnPtr>& columns, bool* line_eof,
                                  const std::map<std::string, std::string>& docvalue_context,
                                  const cctz::time_zone& time_zone) {
    *line_eof = true;

    if (_size <= 0 || _line_index >= _size) {
        return Status::OK();
    }

    const rapidjson::Value& obj = _inner_hits_node[_line_index++];
    // A hit that carries `fields` is read in doc_values mode, where values are accessed as
    // arrays (col[0]).
    const bool pure_doc_value = obj.HasMember("fields");
    // obj may be neither have `_source` nor `fields` field.
    const rapidjson::Value* line = nullptr;
    if (obj.HasMember(FIELD_SOURCE)) {
        line = &obj[FIELD_SOURCE];
    } else if (obj.HasMember("fields")) {
        line = &obj["fields"];
    }

    for (int i = 0; i < tuple_desc->slots().size(); ++i) {
        const SlotDescriptor* slot_desc = tuple_desc->slots()[i];
        auto* col_ptr = columns[i].get();

        if (slot_desc->col_name() == FIELD_ID) {
            // actually this branch will not be reached, this is guaranteed by Doris FE.
            if (pure_doc_value) {
                return Status::RuntimeError("obtain `_id` is not supported in doc_values mode");
            }
            // obj[FIELD_ID] must not be NULL
            std::string _id = obj[FIELD_ID].GetString();
            size_t len = _id.length();

            col_ptr->insert_data(const_cast<const char*>(_id.data()), len);
            continue;
        }

        const char* col_name = pure_doc_value ? docvalue_context.at(slot_desc->col_name()).c_str()
                                              : slot_desc->col_name().c_str();

        // The hit has no entry for this column: NULL if allowed, otherwise an error.
        if (line == nullptr || line->FindMember(col_name) == line->MemberEnd()) {
            if (slot_desc->is_nullable()) {
                auto* nullable_column = reinterpret_cast<ColumnNullable*>(col_ptr);
                nullable_column->insert_data(nullptr, 0);
                continue;
            } else {
                std::string details = absl::Substitute(INVALID_NULL_VALUE, col_name);
                return Status::RuntimeError(details);
            }
        }

        const rapidjson::Value& col = (*line)[col_name];

        auto type = slot_desc->type()->get_primitive_type();

        // when the column value is null, the subsequent type casting will report an error
        if (col.IsNull() && slot_desc->is_nullable()) {
            col_ptr->insert_data(nullptr, 0);
            continue;
        } else if (col.IsNull() && !slot_desc->is_nullable()) {
            std::string details = absl::Substitute(INVALID_NULL_VALUE, col_name);
            return Status::RuntimeError(details);
        }

        // An empty doc_values array carries no value. The row must still get an entry in this
        // column, so store NULL when the slot allows it and report an error otherwise.
        const auto insert_null_for_empty_doc_value = [&]() -> Status {
            if (slot_desc->is_nullable()) {
                col_ptr->insert_data(nullptr, 0);
                return Status::OK();
            }
            std::string details = absl::Substitute(INVALID_NULL_VALUE, col_name);
            return Status::RuntimeError(details);
        };

        switch (type) {
        case TYPE_CHAR:
        case TYPE_VARCHAR:
        case TYPE_STRING: {
            // sometimes elasticsearch user post some not-string value to Elasticsearch Index.
            // because of reading value from _source, we can not process all json type and then just transfer the value to original string representation
            // this may be a tricky, but we can work around this issue
            std::string val;
            if (pure_doc_value) {
                if (col.Empty()) {
                    RETURN_IF_ERROR(insert_null_for_empty_doc_value());
                    break;
                } else if (col.Size() > 1) {
                    // doc_values with multiple elements means actual array data
                    // in ES keyword/text field, serialize as JSON array string
                    val = json_value_to_string(col);
                } else if (!col[0].IsString()) {
                    val = json_value_to_string(col[0]);
                } else {
                    val = col[0].GetString();
                }
            } else {
                // When ES mapping is keyword/text but actual data is an array,
                // serialize the array to JSON string instead of throwing an error.
                // This is valid in ES since any field can hold array values.
                if (col.IsArray()) {
                    val = json_value_to_string(col);
                } else if (!col.IsString()) {
                    val = json_value_to_string(col);
                } else {
                    val = col.GetString();
                }
            }
            size_t val_size = val.length();
            col_ptr->insert_data(const_cast<const char*>(val.data()), val_size);
            break;
        }

        case TYPE_TINYINT: {
            RETURN_IF_ERROR(insert_int_value<int8_t>(col, type, col_ptr, pure_doc_value,
                                                     slot_desc->is_nullable()));
            break;
        }

        case TYPE_SMALLINT: {
            RETURN_IF_ERROR(insert_int_value<int16_t>(col, type, col_ptr, pure_doc_value,
                                                      slot_desc->is_nullable()));
            break;
        }

        case TYPE_INT: {
            RETURN_IF_ERROR(insert_int_value<int32_t>(col, type, col_ptr, pure_doc_value,
                                                      slot_desc->is_nullable()));
            break;
        }

        case TYPE_BIGINT: {
            RETURN_IF_ERROR(insert_int_value<int64_t>(col, type, col_ptr, pure_doc_value,
                                                      slot_desc->is_nullable()));
            break;
        }

        case TYPE_LARGEINT: {
            RETURN_IF_ERROR(insert_int_value<__int128>(col, type, col_ptr, pure_doc_value,
                                                       slot_desc->is_nullable()));
            break;
        }

        case TYPE_DOUBLE: {
            RETURN_IF_ERROR(insert_float_value<double>(col, type, col_ptr, pure_doc_value,
                                                       slot_desc->is_nullable()));
            break;
        }

        case TYPE_FLOAT: {
            RETURN_IF_ERROR(insert_float_value<float>(col, type, col_ptr, pure_doc_value,
                                                      slot_desc->is_nullable()));
            break;
        }

        case TYPE_BOOLEAN: {
            // Accepted encodings, tried in order: JSON bool, JSON number, doc_values array
            // holding a bool or a string, and finally a plain string.
            if (col.IsBool()) {
                int8_t val = col.GetBool();
                col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&val)), 0);
                break;
            }

            if (col.IsNumber()) {
                int8_t val = static_cast<int8_t>(col.GetInt());
                col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&val)), 0);
                break;
            }

            bool is_nested_str = false;
            if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsBool()) {
                int8_t val = col[0].GetBool();
                col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&val)), 0);
                break;
            } else if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsString()) {
                is_nested_str = true;
            } else if (pure_doc_value && col.IsArray()) {
                return Status::InternalError(ERROR_INVALID_COL_DATA, "BOOLEAN");
            }

            const rapidjson::Value& str_col = is_nested_str ? col[0] : col;

            RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);

            const std::string& val = str_col.GetString();
            size_t val_size = str_col.GetStringLength();
            StringParser::ParseResult result;
            bool b = StringParser::string_to_bool(val.c_str(), val_size, &result);
            RETURN_ERROR_IF_PARSING_FAILED(result, str_col, type);
            col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&b)), 0);
            break;
        }
        case TYPE_DECIMALV2: {
            DecimalV2Value data;

            if (col.IsDouble()) {
                data.assign_from_double(col.GetDouble());
            } else {
                std::string val;
                if (pure_doc_value) {
                    if (col.Empty()) {
                        RETURN_IF_ERROR(insert_null_for_empty_doc_value());
                        break;
                    } else if (!col[0].IsString()) {
                        val = json_value_to_string(col[0]);
                    } else {
                        val = col[0].GetString();
                    }
                } else {
                    RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
                    if (!col.IsString()) {
                        val = json_value_to_string(col);
                    } else {
                        val = col.GetString();
                    }
                }
                data.parse_from_str(val.data(), static_cast<int32_t>(val.length()));
            }
            col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&data)), 0);
            break;
        }

        case TYPE_DATE:
            RETURN_IF_ERROR(
                    fill_date_int<TYPE_DATE>(col, type, pure_doc_value, col_ptr, time_zone));
            break;
        case TYPE_DATETIME:
            RETURN_IF_ERROR(
                    fill_date_int<TYPE_DATETIME>(col, type, pure_doc_value, col_ptr, time_zone));
            break;
        case TYPE_DATEV2:
            RETURN_IF_ERROR(
                    fill_date_int<TYPE_DATEV2>(col, type, pure_doc_value, col_ptr, time_zone));
            break;
        case TYPE_DATETIMEV2: {
            RETURN_IF_ERROR(
                    fill_date_int<TYPE_DATETIMEV2>(col, type, pure_doc_value, col_ptr, time_zone));
            break;
        }
        case TYPE_ARRAY: {
            Array array;
            const auto& sub_type = assert_cast<const DataTypeArray*>(
                                           remove_nullable(tuple_desc->slots()[i]->type()).get())
                                           ->get_nested_type()
                                           ->get_primitive_type();
            RETURN_IF_ERROR(parse_column(col, sub_type, pure_doc_value, array, time_zone));
            col_ptr->insert(Field::create_field<TYPE_ARRAY>(array));
            break;
        }
        case TYPE_JSONB: {
            JsonBinaryValue jsonb_value;
            RETURN_IF_ERROR(jsonb_value.from_json_string(json_value_to_string(col)));
            JsonbField json(jsonb_value.value(), jsonb_value.size());
            col_ptr->insert(Field::create_field<TYPE_JSONB>(json));
            break;
        }
        default: {
            LOG(ERROR) << "Unsupported data type: " << type_to_string(type);
            DCHECK(false);
            // In builds where DCHECK is compiled out, fail the read rather than leave this
            // column one row short.
            return Status::InternalError("Unsupported data type: {}", type_to_string(type));
        }
        }
    }

    *line_eof = false;
    return Status::OK();
}
917
#include "common/compile_check_end.h"
918
} // namespace doris