Coverage Report

Created: 2026-03-16 13:13

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exec/es/es_scroll_parser.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "exec/es/es_scroll_parser.h"
19
20
#include <absl/strings/substitute.h>
21
#include <cctz/time_zone.h>
22
#include <glog/logging.h>
23
#include <rapidjson/allocators.h>
24
#include <rapidjson/encodings.h>
25
#include <stdint.h>
26
#include <string.h>
27
28
// IWYU pragma: no_include <bits/chrono.h>
29
#include <chrono> // IWYU pragma: keep
30
#include <cstdlib>
31
#include <ostream>
32
#include <string>
33
34
#include "common/status.h"
35
#include "core/binary_cast.hpp"
36
#include "core/column/column.h"
37
#include "core/column/column_nullable.h"
38
#include "core/data_type/data_type_array.h"
39
#include "core/data_type/data_type_nullable.h"
40
#include "core/data_type/define_primitive_type.h"
41
#include "core/data_type/primitive_type.h"
42
#include "core/field.h"
43
#include "core/value/decimalv2_value.h"
44
#include "core/value/jsonb_value.h"
45
#include "core/value/vdatetime_value.h"
46
#include "rapidjson/document.h"
47
#include "rapidjson/rapidjson.h"
48
#include "rapidjson/stringbuffer.h"
49
#include "rapidjson/writer.h"
50
#include "runtime/descriptors.h"
51
#include "util/string_parser.hpp"
52
53
namespace doris {
54
#include "common/compile_check_begin.h"
55
56
static const char* FIELD_SCROLL_ID = "_scroll_id";
57
static const char* FIELD_HITS = "hits";
58
static const char* FIELD_INNER_HITS = "hits";
59
static const char* FIELD_SOURCE = "_source";
60
static const char* FIELD_ID = "_id";
61
62
// get the original json data type
63
0
std::string json_type_to_string(rapidjson::Type type) {
64
0
    switch (type) {
65
0
    case rapidjson::kNumberType:
66
0
        return "Number";
67
0
    case rapidjson::kStringType:
68
0
        return "Varchar/Char";
69
0
    case rapidjson::kArrayType:
70
0
        return "Array";
71
0
    case rapidjson::kObjectType:
72
0
        return "Object";
73
0
    case rapidjson::kNullType:
74
0
        return "Null Type";
75
0
    case rapidjson::kFalseType:
76
0
    case rapidjson::kTrueType:
77
0
        return "True/False";
78
0
    default:
79
0
        return "Unknown Type";
80
0
    }
81
0
}
82
83
// serialize a rapidjson::Value to its JSON string representation
84
1.70k
std::string json_value_to_string(const rapidjson::Value& value) {
85
1.70k
    rapidjson::StringBuffer scratch_buffer;
86
1.70k
    rapidjson::Writer<rapidjson::StringBuffer> temp_writer(scratch_buffer);
87
1.70k
    value.Accept(temp_writer);
88
1.70k
    return scratch_buffer.GetString();
89
1.70k
}
90
91
static const std::string ERROR_INVALID_COL_DATA =
92
        "Data source returned inconsistent column data. "
93
        "Expected value of type {} based on column metadata. This likely indicates a "
94
        "problem with the data source library.";
95
static const std::string ERROR_MEM_LIMIT_EXCEEDED =
96
        "DataSourceScanNode::$0() failed to allocate "
97
        "$1 bytes for $2.";
98
static const std::string ERROR_COL_DATA_IS_ARRAY =
99
        "Data source returned an array for the type $0"
100
        "based on column metadata.";
101
static const std::string INVALID_NULL_VALUE =
102
        "Invalid null value occurs: Non-null column `$0` contains NULL";
103
104
#define RETURN_ERROR_IF_COL_IS_ARRAY(col, type, is_array)                    \
105
4.51k
    do {                                                                     \
106
4.51k
        if (col.IsArray() == is_array) {                                     \
107
0
            std::stringstream ss;                                            \
108
0
            ss << "Expected value of type: " << type_to_string(type)         \
109
0
               << "; but found type: " << json_type_to_string(col.GetType()) \
110
0
               << "; Document slice is : " << json_value_to_string(col);     \
111
0
            return Status::RuntimeError(ss.str());                           \
112
0
        }                                                                    \
113
4.51k
    } while (false)
114
115
#define RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type)                            \
116
4.51k
    do {                                                                        \
117
4.51k
        if (!col.IsString()) {                                                  \
118
0
            std::stringstream ss;                                               \
119
0
            ss << "Expected value of type: " << type_to_string(type)            \
120
0
               << "; but found type: " << json_type_to_string(col.GetType())    \
121
0
               << "; Document source slice is : " << json_value_to_string(col); \
122
0
            return Status::RuntimeError(ss.str());                              \
123
0
        }                                                                       \
124
4.51k
    } while (false)
125
126
#define RETURN_ERROR_IF_COL_IS_NOT_NUMBER(col, type)                         \
127
0
    do {                                                                     \
128
0
        if (!col.IsNumber()) {                                               \
129
0
            std::stringstream ss;                                            \
130
0
            ss << "Expected value of type: " << type_to_string(type)         \
131
0
               << "; but found type: " << json_type_to_string(col.GetType()) \
132
0
               << "; Document value is: " << json_value_to_string(col);      \
133
0
            return Status::RuntimeError(ss.str());                           \
134
0
        }                                                                    \
135
0
    } while (false)
136
137
#define RETURN_ERROR_IF_PARSING_FAILED(result, col, type)                       \
138
236
    do {                                                                        \
139
236
        if (result != StringParser::PARSE_SUCCESS) {                            \
140
0
            std::stringstream ss;                                               \
141
0
            ss << "Expected value of type: " << type_to_string(type)            \
142
0
               << "; but found type: " << json_type_to_string(col.GetType())    \
143
0
               << "; Document source slice is : " << json_value_to_string(col); \
144
0
            return Status::RuntimeError(ss.str());                              \
145
0
        }                                                                       \
146
236
    } while (false)
147
148
#define RETURN_ERROR_IF_CAST_FORMAT_ERROR(col, type)                     \
149
0
    do {                                                                 \
150
0
        std::stringstream ss;                                            \
151
0
        ss << "Expected value of type: " << type_to_string(type)         \
152
0
           << "; but found type: " << json_type_to_string(col.GetType()) \
153
0
           << "; Document slice is : " << json_value_to_string(col);     \
154
0
        return Status::RuntimeError(ss.str());                           \
155
0
    } while (false)
156
157
template <typename T>
158
Status get_int_value(const rapidjson::Value& col, PrimitiveType type, void* slot,
159
14.5k
                     bool pure_doc_value) {
160
14.5k
    if (col.IsNumber()) {
161
14.5k
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) < 8 ? col.GetInt() : col.GetInt64());
162
14.5k
        return Status::OK();
163
14.5k
    }
164
165
0
    if (pure_doc_value && col.IsArray() && !col.Empty()) {
166
0
        RETURN_ERROR_IF_COL_IS_NOT_NUMBER(col[0], type);
167
0
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) < 8 ? col[0].GetInt() : col[0].GetInt64());
168
0
        return Status::OK();
169
0
    }
170
171
0
    RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
172
0
    RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
173
174
0
    StringParser::ParseResult result;
175
0
    const std::string& val = col.GetString();
176
0
    size_t len = col.GetStringLength();
177
0
    T v = StringParser::string_to_int<T>(val.c_str(), len, &result);
178
0
    RETURN_ERROR_IF_PARSING_FAILED(result, col, type);
179
180
0
    if (sizeof(T) < 16) {
181
0
        *reinterpret_cast<T*>(slot) = v;
182
0
    } else {
183
0
        DCHECK(sizeof(T) == 16);
184
0
        memcpy(slot, &v, sizeof(v));
185
0
    }
186
187
0
    return Status::OK();
188
0
}
_ZN5doris13get_int_valueIaEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPvb
Line
Count
Source
159
2.85k
                     bool pure_doc_value) {
160
2.85k
    if (col.IsNumber()) {
161
2.85k
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) < 8 ? col.GetInt() : col.GetInt64());
162
2.85k
        return Status::OK();
163
2.85k
    }
164
165
0
    if (pure_doc_value && col.IsArray() && !col.Empty()) {
166
0
        RETURN_ERROR_IF_COL_IS_NOT_NUMBER(col[0], type);
167
0
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) < 8 ? col[0].GetInt() : col[0].GetInt64());
168
0
        return Status::OK();
169
0
    }
170
171
0
    RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
172
0
    RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
173
174
0
    StringParser::ParseResult result;
175
0
    const std::string& val = col.GetString();
176
0
    size_t len = col.GetStringLength();
177
0
    T v = StringParser::string_to_int<T>(val.c_str(), len, &result);
178
0
    RETURN_ERROR_IF_PARSING_FAILED(result, col, type);
179
180
0
    if (sizeof(T) < 16) {
181
0
        *reinterpret_cast<T*>(slot) = v;
182
0
    } else {
183
0
        DCHECK(sizeof(T) == 16);
184
0
        memcpy(slot, &v, sizeof(v));
185
0
    }
186
187
0
    return Status::OK();
188
0
}
_ZN5doris13get_int_valueIsEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPvb
Line
Count
Source
159
2.85k
                     bool pure_doc_value) {
160
2.85k
    if (col.IsNumber()) {
161
2.85k
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) < 8 ? col.GetInt() : col.GetInt64());
162
2.85k
        return Status::OK();
163
2.85k
    }
164
165
0
    if (pure_doc_value && col.IsArray() && !col.Empty()) {
166
0
        RETURN_ERROR_IF_COL_IS_NOT_NUMBER(col[0], type);
167
0
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) < 8 ? col[0].GetInt() : col[0].GetInt64());
168
0
        return Status::OK();
169
0
    }
170
171
0
    RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
172
0
    RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
173
174
0
    StringParser::ParseResult result;
175
0
    const std::string& val = col.GetString();
176
0
    size_t len = col.GetStringLength();
177
0
    T v = StringParser::string_to_int<T>(val.c_str(), len, &result);
178
0
    RETURN_ERROR_IF_PARSING_FAILED(result, col, type);
179
180
0
    if (sizeof(T) < 16) {
181
0
        *reinterpret_cast<T*>(slot) = v;
182
0
    } else {
183
0
        DCHECK(sizeof(T) == 16);
184
0
        memcpy(slot, &v, sizeof(v));
185
0
    }
186
187
0
    return Status::OK();
188
0
}
_ZN5doris13get_int_valueIiEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPvb
Line
Count
Source
159
3.16k
                     bool pure_doc_value) {
160
3.16k
    if (col.IsNumber()) {
161
3.16k
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) < 8 ? col.GetInt() : col.GetInt64());
162
3.16k
        return Status::OK();
163
3.16k
    }
164
165
0
    if (pure_doc_value && col.IsArray() && !col.Empty()) {
166
0
        RETURN_ERROR_IF_COL_IS_NOT_NUMBER(col[0], type);
167
0
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) < 8 ? col[0].GetInt() : col[0].GetInt64());
168
0
        return Status::OK();
169
0
    }
170
171
0
    RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
172
0
    RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
173
174
0
    StringParser::ParseResult result;
175
0
    const std::string& val = col.GetString();
176
0
    size_t len = col.GetStringLength();
177
0
    T v = StringParser::string_to_int<T>(val.c_str(), len, &result);
178
0
    RETURN_ERROR_IF_PARSING_FAILED(result, col, type);
179
180
0
    if (sizeof(T) < 16) {
181
0
        *reinterpret_cast<T*>(slot) = v;
182
0
    } else {
183
0
        DCHECK(sizeof(T) == 16);
184
0
        memcpy(slot, &v, sizeof(v));
185
0
    }
186
187
0
    return Status::OK();
188
0
}
_ZN5doris13get_int_valueIlEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPvb
Line
Count
Source
159
3.79k
                     bool pure_doc_value) {
160
3.79k
    if (col.IsNumber()) {
161
3.79k
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) < 8 ? col.GetInt() : col.GetInt64());
162
3.79k
        return Status::OK();
163
3.79k
    }
164
165
0
    if (pure_doc_value && col.IsArray() && !col.Empty()) {
166
0
        RETURN_ERROR_IF_COL_IS_NOT_NUMBER(col[0], type);
167
0
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) < 8 ? col[0].GetInt() : col[0].GetInt64());
168
0
        return Status::OK();
169
0
    }
170
171
0
    RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
172
0
    RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
173
174
0
    StringParser::ParseResult result;
175
0
    const std::string& val = col.GetString();
176
0
    size_t len = col.GetStringLength();
177
0
    T v = StringParser::string_to_int<T>(val.c_str(), len, &result);
178
0
    RETURN_ERROR_IF_PARSING_FAILED(result, col, type);
179
180
0
    if (sizeof(T) < 16) {
181
0
        *reinterpret_cast<T*>(slot) = v;
182
0
    } else {
183
0
        DCHECK(sizeof(T) == 16);
184
0
        memcpy(slot, &v, sizeof(v));
185
0
    }
186
187
0
    return Status::OK();
188
0
}
_ZN5doris13get_int_valueInEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPvb
Line
Count
Source
159
1.92k
                     bool pure_doc_value) {
160
1.92k
    if (col.IsNumber()) {
161
1.92k
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) < 8 ? col.GetInt() : col.GetInt64());
162
1.92k
        return Status::OK();
163
1.92k
    }
164
165
0
    if (pure_doc_value && col.IsArray() && !col.Empty()) {
166
0
        RETURN_ERROR_IF_COL_IS_NOT_NUMBER(col[0], type);
167
0
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) < 8 ? col[0].GetInt() : col[0].GetInt64());
168
0
        return Status::OK();
169
0
    }
170
171
0
    RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
172
0
    RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
173
174
0
    StringParser::ParseResult result;
175
0
    const std::string& val = col.GetString();
176
0
    size_t len = col.GetStringLength();
177
0
    T v = StringParser::string_to_int<T>(val.c_str(), len, &result);
178
0
    RETURN_ERROR_IF_PARSING_FAILED(result, col, type);
179
180
0
    if (sizeof(T) < 16) {
181
0
        *reinterpret_cast<T*>(slot) = v;
182
0
    } else {
183
0
        DCHECK(sizeof(T) == 16);
184
0
        memcpy(slot, &v, sizeof(v));
185
0
    }
186
187
0
    return Status::OK();
188
0
}
189
190
template <PrimitiveType T>
191
Status get_date_value_int(const rapidjson::Value& col, PrimitiveType type, bool is_date_str,
192
                          typename PrimitiveTypeTraits<T>::CppType* slot,
193
5.19k
                          const cctz::time_zone& time_zone) {
194
5.19k
    constexpr bool is_datetime_v1 = T == TYPE_DATE || T == TYPE_DATETIME;
195
5.19k
    typename PrimitiveTypeTraits<T>::CppType dt_val;
196
5.19k
    if (is_date_str) {
197
4.84k
        const std::string str_date = col.GetString();
198
4.84k
        int str_length = col.GetStringLength();
199
4.84k
        bool success = false;
200
4.84k
        if (str_length > 19) {
201
730
            std::chrono::system_clock::time_point tp;
202
            // time_zone suffix pattern
203
            // Z/+08:00/-04:30
204
730
            RE2 time_zone_pattern(R"([+-]\d{2}:?\d{2}|Z)");
205
730
            bool ok = false;
206
730
            std::string fmt;
207
730
            re2::StringPiece value;
208
730
            if (time_zone_pattern.Match(str_date, 0, str_date.size(), RE2::UNANCHORED, &value, 1)) {
209
                // with time_zone info
210
                // YYYY-MM-DDTHH:MM:SSZ or YYYY-MM-DDTHH:MM:SS+08:00
211
                // or 2022-08-08T12:10:10.000Z or YYYY-MM-DDTHH:MM:SS-08:00
212
614
                fmt = "%Y-%m-%dT%H:%M:%E*S%Ez";
213
614
                cctz::time_zone ctz;
214
                // find time_zone by time_zone suffix string
215
614
                TimezoneUtils::find_cctz_time_zone(value.as_string(), ctz);
216
614
                ok = cctz::parse(fmt, str_date, ctz, &tp);
217
614
            } else {
218
                // without time_zone info
219
                // 2022-08-08T12:10:10.000
220
116
                fmt = "%Y-%m-%dT%H:%M:%E*S";
221
                // If the time without time_zone info, ES will assume it is UTC time.
222
                // So we parse it in Doris with UTC time zone.
223
116
                ok = cctz::parse(fmt, str_date, cctz::utc_time_zone(), &tp);
224
116
            }
225
730
            if (ok) {
226
                // The local time zone can change by session variable `time_zone`
227
                // We should use the user specified time zone, not the actual system local time zone.
228
730
                success = true;
229
730
                dt_val.from_unixtime(std::chrono::system_clock::to_time_t(tp), time_zone);
230
730
            }
231
4.11k
        } else if (str_length == 19) {
232
            // YYYY-MM-DDTHH:MM:SS
233
2.20k
            if (*(str_date.c_str() + 10) == 'T') {
234
48
                std::chrono::system_clock::time_point tp;
235
48
                const bool ok =
236
48
                        cctz::parse("%Y-%m-%dT%H:%M:%S", str_date, cctz::utc_time_zone(), &tp);
237
48
                if (ok) {
238
48
                    success = true;
239
48
                    dt_val.from_unixtime(std::chrono::system_clock::to_time_t(tp), time_zone);
240
48
                }
241
2.15k
            } else {
242
                // YYYY-MM-DD HH:MM:SS
243
2.15k
                success = dt_val.from_date_str(str_date.c_str(), str_length);
244
2.15k
            }
245
246
2.20k
        } else if (str_length == 13) {
247
            // string long like "1677895728000"
248
80
            int64_t time_long = std::atol(str_date.c_str());
249
80
            if (time_long > 0) {
250
80
                success = true;
251
80
                dt_val.from_unixtime(time_long / 1000, time_zone);
252
80
            }
253
1.83k
        } else {
254
            // YYYY-MM-DD or others
255
1.83k
            success = dt_val.from_date_str(str_date.c_str(), str_length);
256
1.83k
        }
257
258
4.84k
        if (!success) {
259
0
            RETURN_ERROR_IF_CAST_FORMAT_ERROR(col, type);
260
0
        }
261
262
4.84k
    } else {
263
352
        dt_val.from_unixtime(col.GetInt64() / 1000, time_zone);
264
352
    }
265
5.19k
    if constexpr (is_datetime_v1) {
266
0
        if (type == TYPE_DATE) {
267
0
            dt_val.cast_to_date();
268
0
        } else {
269
0
            dt_val.to_datetime();
270
0
        }
271
0
    }
272
273
5.19k
    *slot = *reinterpret_cast<typename PrimitiveTypeTraits<T>::CppType*>(&dt_val);
274
5.19k
    return Status::OK();
275
5.19k
}
_ZN5doris18get_date_value_intILNS_13PrimitiveTypeE25EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bPNS_19PrimitiveTypeTraitsIXT_EE7CppTypeERKN4cctz9time_zoneE
Line
Count
Source
193
1.54k
                          const cctz::time_zone& time_zone) {
194
1.54k
    constexpr bool is_datetime_v1 = T == TYPE_DATE || T == TYPE_DATETIME;
195
1.54k
    typename PrimitiveTypeTraits<T>::CppType dt_val;
196
1.54k
    if (is_date_str) {
197
1.54k
        const std::string str_date = col.GetString();
198
1.54k
        int str_length = col.GetStringLength();
199
1.54k
        bool success = false;
200
1.54k
        if (str_length > 19) {
201
40
            std::chrono::system_clock::time_point tp;
202
            // time_zone suffix pattern
203
            // Z/+08:00/-04:30
204
40
            RE2 time_zone_pattern(R"([+-]\d{2}:?\d{2}|Z)");
205
40
            bool ok = false;
206
40
            std::string fmt;
207
40
            re2::StringPiece value;
208
40
            if (time_zone_pattern.Match(str_date, 0, str_date.size(), RE2::UNANCHORED, &value, 1)) {
209
                // with time_zone info
210
                // YYYY-MM-DDTHH:MM:SSZ or YYYY-MM-DDTHH:MM:SS+08:00
211
                // or 2022-08-08T12:10:10.000Z or YYYY-MM-DDTHH:MM:SS-08:00
212
40
                fmt = "%Y-%m-%dT%H:%M:%E*S%Ez";
213
40
                cctz::time_zone ctz;
214
                // find time_zone by time_zone suffix string
215
40
                TimezoneUtils::find_cctz_time_zone(value.as_string(), ctz);
216
40
                ok = cctz::parse(fmt, str_date, ctz, &tp);
217
40
            } else {
218
                // without time_zone info
219
                // 2022-08-08T12:10:10.000
220
0
                fmt = "%Y-%m-%dT%H:%M:%E*S";
221
                // If the time without time_zone info, ES will assume it is UTC time.
222
                // So we parse it in Doris with UTC time zone.
223
0
                ok = cctz::parse(fmt, str_date, cctz::utc_time_zone(), &tp);
224
0
            }
225
40
            if (ok) {
226
                // The local time zone can change by session variable `time_zone`
227
                // We should use the user specified time zone, not the actual system local time zone.
228
40
                success = true;
229
40
                dt_val.from_unixtime(std::chrono::system_clock::to_time_t(tp), time_zone);
230
40
            }
231
1.50k
        } else if (str_length == 19) {
232
            // YYYY-MM-DDTHH:MM:SS
233
8
            if (*(str_date.c_str() + 10) == 'T') {
234
0
                std::chrono::system_clock::time_point tp;
235
0
                const bool ok =
236
0
                        cctz::parse("%Y-%m-%dT%H:%M:%S", str_date, cctz::utc_time_zone(), &tp);
237
0
                if (ok) {
238
0
                    success = true;
239
0
                    dt_val.from_unixtime(std::chrono::system_clock::to_time_t(tp), time_zone);
240
0
                }
241
8
            } else {
242
                // YYYY-MM-DD HH:MM:SS
243
8
                success = dt_val.from_date_str(str_date.c_str(), str_length);
244
8
            }
245
246
1.49k
        } else if (str_length == 13) {
247
            // string long like "1677895728000"
248
0
            int64_t time_long = std::atol(str_date.c_str());
249
0
            if (time_long > 0) {
250
0
                success = true;
251
0
                dt_val.from_unixtime(time_long / 1000, time_zone);
252
0
            }
253
1.49k
        } else {
254
            // YYYY-MM-DD or others
255
1.49k
            success = dt_val.from_date_str(str_date.c_str(), str_length);
256
1.49k
        }
257
258
1.54k
        if (!success) {
259
0
            RETURN_ERROR_IF_CAST_FORMAT_ERROR(col, type);
260
0
        }
261
262
1.54k
    } else {
263
0
        dt_val.from_unixtime(col.GetInt64() / 1000, time_zone);
264
0
    }
265
    if constexpr (is_datetime_v1) {
266
        if (type == TYPE_DATE) {
267
            dt_val.cast_to_date();
268
        } else {
269
            dt_val.to_datetime();
270
        }
271
    }
272
273
1.54k
    *slot = *reinterpret_cast<typename PrimitiveTypeTraits<T>::CppType*>(&dt_val);
274
1.54k
    return Status::OK();
275
1.54k
}
_ZN5doris18get_date_value_intILNS_13PrimitiveTypeE26EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bPNS_19PrimitiveTypeTraitsIXT_EE7CppTypeERKN4cctz9time_zoneE
Line
Count
Source
193
3.65k
                          const cctz::time_zone& time_zone) {
194
3.65k
    constexpr bool is_datetime_v1 = T == TYPE_DATE || T == TYPE_DATETIME;
195
3.65k
    typename PrimitiveTypeTraits<T>::CppType dt_val;
196
3.65k
    if (is_date_str) {
197
3.30k
        const std::string str_date = col.GetString();
198
3.30k
        int str_length = col.GetStringLength();
199
3.30k
        bool success = false;
200
3.30k
        if (str_length > 19) {
201
690
            std::chrono::system_clock::time_point tp;
202
            // time_zone suffix pattern
203
            // Z/+08:00/-04:30
204
690
            RE2 time_zone_pattern(R"([+-]\d{2}:?\d{2}|Z)");
205
690
            bool ok = false;
206
690
            std::string fmt;
207
690
            re2::StringPiece value;
208
690
            if (time_zone_pattern.Match(str_date, 0, str_date.size(), RE2::UNANCHORED, &value, 1)) {
209
                // with time_zone info
210
                // YYYY-MM-DDTHH:MM:SSZ or YYYY-MM-DDTHH:MM:SS+08:00
211
                // or 2022-08-08T12:10:10.000Z or YYYY-MM-DDTHH:MM:SS-08:00
212
574
                fmt = "%Y-%m-%dT%H:%M:%E*S%Ez";
213
574
                cctz::time_zone ctz;
214
                // find time_zone by time_zone suffix string
215
574
                TimezoneUtils::find_cctz_time_zone(value.as_string(), ctz);
216
574
                ok = cctz::parse(fmt, str_date, ctz, &tp);
217
574
            } else {
218
                // without time_zone info
219
                // 2022-08-08T12:10:10.000
220
116
                fmt = "%Y-%m-%dT%H:%M:%E*S";
221
                // If the time without time_zone info, ES will assume it is UTC time.
222
                // So we parse it in Doris with UTC time zone.
223
116
                ok = cctz::parse(fmt, str_date, cctz::utc_time_zone(), &tp);
224
116
            }
225
690
            if (ok) {
226
                // The local time zone can change by session variable `time_zone`
227
                // We should use the user specified time zone, not the actual system local time zone.
228
690
                success = true;
229
690
                dt_val.from_unixtime(std::chrono::system_clock::to_time_t(tp), time_zone);
230
690
            }
231
2.61k
        } else if (str_length == 19) {
232
            // YYYY-MM-DDTHH:MM:SS
233
2.19k
            if (*(str_date.c_str() + 10) == 'T') {
234
48
                std::chrono::system_clock::time_point tp;
235
48
                const bool ok =
236
48
                        cctz::parse("%Y-%m-%dT%H:%M:%S", str_date, cctz::utc_time_zone(), &tp);
237
48
                if (ok) {
238
48
                    success = true;
239
48
                    dt_val.from_unixtime(std::chrono::system_clock::to_time_t(tp), time_zone);
240
48
                }
241
2.14k
            } else {
242
                // YYYY-MM-DD HH:MM:SS
243
2.14k
                success = dt_val.from_date_str(str_date.c_str(), str_length);
244
2.14k
            }
245
246
2.19k
        } else if (str_length == 13) {
247
            // string long like "1677895728000"
248
80
            int64_t time_long = std::atol(str_date.c_str());
249
80
            if (time_long > 0) {
250
80
                success = true;
251
80
                dt_val.from_unixtime(time_long / 1000, time_zone);
252
80
            }
253
340
        } else {
254
            // YYYY-MM-DD or others
255
340
            success = dt_val.from_date_str(str_date.c_str(), str_length);
256
340
        }
257
258
3.30k
        if (!success) {
259
0
            RETURN_ERROR_IF_CAST_FORMAT_ERROR(col, type);
260
0
        }
261
262
3.30k
    } else {
263
352
        dt_val.from_unixtime(col.GetInt64() / 1000, time_zone);
264
352
    }
265
    if constexpr (is_datetime_v1) {
266
        if (type == TYPE_DATE) {
267
            dt_val.cast_to_date();
268
        } else {
269
            dt_val.to_datetime();
270
        }
271
    }
272
273
3.65k
    *slot = *reinterpret_cast<typename PrimitiveTypeTraits<T>::CppType*>(&dt_val);
274
3.65k
    return Status::OK();
275
3.65k
}
Unexecuted instantiation: _ZN5doris18get_date_value_intILNS_13PrimitiveTypeE11EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bPNS_19PrimitiveTypeTraitsIXT_EE7CppTypeERKN4cctz9time_zoneE
Unexecuted instantiation: _ZN5doris18get_date_value_intILNS_13PrimitiveTypeE12EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bPNS_19PrimitiveTypeTraitsIXT_EE7CppTypeERKN4cctz9time_zoneE
276
277
template <PrimitiveType T>
278
Status get_date_int(const rapidjson::Value& col, PrimitiveType type, bool pure_doc_value,
279
                    typename PrimitiveTypeTraits<T>::CppType* slot,
280
5.20k
                    const cctz::time_zone& time_zone) {
281
    // this would happend just only when `enable_docvalue_scan = false`, and field has timestamp format date from _source
282
5.20k
    if (col.IsNumber()) {
283
        // ES process date/datetime field would use millisecond timestamp for index or docvalue
284
        // processing date type field, if a number is encountered, Doris On ES will force it to be processed according to ms
285
        // Doris On ES needs to be consistent with ES, so just divided by 1000 because the unit for from_unixtime is seconds
286
260
        return get_date_value_int<T>(col, type, false, slot, time_zone);
287
4.94k
    } else if (col.IsArray() && pure_doc_value && !col.Empty()) {
288
        // this would happened just only when `enable_docvalue_scan = true`
289
        // ES add default format for all field after ES 6.4, if we not provided format for `date` field ES would impose
290
        // a standard date-format for date field as `2020-06-16T00:00:00.000Z`
291
        // At present, we just process this string format date. After some PR were merged into Doris, we would impose `epoch_mills` for
292
        // date field's docvalue
293
666
        if (col[0].IsString()) {
294
576
            return get_date_value_int<T>(col[0], type, true, slot, time_zone);
295
576
        }
296
        // ES would return millisecond timestamp for date field, divided by 1000 because the unit for from_unixtime is seconds
297
90
        return get_date_value_int<T>(col[0], type, false, slot, time_zone);
298
4.27k
    } else {
299
        // this would happened just only when `enable_docvalue_scan = false`, and field has string format date from _source
300
4.27k
        RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
301
4.27k
        RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
302
4.27k
        return get_date_value_int<T>(col, type, true, slot, time_zone);
303
4.27k
    }
304
5.20k
}
_ZN5doris12get_date_intILNS_13PrimitiveTypeE25EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bPNS_19PrimitiveTypeTraitsIXT_EE7CppTypeERKN4cctz9time_zoneE
Line
Count
Source
280
1.54k
                    const cctz::time_zone& time_zone) {
281
    // this would happend just only when `enable_docvalue_scan = false`, and field has timestamp format date from _source
282
1.54k
    if (col.IsNumber()) {
283
        // ES process date/datetime field would use millisecond timestamp for index or docvalue
284
        // processing date type field, if a number is encountered, Doris On ES will force it to be processed according to ms
285
        // Doris On ES needs to be consistent with ES, so just divided by 1000 because the unit for from_unixtime is seconds
286
0
        return get_date_value_int<T>(col, type, false, slot, time_zone);
287
1.54k
    } else if (col.IsArray() && pure_doc_value && !col.Empty()) {
288
        // this would happened just only when `enable_docvalue_scan = true`
289
        // ES add default format for all field after ES 6.4, if we not provided format for `date` field ES would impose
290
        // a standard date-format for date field as `2020-06-16T00:00:00.000Z`
291
        // At present, we just process this string format date. After some PR were merged into Doris, we would impose `epoch_mills` for
292
        // date field's docvalue
293
40
        if (col[0].IsString()) {
294
40
            return get_date_value_int<T>(col[0], type, true, slot, time_zone);
295
40
        }
296
        // ES would return millisecond timestamp for date field, divided by 1000 because the unit for from_unixtime is seconds
297
0
        return get_date_value_int<T>(col[0], type, false, slot, time_zone);
298
1.50k
    } else {
299
        // this would happened just only when `enable_docvalue_scan = false`, and field has string format date from _source
300
1.50k
        RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
301
1.50k
        RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
302
1.50k
        return get_date_value_int<T>(col, type, true, slot, time_zone);
303
1.50k
    }
304
1.54k
}
_ZN5doris12get_date_intILNS_13PrimitiveTypeE26EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bPNS_19PrimitiveTypeTraitsIXT_EE7CppTypeERKN4cctz9time_zoneE
Line
Count
Source
280
3.66k
                    const cctz::time_zone& time_zone) {
281
    // this would happend just only when `enable_docvalue_scan = false`, and field has timestamp format date from _source
282
3.66k
    if (col.IsNumber()) {
283
        // ES process date/datetime field would use millisecond timestamp for index or docvalue
284
        // processing date type field, if a number is encountered, Doris On ES will force it to be processed according to ms
285
        // Doris On ES needs to be consistent with ES, so just divided by 1000 because the unit for from_unixtime is seconds
286
260
        return get_date_value_int<T>(col, type, false, slot, time_zone);
287
3.40k
    } else if (col.IsArray() && pure_doc_value && !col.Empty()) {
288
        // this would happened just only when `enable_docvalue_scan = true`
289
        // ES add default format for all field after ES 6.4, if we not provided format for `date` field ES would impose
290
        // a standard date-format for date field as `2020-06-16T00:00:00.000Z`
291
        // At present, we just process this string format date. After some PR were merged into Doris, we would impose `epoch_mills` for
292
        // date field's docvalue
293
626
        if (col[0].IsString()) {
294
536
            return get_date_value_int<T>(col[0], type, true, slot, time_zone);
295
536
        }
296
        // ES would return millisecond timestamp for date field, divided by 1000 because the unit for from_unixtime is seconds
297
90
        return get_date_value_int<T>(col[0], type, false, slot, time_zone);
298
2.77k
    } else {
299
        // this would happened just only when `enable_docvalue_scan = false`, and field has string format date from _source
300
2.77k
        RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
301
2.77k
        RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
302
2.77k
        return get_date_value_int<T>(col, type, true, slot, time_zone);
303
2.77k
    }
304
3.66k
}
Unexecuted instantiation: _ZN5doris12get_date_intILNS_13PrimitiveTypeE11EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bPNS_19PrimitiveTypeTraitsIXT_EE7CppTypeERKN4cctz9time_zoneE
Unexecuted instantiation: _ZN5doris12get_date_intILNS_13PrimitiveTypeE12EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bPNS_19PrimitiveTypeTraitsIXT_EE7CppTypeERKN4cctz9time_zoneE
305
template <PrimitiveType T>
306
Status fill_date_int(const rapidjson::Value& col, PrimitiveType type, bool pure_doc_value,
307
2.34k
                     IColumn* col_ptr, const cctz::time_zone& time_zone) {
308
2.34k
    typename PrimitiveTypeTraits<T>::CppType data;
309
2.34k
    RETURN_IF_ERROR((get_date_int<T>(col, type, pure_doc_value, &data, time_zone)));
310
2.34k
    col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&data)), 0);
311
2.34k
    return Status::OK();
312
2.34k
}
Unexecuted instantiation: _ZN5doris13fill_date_intILNS_13PrimitiveTypeE11EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bPNS_7IColumnERKN4cctz9time_zoneE
Unexecuted instantiation: _ZN5doris13fill_date_intILNS_13PrimitiveTypeE12EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bPNS_7IColumnERKN4cctz9time_zoneE
_ZN5doris13fill_date_intILNS_13PrimitiveTypeE25EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bPNS_7IColumnERKN4cctz9time_zoneE
Line
Count
Source
307
104
                     IColumn* col_ptr, const cctz::time_zone& time_zone) {
308
104
    typename PrimitiveTypeTraits<T>::CppType data;
309
104
    RETURN_IF_ERROR((get_date_int<T>(col, type, pure_doc_value, &data, time_zone)));
310
104
    col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&data)), 0);
311
104
    return Status::OK();
312
104
}
_ZN5doris13fill_date_intILNS_13PrimitiveTypeE26EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bPNS_7IColumnERKN4cctz9time_zoneE
Line
Count
Source
307
2.24k
                     IColumn* col_ptr, const cctz::time_zone& time_zone) {
308
2.24k
    typename PrimitiveTypeTraits<T>::CppType data;
309
2.24k
    RETURN_IF_ERROR((get_date_int<T>(col, type, pure_doc_value, &data, time_zone)));
310
2.24k
    col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&data)), 0);
311
2.24k
    return Status::OK();
312
2.24k
}
313
314
template <typename T>
315
Status get_float_value(const rapidjson::Value& col, PrimitiveType type, void* slot,
316
11.4k
                       bool pure_doc_value) {
317
11.4k
    static_assert(sizeof(T) == 4 || sizeof(T) == 8);
318
11.4k
    if (col.IsNumber()) {
319
11.4k
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) == 4 ? col.GetFloat() : col.GetDouble());
320
11.4k
        return Status::OK();
321
11.4k
    }
322
323
0
    if (pure_doc_value && col.IsArray() && !col.Empty()) {
324
0
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) == 4 ? col[0].GetFloat() : col[0].GetDouble());
325
0
        return Status::OK();
326
0
    }
327
328
0
    RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
329
0
    RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
330
331
0
    StringParser::ParseResult result;
332
0
    const std::string& val = col.GetString();
333
0
    size_t len = col.GetStringLength();
334
0
    T v = StringParser::string_to_float<T>(val.c_str(), len, &result);
335
0
    RETURN_ERROR_IF_PARSING_FAILED(result, col, type);
336
0
    *reinterpret_cast<T*>(slot) = v;
337
338
0
    return Status::OK();
339
0
}
_ZN5doris15get_float_valueIfEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPvb
Line
Count
Source
316
5.71k
                       bool pure_doc_value) {
317
5.71k
    static_assert(sizeof(T) == 4 || sizeof(T) == 8);
318
5.71k
    if (col.IsNumber()) {
319
5.71k
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) == 4 ? col.GetFloat() : col.GetDouble());
320
5.71k
        return Status::OK();
321
5.71k
    }
322
323
0
    if (pure_doc_value && col.IsArray() && !col.Empty()) {
324
0
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) == 4 ? col[0].GetFloat() : col[0].GetDouble());
325
0
        return Status::OK();
326
0
    }
327
328
0
    RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
329
0
    RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
330
331
0
    StringParser::ParseResult result;
332
0
    const std::string& val = col.GetString();
333
0
    size_t len = col.GetStringLength();
334
0
    T v = StringParser::string_to_float<T>(val.c_str(), len, &result);
335
0
    RETURN_ERROR_IF_PARSING_FAILED(result, col, type);
336
0
    *reinterpret_cast<T*>(slot) = v;
337
338
0
    return Status::OK();
339
0
}
_ZN5doris15get_float_valueIdEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPvb
Line
Count
Source
316
5.71k
                       bool pure_doc_value) {
317
5.71k
    static_assert(sizeof(T) == 4 || sizeof(T) == 8);
318
5.71k
    if (col.IsNumber()) {
319
5.71k
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) == 4 ? col.GetFloat() : col.GetDouble());
320
5.71k
        return Status::OK();
321
5.71k
    }
322
323
0
    if (pure_doc_value && col.IsArray() && !col.Empty()) {
324
0
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) == 4 ? col[0].GetFloat() : col[0].GetDouble());
325
0
        return Status::OK();
326
0
    }
327
328
0
    RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
329
0
    RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
330
331
0
    StringParser::ParseResult result;
332
0
    const std::string& val = col.GetString();
333
0
    size_t len = col.GetStringLength();
334
0
    T v = StringParser::string_to_float<T>(val.c_str(), len, &result);
335
0
    RETURN_ERROR_IF_PARSING_FAILED(result, col, type);
336
0
    *reinterpret_cast<T*>(slot) = v;
337
338
0
    return Status::OK();
339
0
}
340
341
template <typename T>
342
Status insert_float_value(const rapidjson::Value& col, PrimitiveType type, IColumn* col_ptr,
343
408
                          bool pure_doc_value, bool nullable) {
344
408
    static_assert(sizeof(T) == 4 || sizeof(T) == 8);
345
408
    if (col.IsNumber() && nullable) {
346
408
        T value = (T)(sizeof(T) == 4 ? col.GetFloat() : col.GetDouble());
347
408
        col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&value)), 0);
348
408
        return Status::OK();
349
408
    }
350
351
0
    if (pure_doc_value && col.IsArray() && !col.Empty() && nullable) {
352
0
        T value = (T)(sizeof(T) == 4 ? col[0].GetFloat() : col[0].GetDouble());
353
0
        col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&value)), 0);
354
0
        return Status::OK();
355
0
    }
356
357
0
    RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
358
0
    RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
359
360
0
    StringParser::ParseResult result;
361
0
    const std::string& val = col.GetString();
362
0
    size_t len = col.GetStringLength();
363
0
    T v = StringParser::string_to_float<T>(val.c_str(), len, &result);
364
0
    RETURN_ERROR_IF_PARSING_FAILED(result, col, type);
365
366
0
    col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&v)), 0);
367
368
0
    return Status::OK();
369
0
}
_ZN5doris18insert_float_valueIdEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPNS_7IColumnEbb
Line
Count
Source
343
408
                          bool pure_doc_value, bool nullable) {
344
408
    static_assert(sizeof(T) == 4 || sizeof(T) == 8);
345
408
    if (col.IsNumber() && nullable) {
346
408
        T value = (T)(sizeof(T) == 4 ? col.GetFloat() : col.GetDouble());
347
408
        col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&value)), 0);
348
408
        return Status::OK();
349
408
    }
350
351
0
    if (pure_doc_value && col.IsArray() && !col.Empty() && nullable) {
352
0
        T value = (T)(sizeof(T) == 4 ? col[0].GetFloat() : col[0].GetDouble());
353
0
        col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&value)), 0);
354
0
        return Status::OK();
355
0
    }
356
357
0
    RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
358
0
    RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
359
360
0
    StringParser::ParseResult result;
361
0
    const std::string& val = col.GetString();
362
0
    size_t len = col.GetStringLength();
363
0
    T v = StringParser::string_to_float<T>(val.c_str(), len, &result);
364
0
    RETURN_ERROR_IF_PARSING_FAILED(result, col, type);
365
366
0
    col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&v)), 0);
367
368
0
    return Status::OK();
369
0
}
Unexecuted instantiation: _ZN5doris18insert_float_valueIfEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPNS_7IColumnEbb
370
371
template <typename T>
372
Status insert_int_value(const rapidjson::Value& col, PrimitiveType type, IColumn* col_ptr,
373
1.06k
                        bool pure_doc_value, bool nullable) {
374
1.06k
    if (col.IsNumber()) {
375
616
        T value;
376
        // ES allows inserting float and double in int/long types.
377
        // To parse these numbers in Doris, we direct cast them to int types.
378
616
        if (col.IsDouble()) {
379
168
            value = static_cast<T>(col.GetDouble());
380
448
        } else if (col.IsFloat()) {
381
0
            value = static_cast<T>(col.GetFloat());
382
448
        } else {
383
448
            value = (T)(sizeof(T) < 8 ? col.GetInt() : col.GetInt64());
384
448
        }
385
616
        col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&value)), 0);
386
616
        return Status::OK();
387
616
    }
388
389
448
    auto parse_and_insert_data = [&](const rapidjson::Value& col_value) -> Status {
390
236
        StringParser::ParseResult result;
391
236
        std::string val = col_value.GetString();
392
        // ES allows inserting numbers and characters containing decimals in numeric types.
393
        // To parse these numbers in Doris, we remove the decimals here.
394
236
        size_t pos = val.find('.');
395
236
        if (pos != std::string::npos) {
396
100
            val = val.substr(0, pos);
397
100
        }
398
236
        size_t len = val.length();
399
236
        T v = StringParser::string_to_int<T>(val.c_str(), len, &result);
400
236
        RETURN_ERROR_IF_PARSING_FAILED(result, col_value, type);
401
402
236
        col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&v)), 0);
403
236
        return Status::OK();
404
236
    };
Unexecuted instantiation: _ZZN5doris16insert_int_valueIaEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPNS_7IColumnEbbENKUlSB_E_clESB_
Unexecuted instantiation: _ZZN5doris16insert_int_valueIsEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPNS_7IColumnEbbENKUlSB_E_clESB_
Unexecuted instantiation: _ZZN5doris16insert_int_valueIiEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPNS_7IColumnEbbENKUlSB_E_clESB_
_ZZN5doris16insert_int_valueIlEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPNS_7IColumnEbbENKUlSB_E_clESB_
Line
Count
Source
389
236
    auto parse_and_insert_data = [&](const rapidjson::Value& col_value) -> Status {
390
236
        StringParser::ParseResult result;
391
236
        std::string val = col_value.GetString();
392
        // ES allows inserting numbers and characters containing decimals in numeric types.
393
        // To parse these numbers in Doris, we remove the decimals here.
394
236
        size_t pos = val.find('.');
395
236
        if (pos != std::string::npos) {
396
100
            val = val.substr(0, pos);
397
100
        }
398
236
        size_t len = val.length();
399
236
        T v = StringParser::string_to_int<T>(val.c_str(), len, &result);
400
236
        RETURN_ERROR_IF_PARSING_FAILED(result, col_value, type);
401
402
236
        col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&v)), 0);
403
236
        return Status::OK();
404
236
    };
Unexecuted instantiation: _ZZN5doris16insert_int_valueInEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPNS_7IColumnEbbENKUlSB_E_clESB_
405
406
448
    if (pure_doc_value && col.IsArray() && !col.Empty()) {
407
316
        if (col[0].IsNumber()) {
408
212
            T value = (T)(sizeof(T) < 8 ? col[0].GetInt() : col[0].GetInt64());
409
212
            col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&value)), 0);
410
212
            return Status::OK();
411
212
        } else {
412
104
            RETURN_ERROR_IF_COL_IS_ARRAY(col[0], type, true);
413
104
            RETURN_ERROR_IF_COL_IS_NOT_STRING(col[0], type);
414
104
            return parse_and_insert_data(col[0]);
415
104
        }
416
316
    }
417
418
132
    RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
419
132
    RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
420
132
    return parse_and_insert_data(col);
421
132
}
Unexecuted instantiation: _ZN5doris16insert_int_valueIaEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPNS_7IColumnEbb
Unexecuted instantiation: _ZN5doris16insert_int_valueIsEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPNS_7IColumnEbb
_ZN5doris16insert_int_valueIiEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPNS_7IColumnEbb
Line
Count
Source
373
44
                        bool pure_doc_value, bool nullable) {
374
44
    if (col.IsNumber()) {
375
44
        T value;
376
        // ES allows inserting float and double in int/long types.
377
        // To parse these numbers in Doris, we direct cast them to int types.
378
44
        if (col.IsDouble()) {
379
0
            value = static_cast<T>(col.GetDouble());
380
44
        } else if (col.IsFloat()) {
381
0
            value = static_cast<T>(col.GetFloat());
382
44
        } else {
383
44
            value = (T)(sizeof(T) < 8 ? col.GetInt() : col.GetInt64());
384
44
        }
385
44
        col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&value)), 0);
386
44
        return Status::OK();
387
44
    }
388
389
0
    auto parse_and_insert_data = [&](const rapidjson::Value& col_value) -> Status {
390
0
        StringParser::ParseResult result;
391
0
        std::string val = col_value.GetString();
392
        // ES allows inserting numbers and characters containing decimals in numeric types.
393
        // To parse these numbers in Doris, we remove the decimals here.
394
0
        size_t pos = val.find('.');
395
0
        if (pos != std::string::npos) {
396
0
            val = val.substr(0, pos);
397
0
        }
398
0
        size_t len = val.length();
399
0
        T v = StringParser::string_to_int<T>(val.c_str(), len, &result);
400
0
        RETURN_ERROR_IF_PARSING_FAILED(result, col_value, type);
401
402
0
        col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&v)), 0);
403
0
        return Status::OK();
404
0
    };
405
406
0
    if (pure_doc_value && col.IsArray() && !col.Empty()) {
407
0
        if (col[0].IsNumber()) {
408
0
            T value = (T)(sizeof(T) < 8 ? col[0].GetInt() : col[0].GetInt64());
409
0
            col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&value)), 0);
410
0
            return Status::OK();
411
0
        } else {
412
0
            RETURN_ERROR_IF_COL_IS_ARRAY(col[0], type, true);
413
0
            RETURN_ERROR_IF_COL_IS_NOT_STRING(col[0], type);
414
0
            return parse_and_insert_data(col[0]);
415
0
        }
416
0
    }
417
418
0
    RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
419
0
    RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
420
0
    return parse_and_insert_data(col);
421
0
}
_ZN5doris16insert_int_valueIlEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPNS_7IColumnEbb
Line
Count
Source
373
1.02k
                        bool pure_doc_value, bool nullable) {
374
1.02k
    if (col.IsNumber()) {
375
572
        T value;
376
        // ES allows inserting float and double in int/long types.
377
        // To parse these numbers in Doris, we direct cast them to int types.
378
572
        if (col.IsDouble()) {
379
168
            value = static_cast<T>(col.GetDouble());
380
404
        } else if (col.IsFloat()) {
381
0
            value = static_cast<T>(col.GetFloat());
382
404
        } else {
383
404
            value = (T)(sizeof(T) < 8 ? col.GetInt() : col.GetInt64());
384
404
        }
385
572
        col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&value)), 0);
386
572
        return Status::OK();
387
572
    }
388
389
448
    auto parse_and_insert_data = [&](const rapidjson::Value& col_value) -> Status {
390
448
        StringParser::ParseResult result;
391
448
        std::string val = col_value.GetString();
392
        // ES allows inserting numbers and characters containing decimals in numeric types.
393
        // To parse these numbers in Doris, we remove the decimals here.
394
448
        size_t pos = val.find('.');
395
448
        if (pos != std::string::npos) {
396
448
            val = val.substr(0, pos);
397
448
        }
398
448
        size_t len = val.length();
399
448
        T v = StringParser::string_to_int<T>(val.c_str(), len, &result);
400
448
        RETURN_ERROR_IF_PARSING_FAILED(result, col_value, type);
401
402
448
        col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&v)), 0);
403
448
        return Status::OK();
404
448
    };
405
406
448
    if (pure_doc_value && col.IsArray() && !col.Empty()) {
407
316
        if (col[0].IsNumber()) {
408
212
            T value = (T)(sizeof(T) < 8 ? col[0].GetInt() : col[0].GetInt64());
409
212
            col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&value)), 0);
410
212
            return Status::OK();
411
212
        } else {
412
104
            RETURN_ERROR_IF_COL_IS_ARRAY(col[0], type, true);
413
104
            RETURN_ERROR_IF_COL_IS_NOT_STRING(col[0], type);
414
104
            return parse_and_insert_data(col[0]);
415
104
        }
416
316
    }
417
418
132
    RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
419
132
    RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
420
132
    return parse_and_insert_data(col);
421
132
}
Unexecuted instantiation: _ZN5doris16insert_int_valueInEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPNS_7IColumnEbb
422
423
template <PrimitiveType T>
424
Status handle_value(const rapidjson::Value& col, PrimitiveType sub_type, bool pure_doc_value,
425
34.8k
                    typename PrimitiveTypeTraits<T>::CppType& val) {
426
    if constexpr (T == TYPE_TINYINT || T == TYPE_SMALLINT || T == TYPE_INT || T == TYPE_BIGINT ||
427
14.5k
                  T == TYPE_LARGEINT) {
428
14.5k
        RETURN_IF_ERROR(get_int_value<typename PrimitiveTypeTraits<T>::CppType>(col, sub_type, &val,
429
14.5k
                                                                                pure_doc_value));
430
14.5k
        return Status::OK();
431
14.5k
    }
432
5.71k
    if constexpr (T == TYPE_FLOAT) {
433
5.71k
        RETURN_IF_ERROR(get_float_value<float>(col, sub_type, &val, pure_doc_value));
434
5.71k
        return Status::OK();
435
5.71k
    }
436
5.71k
    if constexpr (T == TYPE_DOUBLE) {
437
5.71k
        RETURN_IF_ERROR(get_float_value<double>(col, sub_type, &val, pure_doc_value));
438
5.71k
        return Status::OK();
439
5.71k
    }
440
6.02k
    if constexpr (T == TYPE_STRING || T == TYPE_CHAR || T == TYPE_VARCHAR) {
441
        // When ES mapping is keyword/text but actual data is an array,
442
        // serialize the array to JSON string instead of throwing an error.
443
        // This is valid in ES since any field can hold array values.
444
6.02k
        if (col.IsArray()) {
445
0
            val = json_value_to_string(col);
446
6.02k
        } else if (!col.IsString()) {
447
8
            val = json_value_to_string(col);
448
6.01k
        } else {
449
6.01k
            val = col.GetString();
450
6.01k
        }
451
6.02k
        return Status::OK();
452
6.02k
    }
453
2.85k
    if constexpr (T == TYPE_BOOLEAN) {
454
2.85k
        if (col.IsBool()) {
455
2.85k
            val = col.GetBool();
456
2.85k
            return Status::OK();
457
2.85k
        }
458
459
0
        if (col.IsNumber()) {
460
0
            val = static_cast<typename PrimitiveTypeTraits<T>::CppType>(col.GetInt());
461
0
            return Status::OK();
462
0
        }
463
464
0
        bool is_nested_str = false;
465
0
        if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsBool()) {
466
0
            val = col[0].GetBool();
467
0
            return Status::OK();
468
0
        } else if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsString()) {
469
0
            is_nested_str = true;
470
0
        } else if (pure_doc_value && col.IsArray()) {
471
0
            return Status::InternalError(ERROR_INVALID_COL_DATA, "BOOLEAN");
472
0
        }
473
474
0
        const rapidjson::Value& str_col = is_nested_str ? col[0] : col;
475
0
        const std::string& str_val = str_col.GetString();
476
0
        size_t val_size = str_col.GetStringLength();
477
0
        StringParser::ParseResult result;
478
0
        val = StringParser::string_to_bool(str_val.c_str(), val_size, &result);
479
0
        RETURN_ERROR_IF_PARSING_FAILED(result, str_col, sub_type);
480
0
        return Status::OK();
481
0
    }
482
0
    throw Exception(ErrorCode::INTERNAL_ERROR, "Un-supported type: {}", type_to_string(T));
483
34.8k
}
_ZN5doris12handle_valueILNS_13PrimitiveTypeE23EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
425
6.02k
                    typename PrimitiveTypeTraits<T>::CppType& val) {
426
    if constexpr (T == TYPE_TINYINT || T == TYPE_SMALLINT || T == TYPE_INT || T == TYPE_BIGINT ||
427
                  T == TYPE_LARGEINT) {
428
        RETURN_IF_ERROR(get_int_value<typename PrimitiveTypeTraits<T>::CppType>(col, sub_type, &val,
429
                                                                                pure_doc_value));
430
        return Status::OK();
431
    }
432
    if constexpr (T == TYPE_FLOAT) {
433
        RETURN_IF_ERROR(get_float_value<float>(col, sub_type, &val, pure_doc_value));
434
        return Status::OK();
435
    }
436
    if constexpr (T == TYPE_DOUBLE) {
437
        RETURN_IF_ERROR(get_float_value<double>(col, sub_type, &val, pure_doc_value));
438
        return Status::OK();
439
    }
440
6.02k
    if constexpr (T == TYPE_STRING || T == TYPE_CHAR || T == TYPE_VARCHAR) {
441
        // When ES mapping is keyword/text but actual data is an array,
442
        // serialize the array to JSON string instead of throwing an error.
443
        // This is valid in ES since any field can hold array values.
444
6.02k
        if (col.IsArray()) {
445
0
            val = json_value_to_string(col);
446
6.02k
        } else if (!col.IsString()) {
447
8
            val = json_value_to_string(col);
448
6.01k
        } else {
449
6.01k
            val = col.GetString();
450
6.01k
        }
451
6.02k
        return Status::OK();
452
6.02k
    }
453
    if constexpr (T == TYPE_BOOLEAN) {
454
        if (col.IsBool()) {
455
            val = col.GetBool();
456
            return Status::OK();
457
        }
458
459
        if (col.IsNumber()) {
460
            val = static_cast<typename PrimitiveTypeTraits<T>::CppType>(col.GetInt());
461
            return Status::OK();
462
        }
463
464
        bool is_nested_str = false;
465
        if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsBool()) {
466
            val = col[0].GetBool();
467
            return Status::OK();
468
        } else if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsString()) {
469
            is_nested_str = true;
470
        } else if (pure_doc_value && col.IsArray()) {
471
            return Status::InternalError(ERROR_INVALID_COL_DATA, "BOOLEAN");
472
        }
473
474
        const rapidjson::Value& str_col = is_nested_str ? col[0] : col;
475
        const std::string& str_val = str_col.GetString();
476
        size_t val_size = str_col.GetStringLength();
477
        StringParser::ParseResult result;
478
        val = StringParser::string_to_bool(str_val.c_str(), val_size, &result);
479
        RETURN_ERROR_IF_PARSING_FAILED(result, str_col, sub_type);
480
        return Status::OK();
481
    }
482
6.02k
    throw Exception(ErrorCode::INTERNAL_ERROR, "Un-supported type: {}", type_to_string(T));
483
6.02k
}
_ZN5doris12handle_valueILNS_13PrimitiveTypeE3EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
425
2.85k
                    typename PrimitiveTypeTraits<T>::CppType& val) {
426
    if constexpr (T == TYPE_TINYINT || T == TYPE_SMALLINT || T == TYPE_INT || T == TYPE_BIGINT ||
427
2.85k
                  T == TYPE_LARGEINT) {
428
2.85k
        RETURN_IF_ERROR(get_int_value<typename PrimitiveTypeTraits<T>::CppType>(col, sub_type, &val,
429
2.85k
                                                                                pure_doc_value));
430
2.85k
        return Status::OK();
431
2.85k
    }
432
    if constexpr (T == TYPE_FLOAT) {
433
        RETURN_IF_ERROR(get_float_value<float>(col, sub_type, &val, pure_doc_value));
434
        return Status::OK();
435
    }
436
    if constexpr (T == TYPE_DOUBLE) {
437
        RETURN_IF_ERROR(get_float_value<double>(col, sub_type, &val, pure_doc_value));
438
        return Status::OK();
439
    }
440
    if constexpr (T == TYPE_STRING || T == TYPE_CHAR || T == TYPE_VARCHAR) {
441
        // When ES mapping is keyword/text but actual data is an array,
442
        // serialize the array to JSON string instead of throwing an error.
443
        // This is valid in ES since any field can hold array values.
444
        if (col.IsArray()) {
445
            val = json_value_to_string(col);
446
        } else if (!col.IsString()) {
447
            val = json_value_to_string(col);
448
        } else {
449
            val = col.GetString();
450
        }
451
        return Status::OK();
452
    }
453
    if constexpr (T == TYPE_BOOLEAN) {
454
        if (col.IsBool()) {
455
            val = col.GetBool();
456
            return Status::OK();
457
        }
458
459
        if (col.IsNumber()) {
460
            val = static_cast<typename PrimitiveTypeTraits<T>::CppType>(col.GetInt());
461
            return Status::OK();
462
        }
463
464
        bool is_nested_str = false;
465
        if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsBool()) {
466
            val = col[0].GetBool();
467
            return Status::OK();
468
        } else if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsString()) {
469
            is_nested_str = true;
470
        } else if (pure_doc_value && col.IsArray()) {
471
            return Status::InternalError(ERROR_INVALID_COL_DATA, "BOOLEAN");
472
        }
473
474
        const rapidjson::Value& str_col = is_nested_str ? col[0] : col;
475
        const std::string& str_val = str_col.GetString();
476
        size_t val_size = str_col.GetStringLength();
477
        StringParser::ParseResult result;
478
        val = StringParser::string_to_bool(str_val.c_str(), val_size, &result);
479
        RETURN_ERROR_IF_PARSING_FAILED(result, str_col, sub_type);
480
        return Status::OK();
481
    }
482
2.85k
    throw Exception(ErrorCode::INTERNAL_ERROR, "Un-supported type: {}", type_to_string(T));
483
2.85k
}
_ZN5doris12handle_valueILNS_13PrimitiveTypeE4EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
425
2.85k
                    typename PrimitiveTypeTraits<T>::CppType& val) {
426
    if constexpr (T == TYPE_TINYINT || T == TYPE_SMALLINT || T == TYPE_INT || T == TYPE_BIGINT ||
427
2.85k
                  T == TYPE_LARGEINT) {
428
2.85k
        RETURN_IF_ERROR(get_int_value<typename PrimitiveTypeTraits<T>::CppType>(col, sub_type, &val,
429
2.85k
                                                                                pure_doc_value));
430
2.85k
        return Status::OK();
431
2.85k
    }
432
    if constexpr (T == TYPE_FLOAT) {
433
        RETURN_IF_ERROR(get_float_value<float>(col, sub_type, &val, pure_doc_value));
434
        return Status::OK();
435
    }
436
    if constexpr (T == TYPE_DOUBLE) {
437
        RETURN_IF_ERROR(get_float_value<double>(col, sub_type, &val, pure_doc_value));
438
        return Status::OK();
439
    }
440
    if constexpr (T == TYPE_STRING || T == TYPE_CHAR || T == TYPE_VARCHAR) {
441
        // When ES mapping is keyword/text but actual data is an array,
442
        // serialize the array to JSON string instead of throwing an error.
443
        // This is valid in ES since any field can hold array values.
444
        if (col.IsArray()) {
445
            val = json_value_to_string(col);
446
        } else if (!col.IsString()) {
447
            val = json_value_to_string(col);
448
        } else {
449
            val = col.GetString();
450
        }
451
        return Status::OK();
452
    }
453
    if constexpr (T == TYPE_BOOLEAN) {
454
        if (col.IsBool()) {
455
            val = col.GetBool();
456
            return Status::OK();
457
        }
458
459
        if (col.IsNumber()) {
460
            val = static_cast<typename PrimitiveTypeTraits<T>::CppType>(col.GetInt());
461
            return Status::OK();
462
        }
463
464
        bool is_nested_str = false;
465
        if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsBool()) {
466
            val = col[0].GetBool();
467
            return Status::OK();
468
        } else if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsString()) {
469
            is_nested_str = true;
470
        } else if (pure_doc_value && col.IsArray()) {
471
            return Status::InternalError(ERROR_INVALID_COL_DATA, "BOOLEAN");
472
        }
473
474
        const rapidjson::Value& str_col = is_nested_str ? col[0] : col;
475
        const std::string& str_val = str_col.GetString();
476
        size_t val_size = str_col.GetStringLength();
477
        StringParser::ParseResult result;
478
        val = StringParser::string_to_bool(str_val.c_str(), val_size, &result);
479
        RETURN_ERROR_IF_PARSING_FAILED(result, str_col, sub_type);
480
        return Status::OK();
481
    }
482
2.85k
    throw Exception(ErrorCode::INTERNAL_ERROR, "Un-supported type: {}", type_to_string(T));
483
2.85k
}
_ZN5doris12handle_valueILNS_13PrimitiveTypeE5EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
425
3.16k
                    typename PrimitiveTypeTraits<T>::CppType& val) {
426
    if constexpr (T == TYPE_TINYINT || T == TYPE_SMALLINT || T == TYPE_INT || T == TYPE_BIGINT ||
427
3.16k
                  T == TYPE_LARGEINT) {
428
3.16k
        RETURN_IF_ERROR(get_int_value<typename PrimitiveTypeTraits<T>::CppType>(col, sub_type, &val,
429
3.16k
                                                                                pure_doc_value));
430
3.16k
        return Status::OK();
431
3.16k
    }
432
    if constexpr (T == TYPE_FLOAT) {
433
        RETURN_IF_ERROR(get_float_value<float>(col, sub_type, &val, pure_doc_value));
434
        return Status::OK();
435
    }
436
    if constexpr (T == TYPE_DOUBLE) {
437
        RETURN_IF_ERROR(get_float_value<double>(col, sub_type, &val, pure_doc_value));
438
        return Status::OK();
439
    }
440
    if constexpr (T == TYPE_STRING || T == TYPE_CHAR || T == TYPE_VARCHAR) {
441
        // When ES mapping is keyword/text but actual data is an array,
442
        // serialize the array to JSON string instead of throwing an error.
443
        // This is valid in ES since any field can hold array values.
444
        if (col.IsArray()) {
445
            val = json_value_to_string(col);
446
        } else if (!col.IsString()) {
447
            val = json_value_to_string(col);
448
        } else {
449
            val = col.GetString();
450
        }
451
        return Status::OK();
452
    }
453
    if constexpr (T == TYPE_BOOLEAN) {
454
        if (col.IsBool()) {
455
            val = col.GetBool();
456
            return Status::OK();
457
        }
458
459
        if (col.IsNumber()) {
460
            val = static_cast<typename PrimitiveTypeTraits<T>::CppType>(col.GetInt());
461
            return Status::OK();
462
        }
463
464
        bool is_nested_str = false;
465
        if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsBool()) {
466
            val = col[0].GetBool();
467
            return Status::OK();
468
        } else if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsString()) {
469
            is_nested_str = true;
470
        } else if (pure_doc_value && col.IsArray()) {
471
            return Status::InternalError(ERROR_INVALID_COL_DATA, "BOOLEAN");
472
        }
473
474
        const rapidjson::Value& str_col = is_nested_str ? col[0] : col;
475
        const std::string& str_val = str_col.GetString();
476
        size_t val_size = str_col.GetStringLength();
477
        StringParser::ParseResult result;
478
        val = StringParser::string_to_bool(str_val.c_str(), val_size, &result);
479
        RETURN_ERROR_IF_PARSING_FAILED(result, str_col, sub_type);
480
        return Status::OK();
481
    }
482
3.16k
    throw Exception(ErrorCode::INTERNAL_ERROR, "Un-supported type: {}", type_to_string(T));
483
3.16k
}
_ZN5doris12handle_valueILNS_13PrimitiveTypeE6EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
425
3.79k
                    typename PrimitiveTypeTraits<T>::CppType& val) {
426
    if constexpr (T == TYPE_TINYINT || T == TYPE_SMALLINT || T == TYPE_INT || T == TYPE_BIGINT ||
427
3.79k
                  T == TYPE_LARGEINT) {
428
3.79k
        RETURN_IF_ERROR(get_int_value<typename PrimitiveTypeTraits<T>::CppType>(col, sub_type, &val,
429
3.79k
                                                                                pure_doc_value));
430
3.79k
        return Status::OK();
431
3.79k
    }
432
    if constexpr (T == TYPE_FLOAT) {
433
        RETURN_IF_ERROR(get_float_value<float>(col, sub_type, &val, pure_doc_value));
434
        return Status::OK();
435
    }
436
    if constexpr (T == TYPE_DOUBLE) {
437
        RETURN_IF_ERROR(get_float_value<double>(col, sub_type, &val, pure_doc_value));
438
        return Status::OK();
439
    }
440
    if constexpr (T == TYPE_STRING || T == TYPE_CHAR || T == TYPE_VARCHAR) {
441
        // When ES mapping is keyword/text but actual data is an array,
442
        // serialize the array to JSON string instead of throwing an error.
443
        // This is valid in ES since any field can hold array values.
444
        if (col.IsArray()) {
445
            val = json_value_to_string(col);
446
        } else if (!col.IsString()) {
447
            val = json_value_to_string(col);
448
        } else {
449
            val = col.GetString();
450
        }
451
        return Status::OK();
452
    }
453
    if constexpr (T == TYPE_BOOLEAN) {
454
        if (col.IsBool()) {
455
            val = col.GetBool();
456
            return Status::OK();
457
        }
458
459
        if (col.IsNumber()) {
460
            val = static_cast<typename PrimitiveTypeTraits<T>::CppType>(col.GetInt());
461
            return Status::OK();
462
        }
463
464
        bool is_nested_str = false;
465
        if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsBool()) {
466
            val = col[0].GetBool();
467
            return Status::OK();
468
        } else if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsString()) {
469
            is_nested_str = true;
470
        } else if (pure_doc_value && col.IsArray()) {
471
            return Status::InternalError(ERROR_INVALID_COL_DATA, "BOOLEAN");
472
        }
473
474
        const rapidjson::Value& str_col = is_nested_str ? col[0] : col;
475
        const std::string& str_val = str_col.GetString();
476
        size_t val_size = str_col.GetStringLength();
477
        StringParser::ParseResult result;
478
        val = StringParser::string_to_bool(str_val.c_str(), val_size, &result);
479
        RETURN_ERROR_IF_PARSING_FAILED(result, str_col, sub_type);
480
        return Status::OK();
481
    }
482
3.79k
    throw Exception(ErrorCode::INTERNAL_ERROR, "Un-supported type: {}", type_to_string(T));
483
3.79k
}
_ZN5doris12handle_valueILNS_13PrimitiveTypeE7EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
425
1.92k
                    typename PrimitiveTypeTraits<T>::CppType& val) {
426
    if constexpr (T == TYPE_TINYINT || T == TYPE_SMALLINT || T == TYPE_INT || T == TYPE_BIGINT ||
427
1.92k
                  T == TYPE_LARGEINT) {
428
1.92k
        RETURN_IF_ERROR(get_int_value<typename PrimitiveTypeTraits<T>::CppType>(col, sub_type, &val,
429
1.92k
                                                                                pure_doc_value));
430
1.92k
        return Status::OK();
431
1.92k
    }
432
    if constexpr (T == TYPE_FLOAT) {
433
        RETURN_IF_ERROR(get_float_value<float>(col, sub_type, &val, pure_doc_value));
434
        return Status::OK();
435
    }
436
    if constexpr (T == TYPE_DOUBLE) {
437
        RETURN_IF_ERROR(get_float_value<double>(col, sub_type, &val, pure_doc_value));
438
        return Status::OK();
439
    }
440
    if constexpr (T == TYPE_STRING || T == TYPE_CHAR || T == TYPE_VARCHAR) {
441
        // When ES mapping is keyword/text but actual data is an array,
442
        // serialize the array to JSON string instead of throwing an error.
443
        // This is valid in ES since any field can hold array values.
444
        if (col.IsArray()) {
445
            val = json_value_to_string(col);
446
        } else if (!col.IsString()) {
447
            val = json_value_to_string(col);
448
        } else {
449
            val = col.GetString();
450
        }
451
        return Status::OK();
452
    }
453
    if constexpr (T == TYPE_BOOLEAN) {
454
        if (col.IsBool()) {
455
            val = col.GetBool();
456
            return Status::OK();
457
        }
458
459
        if (col.IsNumber()) {
460
            val = static_cast<typename PrimitiveTypeTraits<T>::CppType>(col.GetInt());
461
            return Status::OK();
462
        }
463
464
        bool is_nested_str = false;
465
        if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsBool()) {
466
            val = col[0].GetBool();
467
            return Status::OK();
468
        } else if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsString()) {
469
            is_nested_str = true;
470
        } else if (pure_doc_value && col.IsArray()) {
471
            return Status::InternalError(ERROR_INVALID_COL_DATA, "BOOLEAN");
472
        }
473
474
        const rapidjson::Value& str_col = is_nested_str ? col[0] : col;
475
        const std::string& str_val = str_col.GetString();
476
        size_t val_size = str_col.GetStringLength();
477
        StringParser::ParseResult result;
478
        val = StringParser::string_to_bool(str_val.c_str(), val_size, &result);
479
        RETURN_ERROR_IF_PARSING_FAILED(result, str_col, sub_type);
480
        return Status::OK();
481
    }
482
1.92k
    throw Exception(ErrorCode::INTERNAL_ERROR, "Un-supported type: {}", type_to_string(T));
483
1.92k
}
_ZN5doris12handle_valueILNS_13PrimitiveTypeE8EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
425
5.71k
                    typename PrimitiveTypeTraits<T>::CppType& val) {
426
    if constexpr (T == TYPE_TINYINT || T == TYPE_SMALLINT || T == TYPE_INT || T == TYPE_BIGINT ||
427
                  T == TYPE_LARGEINT) {
428
        RETURN_IF_ERROR(get_int_value<typename PrimitiveTypeTraits<T>::CppType>(col, sub_type, &val,
429
                                                                                pure_doc_value));
430
        return Status::OK();
431
    }
432
5.71k
    if constexpr (T == TYPE_FLOAT) {
433
5.71k
        RETURN_IF_ERROR(get_float_value<float>(col, sub_type, &val, pure_doc_value));
434
5.71k
        return Status::OK();
435
5.71k
    }
436
    if constexpr (T == TYPE_DOUBLE) {
437
        RETURN_IF_ERROR(get_float_value<double>(col, sub_type, &val, pure_doc_value));
438
        return Status::OK();
439
    }
440
    if constexpr (T == TYPE_STRING || T == TYPE_CHAR || T == TYPE_VARCHAR) {
441
        // When ES mapping is keyword/text but actual data is an array,
442
        // serialize the array to JSON string instead of throwing an error.
443
        // This is valid in ES since any field can hold array values.
444
        if (col.IsArray()) {
445
            val = json_value_to_string(col);
446
        } else if (!col.IsString()) {
447
            val = json_value_to_string(col);
448
        } else {
449
            val = col.GetString();
450
        }
451
        return Status::OK();
452
    }
453
    if constexpr (T == TYPE_BOOLEAN) {
454
        if (col.IsBool()) {
455
            val = col.GetBool();
456
            return Status::OK();
457
        }
458
459
        if (col.IsNumber()) {
460
            val = static_cast<typename PrimitiveTypeTraits<T>::CppType>(col.GetInt());
461
            return Status::OK();
462
        }
463
464
        bool is_nested_str = false;
465
        if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsBool()) {
466
            val = col[0].GetBool();
467
            return Status::OK();
468
        } else if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsString()) {
469
            is_nested_str = true;
470
        } else if (pure_doc_value && col.IsArray()) {
471
            return Status::InternalError(ERROR_INVALID_COL_DATA, "BOOLEAN");
472
        }
473
474
        const rapidjson::Value& str_col = is_nested_str ? col[0] : col;
475
        const std::string& str_val = str_col.GetString();
476
        size_t val_size = str_col.GetStringLength();
477
        StringParser::ParseResult result;
478
        val = StringParser::string_to_bool(str_val.c_str(), val_size, &result);
479
        RETURN_ERROR_IF_PARSING_FAILED(result, str_col, sub_type);
480
        return Status::OK();
481
    }
482
5.71k
    throw Exception(ErrorCode::INTERNAL_ERROR, "Un-supported type: {}", type_to_string(T));
483
5.71k
}
_ZN5doris12handle_valueILNS_13PrimitiveTypeE9EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
425
5.71k
                    typename PrimitiveTypeTraits<T>::CppType& val) {
426
    if constexpr (T == TYPE_TINYINT || T == TYPE_SMALLINT || T == TYPE_INT || T == TYPE_BIGINT ||
427
                  T == TYPE_LARGEINT) {
428
        RETURN_IF_ERROR(get_int_value<typename PrimitiveTypeTraits<T>::CppType>(col, sub_type, &val,
429
                                                                                pure_doc_value));
430
        return Status::OK();
431
    }
432
    if constexpr (T == TYPE_FLOAT) {
433
        RETURN_IF_ERROR(get_float_value<float>(col, sub_type, &val, pure_doc_value));
434
        return Status::OK();
435
    }
436
5.71k
    if constexpr (T == TYPE_DOUBLE) {
437
5.71k
        RETURN_IF_ERROR(get_float_value<double>(col, sub_type, &val, pure_doc_value));
438
5.71k
        return Status::OK();
439
5.71k
    }
440
    if constexpr (T == TYPE_STRING || T == TYPE_CHAR || T == TYPE_VARCHAR) {
441
        // When ES mapping is keyword/text but actual data is an array,
442
        // serialize the array to JSON string instead of throwing an error.
443
        // This is valid in ES since any field can hold array values.
444
        if (col.IsArray()) {
445
            val = json_value_to_string(col);
446
        } else if (!col.IsString()) {
447
            val = json_value_to_string(col);
448
        } else {
449
            val = col.GetString();
450
        }
451
        return Status::OK();
452
    }
453
    if constexpr (T == TYPE_BOOLEAN) {
454
        if (col.IsBool()) {
455
            val = col.GetBool();
456
            return Status::OK();
457
        }
458
459
        if (col.IsNumber()) {
460
            val = static_cast<typename PrimitiveTypeTraits<T>::CppType>(col.GetInt());
461
            return Status::OK();
462
        }
463
464
        bool is_nested_str = false;
465
        if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsBool()) {
466
            val = col[0].GetBool();
467
            return Status::OK();
468
        } else if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsString()) {
469
            is_nested_str = true;
470
        } else if (pure_doc_value && col.IsArray()) {
471
            return Status::InternalError(ERROR_INVALID_COL_DATA, "BOOLEAN");
472
        }
473
474
        const rapidjson::Value& str_col = is_nested_str ? col[0] : col;
475
        const std::string& str_val = str_col.GetString();
476
        size_t val_size = str_col.GetStringLength();
477
        StringParser::ParseResult result;
478
        val = StringParser::string_to_bool(str_val.c_str(), val_size, &result);
479
        RETURN_ERROR_IF_PARSING_FAILED(result, str_col, sub_type);
480
        return Status::OK();
481
    }
482
5.71k
    throw Exception(ErrorCode::INTERNAL_ERROR, "Un-supported type: {}", type_to_string(T));
483
5.71k
}
_ZN5doris12handle_valueILNS_13PrimitiveTypeE2EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
425
2.85k
                    typename PrimitiveTypeTraits<T>::CppType& val) {
426
    if constexpr (T == TYPE_TINYINT || T == TYPE_SMALLINT || T == TYPE_INT || T == TYPE_BIGINT ||
427
                  T == TYPE_LARGEINT) {
428
        RETURN_IF_ERROR(get_int_value<typename PrimitiveTypeTraits<T>::CppType>(col, sub_type, &val,
429
                                                                                pure_doc_value));
430
        return Status::OK();
431
    }
432
    if constexpr (T == TYPE_FLOAT) {
433
        RETURN_IF_ERROR(get_float_value<float>(col, sub_type, &val, pure_doc_value));
434
        return Status::OK();
435
    }
436
    if constexpr (T == TYPE_DOUBLE) {
437
        RETURN_IF_ERROR(get_float_value<double>(col, sub_type, &val, pure_doc_value));
438
        return Status::OK();
439
    }
440
    if constexpr (T == TYPE_STRING || T == TYPE_CHAR || T == TYPE_VARCHAR) {
441
        // When ES mapping is keyword/text but actual data is an array,
442
        // serialize the array to JSON string instead of throwing an error.
443
        // This is valid in ES since any field can hold array values.
444
        if (col.IsArray()) {
445
            val = json_value_to_string(col);
446
        } else if (!col.IsString()) {
447
            val = json_value_to_string(col);
448
        } else {
449
            val = col.GetString();
450
        }
451
        return Status::OK();
452
    }
453
2.85k
    if constexpr (T == TYPE_BOOLEAN) {
454
2.85k
        if (col.IsBool()) {
455
2.85k
            val = col.GetBool();
456
2.85k
            return Status::OK();
457
2.85k
        }
458
459
0
        if (col.IsNumber()) {
460
0
            val = static_cast<typename PrimitiveTypeTraits<T>::CppType>(col.GetInt());
461
0
            return Status::OK();
462
0
        }
463
464
0
        bool is_nested_str = false;
465
0
        if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsBool()) {
466
0
            val = col[0].GetBool();
467
0
            return Status::OK();
468
0
        } else if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsString()) {
469
0
            is_nested_str = true;
470
0
        } else if (pure_doc_value && col.IsArray()) {
471
0
            return Status::InternalError(ERROR_INVALID_COL_DATA, "BOOLEAN");
472
0
        }
473
474
0
        const rapidjson::Value& str_col = is_nested_str ? col[0] : col;
475
0
        const std::string& str_val = str_col.GetString();
476
0
        size_t val_size = str_col.GetStringLength();
477
0
        StringParser::ParseResult result;
478
0
        val = StringParser::string_to_bool(str_val.c_str(), val_size, &result);
479
0
        RETURN_ERROR_IF_PARSING_FAILED(result, str_col, sub_type);
480
0
        return Status::OK();
481
0
    }
482
0
    throw Exception(ErrorCode::INTERNAL_ERROR, "Un-supported type: {}", type_to_string(T));
483
2.85k
}
484
485
template <PrimitiveType T>
486
Status process_single_column(const rapidjson::Value& col, PrimitiveType sub_type,
487
34.8k
                             bool pure_doc_value, Array& array) {
488
34.8k
    typename PrimitiveTypeTraits<T>::CppType val;
489
34.8k
    RETURN_IF_ERROR(handle_value<T>(col, sub_type, pure_doc_value, val));
490
34.8k
    array.push_back(Field::create_field<T>(val));
491
34.8k
    return Status::OK();
492
34.8k
}
_ZN5doris21process_single_columnILNS_13PrimitiveTypeE23EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
487
6.02k
                             bool pure_doc_value, Array& array) {
488
6.02k
    typename PrimitiveTypeTraits<T>::CppType val;
489
6.02k
    RETURN_IF_ERROR(handle_value<T>(col, sub_type, pure_doc_value, val));
490
6.02k
    array.push_back(Field::create_field<T>(val));
491
6.02k
    return Status::OK();
492
6.02k
}
_ZN5doris21process_single_columnILNS_13PrimitiveTypeE3EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
487
2.85k
                             bool pure_doc_value, Array& array) {
488
2.85k
    typename PrimitiveTypeTraits<T>::CppType val;
489
2.85k
    RETURN_IF_ERROR(handle_value<T>(col, sub_type, pure_doc_value, val));
490
2.85k
    array.push_back(Field::create_field<T>(val));
491
2.85k
    return Status::OK();
492
2.85k
}
_ZN5doris21process_single_columnILNS_13PrimitiveTypeE4EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
487
2.85k
                             bool pure_doc_value, Array& array) {
488
2.85k
    typename PrimitiveTypeTraits<T>::CppType val;
489
2.85k
    RETURN_IF_ERROR(handle_value<T>(col, sub_type, pure_doc_value, val));
490
2.85k
    array.push_back(Field::create_field<T>(val));
491
2.85k
    return Status::OK();
492
2.85k
}
_ZN5doris21process_single_columnILNS_13PrimitiveTypeE5EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
487
3.16k
                             bool pure_doc_value, Array& array) {
488
3.16k
    typename PrimitiveTypeTraits<T>::CppType val;
489
3.16k
    RETURN_IF_ERROR(handle_value<T>(col, sub_type, pure_doc_value, val));
490
3.16k
    array.push_back(Field::create_field<T>(val));
491
3.16k
    return Status::OK();
492
3.16k
}
_ZN5doris21process_single_columnILNS_13PrimitiveTypeE6EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
487
3.79k
                             bool pure_doc_value, Array& array) {
488
3.79k
    typename PrimitiveTypeTraits<T>::CppType val;
489
3.79k
    RETURN_IF_ERROR(handle_value<T>(col, sub_type, pure_doc_value, val));
490
3.79k
    array.push_back(Field::create_field<T>(val));
491
3.79k
    return Status::OK();
492
3.79k
}
_ZN5doris21process_single_columnILNS_13PrimitiveTypeE7EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
487
1.92k
                             bool pure_doc_value, Array& array) {
488
1.92k
    typename PrimitiveTypeTraits<T>::CppType val;
489
1.92k
    RETURN_IF_ERROR(handle_value<T>(col, sub_type, pure_doc_value, val));
490
1.92k
    array.push_back(Field::create_field<T>(val));
491
1.92k
    return Status::OK();
492
1.92k
}
_ZN5doris21process_single_columnILNS_13PrimitiveTypeE8EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
487
5.71k
                             bool pure_doc_value, Array& array) {
488
5.71k
    typename PrimitiveTypeTraits<T>::CppType val;
489
5.71k
    RETURN_IF_ERROR(handle_value<T>(col, sub_type, pure_doc_value, val));
490
5.71k
    array.push_back(Field::create_field<T>(val));
491
5.71k
    return Status::OK();
492
5.71k
}
_ZN5doris21process_single_columnILNS_13PrimitiveTypeE9EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
487
5.71k
                             bool pure_doc_value, Array& array) {
488
5.71k
    typename PrimitiveTypeTraits<T>::CppType val;
489
5.71k
    RETURN_IF_ERROR(handle_value<T>(col, sub_type, pure_doc_value, val));
490
5.71k
    array.push_back(Field::create_field<T>(val));
491
5.71k
    return Status::OK();
492
5.71k
}
_ZN5doris21process_single_columnILNS_13PrimitiveTypeE2EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
487
2.85k
                             bool pure_doc_value, Array& array) {
488
2.85k
    typename PrimitiveTypeTraits<T>::CppType val;
489
2.85k
    RETURN_IF_ERROR(handle_value<T>(col, sub_type, pure_doc_value, val));
490
2.85k
    array.push_back(Field::create_field<T>(val));
491
2.85k
    return Status::OK();
492
2.85k
}
493
494
template <PrimitiveType T>
495
Status process_column_array(const rapidjson::Value& col, PrimitiveType sub_type,
496
9.47k
                            bool pure_doc_value, Array& array) {
497
34.7k
    for (const auto& sub_col : col.GetArray()) {
498
34.7k
        RETURN_IF_ERROR(process_single_column<T>(sub_col, sub_type, pure_doc_value, array));
499
34.7k
    }
500
9.47k
    return Status::OK();
501
9.47k
}
_ZN5doris20process_column_arrayILNS_13PrimitiveTypeE23EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
496
2.24k
                            bool pure_doc_value, Array& array) {
497
5.97k
    for (const auto& sub_col : col.GetArray()) {
498
5.97k
        RETURN_IF_ERROR(process_single_column<T>(sub_col, sub_type, pure_doc_value, array));
499
5.97k
    }
500
2.24k
    return Status::OK();
501
2.24k
}
_ZN5doris20process_column_arrayILNS_13PrimitiveTypeE3EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
496
714
                            bool pure_doc_value, Array& array) {
497
2.85k
    for (const auto& sub_col : col.GetArray()) {
498
2.85k
        RETURN_IF_ERROR(process_single_column<T>(sub_col, sub_type, pure_doc_value, array));
499
2.85k
    }
500
714
    return Status::OK();
501
714
}
_ZN5doris20process_column_arrayILNS_13PrimitiveTypeE4EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
496
714
                            bool pure_doc_value, Array& array) {
497
2.85k
    for (const auto& sub_col : col.GetArray()) {
498
2.85k
        RETURN_IF_ERROR(process_single_column<T>(sub_col, sub_type, pure_doc_value, array));
499
2.85k
    }
500
714
    return Status::OK();
501
714
}
_ZN5doris20process_column_arrayILNS_13PrimitiveTypeE5EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
496
810
                            bool pure_doc_value, Array& array) {
497
3.11k
    for (const auto& sub_col : col.GetArray()) {
498
3.11k
        RETURN_IF_ERROR(process_single_column<T>(sub_col, sub_type, pure_doc_value, array));
499
3.11k
    }
500
810
    return Status::OK();
501
810
}
_ZN5doris20process_column_arrayILNS_13PrimitiveTypeE6EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
496
948
                            bool pure_doc_value, Array& array) {
497
3.79k
    for (const auto& sub_col : col.GetArray()) {
498
3.79k
        RETURN_IF_ERROR(process_single_column<T>(sub_col, sub_type, pure_doc_value, array));
499
3.79k
    }
500
948
    return Status::OK();
501
948
}
_ZN5doris20process_column_arrayILNS_13PrimitiveTypeE7EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
496
480
                            bool pure_doc_value, Array& array) {
497
1.92k
    for (const auto& sub_col : col.GetArray()) {
498
1.92k
        RETURN_IF_ERROR(process_single_column<T>(sub_col, sub_type, pure_doc_value, array));
499
1.92k
    }
500
480
    return Status::OK();
501
480
}
_ZN5doris20process_column_arrayILNS_13PrimitiveTypeE8EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
496
1.42k
                            bool pure_doc_value, Array& array) {
497
5.71k
    for (const auto& sub_col : col.GetArray()) {
498
5.71k
        RETURN_IF_ERROR(process_single_column<T>(sub_col, sub_type, pure_doc_value, array));
499
5.71k
    }
500
1.42k
    return Status::OK();
501
1.42k
}
_ZN5doris20process_column_arrayILNS_13PrimitiveTypeE9EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
496
1.42k
                            bool pure_doc_value, Array& array) {
497
5.71k
    for (const auto& sub_col : col.GetArray()) {
498
5.71k
        RETURN_IF_ERROR(process_single_column<T>(sub_col, sub_type, pure_doc_value, array));
499
5.71k
    }
500
1.42k
    return Status::OK();
501
1.42k
}
_ZN5doris20process_column_arrayILNS_13PrimitiveTypeE2EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
496
714
                            bool pure_doc_value, Array& array) {
497
2.85k
    for (const auto& sub_col : col.GetArray()) {
498
2.85k
        RETURN_IF_ERROR(process_single_column<T>(sub_col, sub_type, pure_doc_value, array));
499
2.85k
    }
500
714
    return Status::OK();
501
714
}
502
503
template <PrimitiveType T>
504
Status process_column(const rapidjson::Value& col, PrimitiveType sub_type, bool pure_doc_value,
505
9.57k
                      Array& array) {
506
9.57k
    if (!col.IsArray()) {
507
96
        return process_single_column<T>(col, sub_type, pure_doc_value, array);
508
9.47k
    } else {
509
9.47k
        return process_column_array<T>(col, sub_type, pure_doc_value, array);
510
9.47k
    }
511
9.57k
}
_ZN5doris14process_columnILNS_13PrimitiveTypeE23EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
505
2.29k
                      Array& array) {
506
2.29k
    if (!col.IsArray()) {
507
48
        return process_single_column<T>(col, sub_type, pure_doc_value, array);
508
2.24k
    } else {
509
2.24k
        return process_column_array<T>(col, sub_type, pure_doc_value, array);
510
2.24k
    }
511
2.29k
}
_ZN5doris14process_columnILNS_13PrimitiveTypeE3EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
505
714
                      Array& array) {
506
714
    if (!col.IsArray()) {
507
0
        return process_single_column<T>(col, sub_type, pure_doc_value, array);
508
714
    } else {
509
714
        return process_column_array<T>(col, sub_type, pure_doc_value, array);
510
714
    }
511
714
}
_ZN5doris14process_columnILNS_13PrimitiveTypeE4EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
505
714
                      Array& array) {
506
714
    if (!col.IsArray()) {
507
0
        return process_single_column<T>(col, sub_type, pure_doc_value, array);
508
714
    } else {
509
714
        return process_column_array<T>(col, sub_type, pure_doc_value, array);
510
714
    }
511
714
}
_ZN5doris14process_columnILNS_13PrimitiveTypeE5EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
505
858
                      Array& array) {
506
858
    if (!col.IsArray()) {
507
48
        return process_single_column<T>(col, sub_type, pure_doc_value, array);
508
810
    } else {
509
810
        return process_column_array<T>(col, sub_type, pure_doc_value, array);
510
810
    }
511
858
}
_ZN5doris14process_columnILNS_13PrimitiveTypeE6EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
505
948
                      Array& array) {
506
948
    if (!col.IsArray()) {
507
0
        return process_single_column<T>(col, sub_type, pure_doc_value, array);
508
948
    } else {
509
948
        return process_column_array<T>(col, sub_type, pure_doc_value, array);
510
948
    }
511
948
}
_ZN5doris14process_columnILNS_13PrimitiveTypeE7EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
505
480
                      Array& array) {
506
480
    if (!col.IsArray()) {
507
0
        return process_single_column<T>(col, sub_type, pure_doc_value, array);
508
480
    } else {
509
480
        return process_column_array<T>(col, sub_type, pure_doc_value, array);
510
480
    }
511
480
}
_ZN5doris14process_columnILNS_13PrimitiveTypeE8EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
505
1.42k
                      Array& array) {
506
1.42k
    if (!col.IsArray()) {
507
0
        return process_single_column<T>(col, sub_type, pure_doc_value, array);
508
1.42k
    } else {
509
1.42k
        return process_column_array<T>(col, sub_type, pure_doc_value, array);
510
1.42k
    }
511
1.42k
}
_ZN5doris14process_columnILNS_13PrimitiveTypeE9EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
505
1.42k
                      Array& array) {
506
1.42k
    if (!col.IsArray()) {
507
0
        return process_single_column<T>(col, sub_type, pure_doc_value, array);
508
1.42k
    } else {
509
1.42k
        return process_column_array<T>(col, sub_type, pure_doc_value, array);
510
1.42k
    }
511
1.42k
}
_ZN5doris14process_columnILNS_13PrimitiveTypeE2EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
505
714
                      Array& array) {
506
714
    if (!col.IsArray()) {
507
0
        return process_single_column<T>(col, sub_type, pure_doc_value, array);
508
714
    } else {
509
714
        return process_column_array<T>(col, sub_type, pure_doc_value, array);
510
714
    }
511
714
}
512
513
template <PrimitiveType T>
514
Status process_date_column(const rapidjson::Value& col, PrimitiveType sub_type, bool pure_doc_value,
515
1.42k
                           Array& array, const cctz::time_zone& time_zone) {
516
1.42k
    if (!col.IsArray()) {
517
0
        typename PrimitiveTypeTraits<T>::CppType data;
518
0
        RETURN_IF_ERROR((get_date_int<T>(col, sub_type, pure_doc_value, &data, time_zone)));
519
0
        array.push_back(Field::create_field<T>(data));
520
1.42k
    } else {
521
2.85k
        for (const auto& sub_col : col.GetArray()) {
522
2.85k
            typename PrimitiveTypeTraits<T>::CppType data;
523
2.85k
            RETURN_IF_ERROR((get_date_int<T>(sub_col, sub_type, pure_doc_value, &data, time_zone)));
524
2.85k
            array.push_back(Field::create_field<T>(data));
525
2.85k
        }
526
1.42k
    }
527
1.42k
    return Status::OK();
528
1.42k
}
_ZN5doris19process_date_columnILNS_13PrimitiveTypeE25EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayERKN4cctz9time_zoneE
Line
Count
Source
515
718
                           Array& array, const cctz::time_zone& time_zone) {
516
718
    if (!col.IsArray()) {
517
0
        typename PrimitiveTypeTraits<T>::CppType data;
518
0
        RETURN_IF_ERROR((get_date_int<T>(col, sub_type, pure_doc_value, &data, time_zone)));
519
0
        array.push_back(Field::create_field<T>(data));
520
718
    } else {
521
1.43k
        for (const auto& sub_col : col.GetArray()) {
522
1.43k
            typename PrimitiveTypeTraits<T>::CppType data;
523
1.43k
            RETURN_IF_ERROR((get_date_int<T>(sub_col, sub_type, pure_doc_value, &data, time_zone)));
524
1.43k
            array.push_back(Field::create_field<T>(data));
525
1.43k
        }
526
718
    }
527
718
    return Status::OK();
528
718
}
_ZN5doris19process_date_columnILNS_13PrimitiveTypeE26EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayERKN4cctz9time_zoneE
Line
Count
Source
515
710
                           Array& array, const cctz::time_zone& time_zone) {
516
710
    if (!col.IsArray()) {
517
0
        typename PrimitiveTypeTraits<T>::CppType data;
518
0
        RETURN_IF_ERROR((get_date_int<T>(col, sub_type, pure_doc_value, &data, time_zone)));
519
0
        array.push_back(Field::create_field<T>(data));
520
710
    } else {
521
1.42k
        for (const auto& sub_col : col.GetArray()) {
522
1.42k
            typename PrimitiveTypeTraits<T>::CppType data;
523
1.42k
            RETURN_IF_ERROR((get_date_int<T>(sub_col, sub_type, pure_doc_value, &data, time_zone)));
524
1.42k
            array.push_back(Field::create_field<T>(data));
525
1.42k
        }
526
710
    }
527
710
    return Status::OK();
528
710
}
529
530
// Fills `array` with JSONB fields built from `col`.
//
// A JSON array contributes one JSONB element per entry; any other value
// contributes a single element. Each value is first serialized back to JSON text
// with json_value_to_string() and then encoded by JsonBinaryValue.
//
// @param col             JSON value (scalar, object or array) to convert.
// @param sub_type        unused here; kept so the signature matches the other
//                        process_*_column helpers.
// @param pure_doc_value  unused here; kept for the same reason.
// @param array           destination; elements are appended in input order.
// @return OK, or the first error reported by JsonBinaryValue::from_json_string().
Status process_jsonb_column(const rapidjson::Value& col, PrimitiveType sub_type,
                            bool pure_doc_value, Array& array) {
    // Single conversion path for both the scalar and the array case, so the two
    // cannot drift apart; the encoded field is moved rather than copied.
    const auto append_jsonb = [&array](const rapidjson::Value& item) -> Status {
        JsonBinaryValue jsonb_value;
        RETURN_IF_ERROR(jsonb_value.from_json_string(json_value_to_string(item)));
        JsonbField json(jsonb_value.value(), jsonb_value.size());
        array.push_back(Field::create_field<TYPE_JSONB>(std::move(json)));
        return Status::OK();
    };

    if (!col.IsArray()) {
        return append_jsonb(col);
    }
    for (const auto& sub_col : col.GetArray()) {
        RETURN_IF_ERROR(append_jsonb(sub_col));
    }
    return Status::OK();
}
547
548
// Dispatches on the element type of an ARRAY slot and appends the converted
// element(s) of `col` to `array`.
// Element types without a case below are logged and reported as an InternalError.
Status ScrollParser::parse_column(const rapidjson::Value& col, PrimitiveType sub_type,
                                  bool pure_doc_value, Array& array,
                                  const cctz::time_zone& time_zone) {
    switch (sub_type) {
    // All string-like element types share the TYPE_STRING conversion.
    case TYPE_CHAR:
    case TYPE_VARCHAR:
    case TYPE_STRING:
        return process_column<TYPE_STRING>(col, sub_type, pure_doc_value, array);

    case TYPE_TINYINT:
        return process_column<TYPE_TINYINT>(col, sub_type, pure_doc_value, array);
    case TYPE_SMALLINT:
        return process_column<TYPE_SMALLINT>(col, sub_type, pure_doc_value, array);
    case TYPE_INT:
        return process_column<TYPE_INT>(col, sub_type, pure_doc_value, array);
    case TYPE_BIGINT:
        return process_column<TYPE_BIGINT>(col, sub_type, pure_doc_value, array);
    case TYPE_LARGEINT:
        return process_column<TYPE_LARGEINT>(col, sub_type, pure_doc_value, array);

    case TYPE_FLOAT:
        return process_column<TYPE_FLOAT>(col, sub_type, pure_doc_value, array);
    case TYPE_DOUBLE:
        return process_column<TYPE_DOUBLE>(col, sub_type, pure_doc_value, array);

    case TYPE_BOOLEAN:
        return process_column<TYPE_BOOLEAN>(col, sub_type, pure_doc_value, array);

    // date/datetime v2 is the default type for catalog table,
    // see https://github.com/apache/doris/pull/16304
    // No need to support date and datetime types.
    case TYPE_DATEV2:
        return process_date_column<TYPE_DATEV2>(col, sub_type, pure_doc_value, array, time_zone);
    case TYPE_DATETIMEV2:
        return process_date_column<TYPE_DATETIMEV2>(col, sub_type, pure_doc_value, array,
                                                    time_zone);

    case TYPE_JSONB:
        return process_jsonb_column(col, sub_type, pure_doc_value, array);

    default:
        break;
    }

    // Reached only for element types that have no case above.
    LOG(ERROR) << "Do not support Array type: " << sub_type;
    return Status::InternalError("Unsupported type");
}
590
591
724
// Creates a parser holding no documents: the cached hit count and the index of
// the next hit to read both start at 0.
// NOTE: `doc_value_mode` is accepted but not used by this constructor.
ScrollParser::ScrollParser(bool doc_value_mode) : _size(0), _line_index(0) {}
592
593
724
// Defaulted out-of-line: member destructors perform all cleanup.
ScrollParser::~ScrollParser() = default;
594
595
720
// Parses one Elasticsearch search/scroll response and caches its hit documents.
//
// Expected response shape: { hits: { total : 2, "hits" : [ {}, {}, {} ]}}
//
// @param scroll_result  raw JSON text of the response.
// @param exactly_once   when false, the response must carry a scroll id, which is
//                       stored for get_scroll_id(); when true the scroll id is
//                       neither required nor read.
// @return OK on success. A response whose outer hits object has no inner hits
//         member, or whose inner hits member is not an array, is also OK and leaves
//         get_size() at 0. InternalError is returned when the text is not valid
//         JSON, is not a JSON object, lacks a required field, or a required field
//         has an unexpected type.
Status ScrollParser::parse(const std::string& scroll_result, bool exactly_once) {
    // rely on `_size != 0` to determine whether scroll ends
    _size = 0;
    _document_node.Parse(scroll_result.c_str(), scroll_result.length());
    if (_document_node.HasParseError()) {
        return Status::InternalError("Parsing json error, json is: {}", scroll_result);
    }
    // Member lookups below are only defined on JSON objects; valid JSON such as
    // `[]` or `1` must be rejected before HasMember()/operator[] are used.
    if (!_document_node.IsObject()) {
        return Status::InternalError("Scroll response is not a json object, json is: {}",
                                     scroll_result);
    }

    if (!exactly_once) {
        if (!_document_node.HasMember(FIELD_SCROLL_ID)) {
            LOG(WARNING) << "Document has not a scroll id field scroll response:" << scroll_result;
            return Status::InternalError("Document has not a scroll id field");
        }
        const rapidjson::Value& scroll_node = _document_node[FIELD_SCROLL_ID];
        // GetString() requires a string value.
        if (!scroll_node.IsString()) {
            return Status::InternalError("Scroll id field is not a string");
        }
        _scroll_id = scroll_node.GetString();
    }

    // operator[] on a missing member is not a defined lookup, so verify the outer
    // hits member exists and is an object before descending into it.
    if (!_document_node.HasMember(FIELD_HITS)) {
        return Status::InternalError("Document has not a hits field, json is: {}",
                                     scroll_result);
    }
    const rapidjson::Value& outer_hits_node = _document_node[FIELD_HITS];
    if (!outer_hits_node.IsObject()) {
        return Status::InternalError("Hits field is not a json object, json is: {}",
                                     scroll_result);
    }

    // if has no inner hits, there has no data in this index
    if (!outer_hits_node.HasMember(FIELD_INNER_HITS)) {
        return Status::OK();
    }
    const rapidjson::Value& inner_hits_node = outer_hits_node[FIELD_INNER_HITS];
    // this happened just the end of scrolling
    if (!inner_hits_node.IsArray()) {
        return Status::OK();
    }

    _inner_hits_node.CopyFrom(inner_hits_node, _document_node.GetAllocator());
    // how many documents contains in this batch
    _size = _inner_hits_node.Size();
    return Status::OK();
}
628
629
1.43k
int ScrollParser::get_size() const {
630
1.43k
    return _size;
631
1.43k
}
632
633
724
const std::string& ScrollParser::get_scroll_id() {
634
724
    return _scroll_id;
635
724
}
636
637
// Materializes the next cached hit document into `columns`, one value per slot.
//
// @param tuple_desc        slot descriptors; slot i is written to columns[i].
// @param columns           destination columns, indexed like tuple_desc->slots().
// @param line_eof          set to true on entry and left true when no cached document
//                          remains (nothing is written) or when an error is returned;
//                          set to false after a document was fully consumed.
// @param docvalue_context  maps a slot's column name to the field name to read when
//                          the hit has a `fields` member. Looked up with at(), so a
//                          missing entry throws std::out_of_range.
// @param time_zone         forwarded to the date/datetime conversions.
// @return OK on success or at end of batch; otherwise the first error encountered.
//
// Every slot receives exactly one value per consumed document. A field that is
// absent, JSON null, or (for string/decimal slots read from doc values) an empty
// array is stored as NULL when the slot is nullable and is an error otherwise.
Status ScrollParser::fill_columns(const TupleDescriptor* tuple_desc,
                                  std::vector<MutableColumnPtr>& columns, bool* line_eof,
                                  const std::map<std::string, std::string>& docvalue_context,
                                  const cctz::time_zone& time_zone) {
    *line_eof = true;

    if (_size <= 0 || _line_index >= _size) {
        return Status::OK();
    }

    const rapidjson::Value& obj = _inner_hits_node[_line_index++];
    const bool pure_doc_value = obj.HasMember("fields");

    // obj may be neither have `_source` nor `fields` field.
    const rapidjson::Value* line = nullptr;
    if (obj.HasMember(FIELD_SOURCE)) {
        line = &obj[FIELD_SOURCE];
    } else if (pure_doc_value) {
        line = &obj["fields"];
    }

    // Stores NULL for the current row of `column`, or fails when the slot is not
    // nullable. Used for every "no value" situation so all columns stay row-aligned.
    const auto insert_null_value = [](auto* column, const SlotDescriptor* slot,
                                      const char* name) -> Status {
        if (!slot->is_nullable()) {
            std::string details = absl::Substitute(INVALID_NULL_VALUE, name);
            return Status::RuntimeError(details);
        }
        column->insert_data(nullptr, 0);
        return Status::OK();
    };

    const auto& slots = tuple_desc->slots();
    for (size_t i = 0; i < slots.size(); ++i) {
        const SlotDescriptor* slot_desc = slots[i];
        auto* col_ptr = columns[i].get();

        if (slot_desc->col_name() == FIELD_ID) {
            // actually this branch will not be reached, this is guaranteed by Doris FE.
            if (pure_doc_value) {
                return Status::RuntimeError("obtain `_id` is not supported in doc_values mode");
            }
            // obj[FIELD_ID] must not be NULL
            std::string _id = obj[FIELD_ID].GetString();
            size_t len = _id.length();

            col_ptr->insert_data(const_cast<const char*>(_id.data()), len);
            continue;
        }

        const char* col_name = pure_doc_value ? docvalue_context.at(slot_desc->col_name()).c_str()
                                              : slot_desc->col_name().c_str();

        // Look the field up once and keep a pointer to its value.
        const rapidjson::Value* col_node = nullptr;
        if (line != nullptr) {
            const auto member = line->FindMember(col_name);
            if (member != line->MemberEnd()) {
                col_node = &member->value;
            }
        }

        // A missing field and an explicit JSON null are both "no value"; handling
        // null here also avoids the type-casting errors it would cause below.
        if (col_node == nullptr || col_node->IsNull()) {
            RETURN_IF_ERROR(insert_null_value(col_ptr, slot_desc, col_name));
            continue;
        }

        const rapidjson::Value& col = *col_node;
        auto type = slot_desc->type()->get_primitive_type();

        switch (type) {
        case TYPE_CHAR:
        case TYPE_VARCHAR:
        case TYPE_STRING: {
            // sometimes elasticsearch user post some not-string value to Elasticsearch Index.
            // because of reading value from _source, we can not process all json type and then just transfer the value to original string representation
            // this may be a tricky, but we can work around this issue
            std::string val;
            if (pure_doc_value) {
                if (col.Empty()) {
                    // An empty doc-value array carries no value; a row must still
                    // be written or this column ends up shorter than the others.
                    RETURN_IF_ERROR(insert_null_value(col_ptr, slot_desc, col_name));
                    break;
                } else if (col.Size() > 1) {
                    // doc_values with multiple elements means actual array data
                    // in ES keyword/text field, serialize as JSON array string
                    val = json_value_to_string(col);
                } else if (!col[0].IsString()) {
                    val = json_value_to_string(col[0]);
                } else {
                    val = col[0].GetString();
                }
            } else {
                // When ES mapping is keyword/text but actual data is an array,
                // serialize the array to JSON string instead of throwing an error.
                // This is valid in ES since any field can hold array values.
                if (col.IsString()) {
                    val = col.GetString();
                } else {
                    val = json_value_to_string(col);
                }
            }
            size_t val_size = val.length();
            col_ptr->insert_data(const_cast<const char*>(val.data()), val_size);
            break;
        }

        case TYPE_TINYINT: {
            RETURN_IF_ERROR(insert_int_value<int8_t>(col, type, col_ptr, pure_doc_value,
                                                     slot_desc->is_nullable()));
            break;
        }

        case TYPE_SMALLINT: {
            RETURN_IF_ERROR(insert_int_value<int16_t>(col, type, col_ptr, pure_doc_value,
                                                      slot_desc->is_nullable()));
            break;
        }

        case TYPE_INT: {
            RETURN_IF_ERROR(insert_int_value<int32_t>(col, type, col_ptr, pure_doc_value,
                                                      slot_desc->is_nullable()));
            break;
        }

        case TYPE_BIGINT: {
            RETURN_IF_ERROR(insert_int_value<int64_t>(col, type, col_ptr, pure_doc_value,
                                                      slot_desc->is_nullable()));
            break;
        }

        case TYPE_LARGEINT: {
            RETURN_IF_ERROR(insert_int_value<__int128>(col, type, col_ptr, pure_doc_value,
                                                       slot_desc->is_nullable()));
            break;
        }

        case TYPE_DOUBLE: {
            RETURN_IF_ERROR(insert_float_value<double>(col, type, col_ptr, pure_doc_value,
                                                       slot_desc->is_nullable()));
            break;
        }

        case TYPE_FLOAT: {
            RETURN_IF_ERROR(insert_float_value<float>(col, type, col_ptr, pure_doc_value,
                                                      slot_desc->is_nullable()));
            break;
        }

        case TYPE_BOOLEAN: {
            if (col.IsBool()) {
                int8_t val = col.GetBool();
                col_ptr->insert_data(reinterpret_cast<const char*>(&val), 0);
                break;
            }

            if (col.IsNumber()) {
                int8_t val = static_cast<int8_t>(col.GetInt());
                col_ptr->insert_data(reinterpret_cast<const char*>(&val), 0);
                break;
            }

            bool is_nested_str = false;
            if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsBool()) {
                int8_t val = col[0].GetBool();
                col_ptr->insert_data(reinterpret_cast<const char*>(&val), 0);
                break;
            } else if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsString()) {
                is_nested_str = true;
            } else if (pure_doc_value && col.IsArray()) {
                return Status::InternalError(ERROR_INVALID_COL_DATA, "BOOLEAN");
            }

            const rapidjson::Value& str_col = is_nested_str ? col[0] : col;

            // Validate the value that is actually parsed: checking `col` here would
            // test the doc-value wrapper array rather than its unwrapped first element.
            RETURN_ERROR_IF_COL_IS_ARRAY(str_col, type, true);
            // GetString() requires a string; anything else (e.g. an object) is bad data.
            if (!str_col.IsString()) {
                return Status::InternalError(ERROR_INVALID_COL_DATA, "BOOLEAN");
            }

            const char* str_val = str_col.GetString();
            size_t val_size = str_col.GetStringLength();
            StringParser::ParseResult result;
            bool b = StringParser::string_to_bool(str_val, val_size, &result);
            RETURN_ERROR_IF_PARSING_FAILED(result, str_col, type);
            col_ptr->insert_data(reinterpret_cast<const char*>(&b), 0);
            break;
        }

        case TYPE_DECIMALV2: {
            DecimalV2Value data;

            if (col.IsDouble()) {
                data.assign_from_double(col.GetDouble());
            } else {
                std::string val;
                if (pure_doc_value) {
                    if (col.Empty()) {
                        // Same reasoning as for strings: never skip the row.
                        RETURN_IF_ERROR(insert_null_value(col_ptr, slot_desc, col_name));
                        break;
                    } else if (!col[0].IsString()) {
                        val = json_value_to_string(col[0]);
                    } else {
                        val = col[0].GetString();
                    }
                } else {
                    RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
                    if (!col.IsString()) {
                        val = json_value_to_string(col);
                    } else {
                        val = col.GetString();
                    }
                }
                data.parse_from_str(val.data(), static_cast<int32_t>(val.length()));
            }
            col_ptr->insert_data(reinterpret_cast<const char*>(&data), 0);
            break;
        }

        case TYPE_DATE:
            RETURN_IF_ERROR(
                    fill_date_int<TYPE_DATE>(col, type, pure_doc_value, col_ptr, time_zone));
            break;
        case TYPE_DATETIME:
            RETURN_IF_ERROR(
                    fill_date_int<TYPE_DATETIME>(col, type, pure_doc_value, col_ptr, time_zone));
            break;
        case TYPE_DATEV2:
            RETURN_IF_ERROR(
                    fill_date_int<TYPE_DATEV2>(col, type, pure_doc_value, col_ptr, time_zone));
            break;
        case TYPE_DATETIMEV2: {
            RETURN_IF_ERROR(
                    fill_date_int<TYPE_DATETIMEV2>(col, type, pure_doc_value, col_ptr, time_zone));
            break;
        }
        case TYPE_ARRAY: {
            Array array;
            const auto& sub_type =
                    assert_cast<const DataTypeArray*>(remove_nullable(slot_desc->type()).get())
                            ->get_nested_type()
                            ->get_primitive_type();
            RETURN_IF_ERROR(parse_column(col, sub_type, pure_doc_value, array, time_zone));
            col_ptr->insert(Field::create_field<TYPE_ARRAY>(array));
            break;
        }
        case TYPE_JSONB: {
            JsonBinaryValue jsonb_value;
            RETURN_IF_ERROR(jsonb_value.from_json_string(json_value_to_string(col)));
            JsonbField json(jsonb_value.value(), jsonb_value.size());
            col_ptr->insert(Field::create_field<TYPE_JSONB>(std::move(json)));
            break;
        }
        default: {
            LOG(ERROR) << "Unsupported data type: " << type_to_string(type);
            DCHECK(false);
            // Without DCHECK (release builds) falling through would write no row
            // for this slot; report the problem instead of misaligning columns.
            return Status::InternalError("Unsupported data type: {}", type_to_string(type));
        }
        }
    }

    *line_eof = false;
    return Status::OK();
}
892
#include "common/compile_check_end.h"
893
} // namespace doris