Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 13 additions & 12 deletions be/src/exec/es/es_scroll_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,9 +100,9 @@ static const std::string ERROR_COL_DATA_IS_ARRAY =
static const std::string INVALID_NULL_VALUE =
"Invalid null value occurs: Non-null column `$0` contains NULL";

#define RETURN_ERROR_IF_COL_IS_ARRAY(col, type) \
#define RETURN_ERROR_IF_COL_IS_ARRAY(col, type, is_array) \
do { \
if (col.IsArray()) { \
if (col.IsArray() == is_array) { \
std::stringstream ss; \
ss << "Expected value of type: " << type_to_string(type) \
<< "; but found type: " << json_type_to_string(col.GetType()) \
Expand Down Expand Up @@ -167,7 +167,7 @@ Status get_int_value(const rapidjson::Value& col, PrimitiveType type, void* slot
return Status::OK();
}

RETURN_ERROR_IF_COL_IS_ARRAY(col, type);
RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);

StringParser::ParseResult result;
Expand Down Expand Up @@ -294,7 +294,7 @@ Status get_date_int(const rapidjson::Value& col, PrimitiveType type, bool pure_d
return get_date_value_int<T, RT>(col[0], type, false, slot, time_zone);
} else {
// this happens only when `enable_docvalue_scan = false` and the field holds a string-format date from _source
RETURN_ERROR_IF_COL_IS_ARRAY(col, type);
RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
return get_date_value_int<T, RT>(col, type, true, slot, time_zone);
}
Expand Down Expand Up @@ -322,7 +322,7 @@ Status get_float_value(const rapidjson::Value& col, PrimitiveType type, void* sl
return Status::OK();
}

RETURN_ERROR_IF_COL_IS_ARRAY(col, type);
RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);

StringParser::ParseResult result;
Expand Down Expand Up @@ -351,7 +351,7 @@ Status insert_float_value(const rapidjson::Value& col, PrimitiveType type,
return Status::OK();
}

RETURN_ERROR_IF_COL_IS_ARRAY(col, type);
RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);

StringParser::ParseResult result;
Expand Down Expand Up @@ -390,7 +390,7 @@ Status insert_int_value(const rapidjson::Value& col, PrimitiveType type,
return Status::OK();
}

RETURN_ERROR_IF_COL_IS_ARRAY(col, type);
RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);

StringParser::ParseResult result;
Expand Down Expand Up @@ -543,7 +543,7 @@ Status ScrollParser::fill_columns(const TupleDescriptor* tuple_desc,
val = col[0].GetString();
}
} else {
RETURN_ERROR_IF_COL_IS_ARRAY(col, type);
RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
if (!col.IsString()) {
val = json_value_to_string(col);
} else {
Expand Down Expand Up @@ -623,7 +623,7 @@ Status ScrollParser::fill_columns(const TupleDescriptor* tuple_desc,

const rapidjson::Value& str_col = is_nested_str ? col[0] : col;

RETURN_ERROR_IF_COL_IS_ARRAY(col, type);
RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);

const std::string& val = str_col.GetString();
size_t val_size = str_col.GetStringLength();
Expand All @@ -649,7 +649,7 @@ Status ScrollParser::fill_columns(const TupleDescriptor* tuple_desc,
val = col[0].GetString();
}
} else {
RETURN_ERROR_IF_COL_IS_ARRAY(col, type);
RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
if (!col.IsString()) {
val = json_value_to_string(col);
} else {
Expand Down Expand Up @@ -679,13 +679,14 @@ Status ScrollParser::fill_columns(const TupleDescriptor* tuple_desc,
case TYPE_ARRAY: {
vectorized::Array array;
const auto& sub_type = tuple_desc->slots()[i]->type().children[0].type;
for (auto& sub_col : col.GetArray()) {
RETURN_ERROR_IF_COL_IS_ARRAY(col, type, false);
for (const auto& sub_col : col.GetArray()) {
switch (sub_type) {
case TYPE_CHAR:
case TYPE_VARCHAR:
case TYPE_STRING: {
std::string val;
RETURN_ERROR_IF_COL_IS_ARRAY(sub_col, sub_type);
RETURN_ERROR_IF_COL_IS_ARRAY(sub_col, sub_type, true);
if (!sub_col.IsString()) {
val = json_value_to_string(sub_col);
} else {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{"name": "Andy", "sports": "soccer"}
{"name": "Betty", "sports": "pingpong ball"}
{"name": "Cindy", "sports": "武术"}
{"name": "David", "sports": ["volleyball"]}
{"name": "Emily", "sports": ["baseball", "golf", "hockey"]}
{"name": "Frank", "sports": ["rugby", "cricket", "boxing"]}
{"name": "Grace", "sports": ["table tennis", "badminton", "athletics"]}
{"name": "Henry", "sports": ["archery", "fencing", "weightlifting"]}
{"name": "Ivy", "sports": ["judo", "karate", "taekwondo"]}
{"name": "Jack", "sports": ["wrestling", "gymnastics", "surfing"]}
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,30 @@
# specific language governing permissions and limitations
# under the License.

# Build an Elasticsearch `_bulk` request body (NDJSON) from a file of JSON documents.
#
# For every non-empty line of the data file, two lines are written to the output
# file: an `index` action line followed by the document line itself.
#
# Arguments:
#   $1 index_name  - value written to the action line's "_index"
#   $2 type_value  - value written to "_type"; pass "" to omit "_type" entirely
#   $3 id_prefix   - prefix of each document id; ids are "<prefix>1", "<prefix>2", ...
#   $4 data_file   - input file holding one JSON document per line
#   $5 output_file - destination file; any previous content is discarded
generate_bulk_request() {
    local index_name=$1
    local type_value=$2
    local id_prefix=$3
    local data_file=$4
    local output_file=$5

    # Truncate the output file. (`//` is not a shell comment, and `echo ""` would
    # leave a blank first line instead of an empty file.)
    : > "$output_file"

    local id=1
    local line
    # `|| [ -n "$line" ]` keeps the last document when the data file does not end
    # with a newline; `read` returns non-zero there but still fills `line`.
    while IFS= read -r line || [ -n "$line" ]; do
        # A blank line would produce an action line with no document after it.
        if [ -z "$line" ]; then
            continue
        fi
        # printf is used instead of echo so backslash escapes inside the JSON
        # are written verbatim regardless of the shell's echo implementation.
        if [ -n "$type_value" ]; then
            printf '{"index": {"_index": "%s", "_type": "%s", "_id": "%s%s"}}\n' \
                "$index_name" "$type_value" "$id_prefix" "$id"
        else
            printf '{"index": {"_index": "%s", "_id": "%s%s"}}\n' \
                "$index_name" "$id_prefix" "$id"
        fi
        printf '%s\n' "$line"
        id=$((id + 1))
    done < "$data_file" >> "$output_file"
}

array_data_file="/mnt/scripts/data/composite_type_array_bulk.json"

# es 5
# create index test1
# shellcheck disable=SC2154
Expand Down Expand Up @@ -43,6 +67,14 @@ curl "http://${ES_5_HOST}:9200/test2_20220808/doc/_mapping" -H "Content-Type:app
curl "http://${ES_5_HOST}:9200/test2_20220809/doc/_mapping" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/array_meta.json"
# create index .hide
curl "http://${ES_5_HOST}:9200/.hide" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/es6_hide.json"
# create index composite_type_array
curl "http://${ES_5_HOST}:9200/composite_type_array" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/es6_composite_type_array.json"
# put data with bulk for composite_type_array
bulk_request_file="/mnt/scripts/data/bulk_request_es5.json"
generate_bulk_request "composite_type_array" "doc" "item_" "$array_data_file" "$bulk_request_file"
curl -X POST "http://${ES_5_HOST}:9200/_bulk" --data-binary "@$bulk_request_file" -H "Content-Type: application/json"
# put _meta for composite_type_array
curl "http://${ES_5_HOST}:9200/composite_type_array/doc/_mapping" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/array_meta_composite_type_array.json"

# es 6
# create index test1
Expand Down Expand Up @@ -70,6 +102,14 @@ curl "http://${ES_6_HOST}:9200/test2_20220808/doc/_mapping" -H "Content-Type:app
curl "http://${ES_6_HOST}:9200/test2_20220809/doc/_mapping" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/array_meta.json"
# create index .hide
curl "http://${ES_6_HOST}:9200/.hide" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/es6_hide.json"
# create index composite_type_array
curl "http://${ES_6_HOST}:9200/composite_type_array" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/es6_composite_type_array.json"
# put data with bulk for composite_type_array
bulk_request_file="/mnt/scripts/data/bulk_request_es6.json"
generate_bulk_request "composite_type_array" "doc" "item_" "$array_data_file" "$bulk_request_file"
curl -X POST "http://${ES_6_HOST}:9200/_bulk" --data-binary "@$bulk_request_file" -H "Content-Type: application/json"
# put _meta for composite_type_array
curl "http://${ES_6_HOST}:9200/composite_type_array/doc/_mapping" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/array_meta_composite_type_array.json"

# es7
# create index test1
Expand Down Expand Up @@ -106,6 +146,15 @@ curl "http://${ES_7_HOST}:9200/test2_20220809/_mapping" -H "Content-Type:applica
# create index .hide
curl "http://${ES_7_HOST}:9200/.hide" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/es7_hide.json"

# create index composite_type_array
curl "http://${ES_7_HOST}:9200/composite_type_array" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/es7_composite_type_array.json"
# put data with bulk for composite_type_array
bulk_request_file="/mnt/scripts/data/bulk_request_es7.json"
generate_bulk_request "composite_type_array" "_doc" "item_" "$array_data_file" "$bulk_request_file"
curl -X POST "http://${ES_7_HOST}:9200/_bulk" --data-binary "@$bulk_request_file" -H "Content-Type: application/json"
# put _meta for composite_type_array
curl "http://${ES_7_HOST}:9200/composite_type_array/_mapping" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/array_meta_composite_type_array.json"

# es8
# create index test1
curl "http://${ES_8_HOST}:9200/test1" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/es7_test1.json"
Expand Down Expand Up @@ -138,3 +187,12 @@ curl "http://${ES_8_HOST}:9200/test3_20231005/_doc/1" -H "Content-Type:applicati
curl "http://${ES_8_HOST}:9200/test1/_mapping" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/array_meta.json"
curl "http://${ES_8_HOST}:9200/test2_20220808/_mapping" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/array_meta.json"
curl "http://${ES_8_HOST}:9200/test2_20220809/_mapping" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/array_meta.json"

# create index composite_type_array
curl "http://${ES_8_HOST}:9200/composite_type_array" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/es7_composite_type_array.json"
# put data with bulk for composite_type_array
bulk_request_file="/mnt/scripts/data/bulk_request_es8.json"
generate_bulk_request "composite_type_array" "" "item_" "$array_data_file" "$bulk_request_file"
curl -X POST "http://${ES_8_HOST}:9200/_bulk" --data-binary "@$bulk_request_file" -H "Content-Type: application/json"
# put _meta for composite_type_array
curl "http://${ES_8_HOST}:9200/composite_type_array/_mapping" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/array_meta_composite_type_array.json"
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"_meta": {
"doris":{
"array_fields":[
"sports"
]
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"settings": {
"number_of_shards": 1,
"number_of_replicas": 0
},
"mappings": {
"doc": {
"properties": {
"name": { "type": "keyword" },
"sports": { "type": "keyword", "doc_values": false}
}
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"settings": {
"number_of_shards": 1,
"number_of_replicas": 0
},
"mappings": {
"properties": {
"name": { "type": "keyword" },
"sports": { "type": "keyword", "doc_values": false}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,13 @@ suite("test_es_query", "p0,external,es,external_docker,external_docker_es") {
order_qt_sql_5_18 """select message from test1 where not_null_or_empty(message)"""
order_qt_sql_5_19 """select * from test1 where esquery(c_unsigned_long, '{"match":{"c_unsigned_long":0}}')"""
order_qt_sql_5_20 """select c_person, c_user, json_extract(c_person, '\$.[0].name'), json_extract(c_user, '\$.[1].last') from test1;"""
try {
sql """select * from composite_type_array;"""
fail("Should not reach here")
} catch (Exception e) {
logger.error(e.getMessage())
assertTrue(e.getMessage().contains("Expected value of type: ARRAY; but found type: Varchar/Char; Document slice is : \"soccer\""))
}

sql """switch test_es_query_es6"""
// order_qt_sql_6_01 """show tables"""
Expand All @@ -236,6 +243,12 @@ suite("test_es_query", "p0,external,es,external_docker,external_docker_es") {
order_qt_sql_6_18 """select message from test1 where not_null_or_empty(message)"""
order_qt_sql_6_19 """select * from test1 where esquery(c_person, '{"match":{"c_person.name":"Andy"}}')"""
order_qt_sql_6_20 """select c_person, c_user, json_extract(c_person, '\$.[0].name'), json_extract(c_user, '\$.[1].last') from test1;"""
try {
sql """select * from composite_type_array;"""
fail("Should not reach here")
} catch (Exception e) {
assertTrue(e.getMessage().contains("Expected value of type: ARRAY; but found type: Varchar/Char; Document slice is : \"soccer\""))
}

List<List<String>> tables6N = sql """show tables"""
boolean notContainHide = true
Expand Down Expand Up @@ -282,6 +295,12 @@ suite("test_es_query", "p0,external,es,external_docker,external_docker_es") {
order_qt_sql_7_23 """select * from test1 where level = 'debug'"""
order_qt_sql_7_24 """select * from test1 where esquery(c_float, '{"match":{"c_float":1.1}}')"""
order_qt_sql_7_25 """select c_person, c_user, json_extract(c_person, '\$.[0].name'), json_extract(c_user, '\$.[1].last') from test1;"""
try {
sql """select * from composite_type_array;"""
fail("Should not reach here")
} catch (Exception e) {
assertTrue(e.getMessage().contains("Expected value of type: ARRAY; but found type: Varchar/Char; Document slice is : \"soccer\""))
}

List<List<String>> tables7N = sql """show tables"""
boolean notContainHide7 = true
Expand Down Expand Up @@ -328,6 +347,12 @@ suite("test_es_query", "p0,external,es,external_docker,external_docker_es") {
order_qt_sql_8_21 """select * from test1 where level = 'debug'"""
order_qt_sql_8_22 """select * from test1 where esquery(c_ip, '{"match":{"c_ip":"192.168.0.1"}}')"""
order_qt_sql_8_23 """select c_person, c_user, json_extract(c_person, '\$.[0].name'), json_extract(c_user, '\$.[1].last') from test1;"""
try {
sql """select * from composite_type_array;"""
fail("Should not reach here")
} catch (Exception e) {
assertTrue(e.getMessage().contains("Expected value of type: ARRAY; but found type: Varchar/Char; Document slice is : \"soccer\""))
}

}

Expand Down