From 64b96afbeb79b9b30a5e45eaab1df66d3dd05c00 Mon Sep 17 00:00:00 2001 From: Junhao Liu Date: Thu, 1 Feb 2024 19:57:34 -0600 Subject: [PATCH 1/2] Remove External Table Backwards Compatibility Options --- .../src/datasource/listing_table_factory.rs | 8 -- .../test_files/insert_to_external.slt | 95 ++++--------------- 2 files changed, 20 insertions(+), 83 deletions(-) diff --git a/datafusion/core/src/datasource/listing_table_factory.rs b/datafusion/core/src/datasource/listing_table_factory.rs index e8ffece320d7d..3beaa0286c137 100644 --- a/datafusion/core/src/datasource/listing_table_factory.rs +++ b/datafusion/core/src/datasource/listing_table_factory.rs @@ -135,14 +135,6 @@ impl TableProviderFactory for ListingTableFactory { let mut statement_options = StatementOptions::from(&cmd.options); - // Backwards compatibility (#8547), discard deprecated options - statement_options.take_bool_option("single_file")?; - if let Some(s) = statement_options.take_str_option("insert_mode") { - if !s.eq_ignore_ascii_case("append_new_files") { - return plan_err!("Unknown or unsupported insert mode {s}. Only append_new_files supported"); - } - } - statement_options.take_bool_option("create_local_path")?; statement_options.take_str_option("unbounded"); let file_type = file_format.file_type(); diff --git a/datafusion/sqllogictest/test_files/insert_to_external.slt b/datafusion/sqllogictest/test_files/insert_to_external.slt index e73778ad44e52..c0f86ac76320a 100644 --- a/datafusion/sqllogictest/test_files/insert_to_external.slt +++ b/datafusion/sqllogictest/test_files/insert_to_external.slt @@ -58,11 +58,7 @@ CREATE EXTERNAL TABLE dictionary_encoded_parquet_partitioned( ) STORED AS parquet LOCATION 'test_files/scratch/insert_to_external/parquet_types_partitioned/' -PARTITIONED BY (b) -OPTIONS( -create_local_path 'true', -insert_mode 'append_new_files', -); +PARTITIONED BY (b); query TT insert into dictionary_encoded_parquet_partitioned @@ -83,11 +79,7 @@ CREATE EXTERNAL TABLE dictionary_encoded_arrow_partitioned( ) STORED AS arrow LOCATION 'test_files/scratch/insert_to_external/arrow_dict_partitioned/' -PARTITIONED BY (b) -OPTIONS( -create_local_path 'true', -insert_mode 'append_new_files', -); +PARTITIONED BY (b); query TT insert into dictionary_encoded_arrow_partitioned @@ -100,11 +92,7 @@ CREATE EXTERNAL TABLE dictionary_encoded_arrow_test_readback( a varchar, ) STORED AS arrow -LOCATION 'test_files/scratch/insert_to_external/arrow_dict_partitioned/b=bar/' -OPTIONS( -create_local_path 'true', -insert_mode 'append_new_files', -); +LOCATION 'test_files/scratch/insert_to_external/arrow_dict_partitioned/b=bar/'; query T select * from dictionary_encoded_arrow_test_readback; @@ -125,11 +113,7 @@ CREATE EXTERNAL TABLE ordered_insert_test(a bigint, b bigint) STORED AS csv LOCATION 'test_files/scratch/insert_to_external/insert_to_ordered/' -WITH ORDER (a ASC, B DESC) -OPTIONS( -create_local_path 'true', -insert_mode 'append_new_files', -); +WITH ORDER (a ASC, B DESC); query TT EXPLAIN INSERT INTO ordered_insert_test values (5, 1), (4, 2), (7,7), (7,8), (7,9), (7,10), (3, 3), (2, 4), (1, 5); @@ -169,11 +153,7 @@ CREATE EXTERNAL TABLE partitioned_insert_test(a string, b string, c bigint) STORED AS csv LOCATION 'test_files/scratch/insert_to_external/insert_to_partitioned/' -PARTITIONED BY (a, b) -OPTIONS( -create_local_path 'true', -insert_mode 'append_new_files', -); +PARTITIONED BY (a, b); #note that partitioned cols are moved to the end so value tuples are (c, a, b) query ITT @@ -195,10 +175,7 @@ statement ok CREATE EXTERNAL TABLE partitioned_insert_test_verify(c bigint) STORED AS csv -LOCATION 'test_files/scratch/insert_to_external/insert_to_partitioned/a=20/b=100/' -OPTIONS( -insert_mode 'append_new_files', -); +LOCATION 'test_files/scratch/insert_to_external/insert_to_partitioned/a=20/b=100/'; query I select * from partitioned_insert_test_verify; @@ -211,11 +188,7 @@ CREATE EXTERNAL TABLE partitioned_insert_test_json(a string, b string) STORED AS json LOCATION 'test_files/scratch/insert_to_external/insert_to_partitioned_json/' -PARTITIONED BY (a) -OPTIONS( -create_local_path 'true', -insert_mode 'append_new_files', -); +PARTITIONED BY (a); query TT INSERT INTO partitioned_insert_test_json values (1, 2), (3, 4), (5, 6), (1, 2), (3, 4), (5, 6); @@ -230,10 +203,7 @@ statement ok CREATE EXTERNAL TABLE partitioned_insert_test_verify_json(b string) STORED AS json -LOCATION 'test_files/scratch/insert_to_external/insert_to_partitioned_json/a=2/' -OPTIONS( -insert_mode 'append_new_files', -); +LOCATION 'test_files/scratch/insert_to_external/insert_to_partitioned_json/a=2/'; query T select * from partitioned_insert_test_verify_json; @@ -246,11 +216,7 @@ CREATE EXTERNAL TABLE partitioned_insert_test_pq(a string, b bigint) STORED AS parquet LOCATION 'test_files/scratch/insert_to_external/insert_to_partitioned_pq/' -PARTITIONED BY (a) -OPTIONS( -create_local_path 'true', -insert_mode 'append_new_files', -); +PARTITIONED BY (a); query IT INSERT INTO partitioned_insert_test_pq values (1, 2), (3, 4), (5, 6), (1, 2), (3, 4), (5, 6); @@ -271,10 +237,7 @@ statement ok CREATE EXTERNAL TABLE partitioned_insert_test_verify_pq(b bigint) STORED AS parquet -LOCATION 'test_files/scratch/insert_to_external/insert_to_partitioned_pq/a=2/' -OPTIONS( -insert_mode 'append_new_files', -); +LOCATION 'test_files/scratch/insert_to_external/insert_to_partitioned_pq/a=2/'; query I select * from partitioned_insert_test_verify_pq; @@ -287,11 +250,7 @@ statement ok CREATE EXTERNAL TABLE single_file_test(a bigint, b bigint) STORED AS csv -LOCATION 'test_files/scratch/insert_to_external/single_csv_table.csv' -OPTIONS( -create_local_path 'true', -single_file 'true', -); +LOCATION 'test_files/scratch/insert_to_external/single_csv_table.csv'; query error DataFusion error: Error during planning: Inserting into a ListingTable backed by a single file is not supported, URL is possibly missing a trailing `/`\. To append to an existing file use StreamTable, e\.g\. by using CREATE UNBOUNDED EXTERNAL TABLE INSERT INTO single_file_test values (1, 2), (3, 4); @@ -303,11 +262,7 @@ statement ok CREATE UNBOUNDED EXTERNAL TABLE single_file_test(a bigint, b bigint) STORED AS csv -LOCATION 'test_files/scratch/insert_to_external/single_csv_table.csv' -OPTIONS( -create_local_path 'true', -single_file 'true', -); +LOCATION 'test_files/scratch/insert_to_external/single_csv_table.csv'; query II INSERT INTO single_file_test values (1, 2), (3, 4); @@ -331,10 +286,7 @@ statement ok CREATE EXTERNAL TABLE directory_test(a bigint, b bigint) STORED AS parquet -LOCATION 'test_files/scratch/insert_to_external/external_parquet_table_q0/' -OPTIONS( -create_local_path 'true', -); +LOCATION 'test_files/scratch/insert_to_external/external_parquet_table_q0/'; query II INSERT INTO directory_test values (1, 2), (3, 4); @@ -351,8 +303,7 @@ statement ok CREATE EXTERNAL TABLE table_without_values(field1 BIGINT NULL, field2 BIGINT NULL) STORED AS parquet -LOCATION 'test_files/scratch/insert_to_external/external_parquet_table_q1/' -OPTIONS (create_local_path 'true'); +LOCATION 'test_files/scratch/insert_to_external/external_parquet_table_q1/'; query TT EXPLAIN @@ -417,8 +368,7 @@ statement ok CREATE EXTERNAL TABLE table_without_values(field1 BIGINT NULL, field2 BIGINT NULL) STORED AS parquet -LOCATION 'test_files/scratch/insert_to_external/external_parquet_table_q2/' -OPTIONS (create_local_path 'true'); +LOCATION 'test_files/scratch/insert_to_external/external_parquet_table_q2/'; query TT EXPLAIN @@ -462,8 +412,7 @@ statement ok CREATE EXTERNAL TABLE table_without_values(c1 varchar NULL) STORED AS parquet -LOCATION 'test_files/scratch/insert_to_external/external_parquet_table_q3/' -OPTIONS (create_local_path 'true'); +LOCATION 'test_files/scratch/insert_to_external/external_parquet_table_q3/'; # verify that the sort order of the insert query is maintained into the # insert (there should be a SortExec in the following plan) @@ -501,8 +450,7 @@ statement ok CREATE EXTERNAL TABLE table_without_values(id BIGINT, name varchar) STORED AS parquet -LOCATION 'test_files/scratch/insert_to_external/external_parquet_table_q4/' -OPTIONS (create_local_path 'true'); +LOCATION 'test_files/scratch/insert_to_external/external_parquet_table_q4/'; query IT insert into table_without_values(id, name) values(1, 'foo'); @@ -544,8 +492,7 @@ statement ok CREATE EXTERNAL TABLE table_without_values(field1 BIGINT NOT NULL, field2 BIGINT NULL) STORED AS parquet -LOCATION 'test_files/scratch/insert_to_external/external_parquet_table_q5/' -OPTIONS (create_local_path 'true'); +LOCATION 'test_files/scratch/insert_to_external/external_parquet_table_q5/'; query II insert into table_without_values values(1, 100); @@ -594,8 +541,7 @@ CREATE EXTERNAL TABLE test_column_defaults( d text default lower('DEFAULT_TEXT'), e timestamp default now() ) STORED AS parquet -LOCATION 'test_files/scratch/insert_to_external/external_parquet_table_q6/' -OPTIONS (create_local_path 'true'); +LOCATION 'test_files/scratch/insert_to_external/external_parquet_table_q6/'; # fill in all column values query IIITP @@ -647,5 +593,4 @@ CREATE EXTERNAL TABLE test_column_defaults( a int, b int default a+1 ) STORED AS parquet -LOCATION 'test_files/scratch/insert_to_external/external_parquet_table_q7/' -OPTIONS (create_local_path 'true'); +LOCATION 'test_files/scratch/insert_to_external/external_parquet_table_q7/'; From 6bf593b4cd9a6530ccc86a7096793e714479dece Mon Sep 17 00:00:00 2001 From: Junhao Liu Date: Thu, 1 Feb 2024 21:59:41 -0600 Subject: [PATCH 2/2] fix clippy --- datafusion/core/src/datasource/listing_table_factory.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/core/src/datasource/listing_table_factory.rs b/datafusion/core/src/datasource/listing_table_factory.rs index 3beaa0286c137..bcf1f81b3a0bb 100644 --- a/datafusion/core/src/datasource/listing_table_factory.rs +++ b/datafusion/core/src/datasource/listing_table_factory.rs @@ -36,7 +36,7 @@ use crate::execution::context::SessionState; use arrow::datatypes::{DataType, SchemaRef}; use datafusion_common::file_options::{FileTypeWriterOptions, StatementOptions}; -use datafusion_common::{arrow_datafusion_err, plan_err, DataFusionError, FileType}; +use datafusion_common::{arrow_datafusion_err, DataFusionError, FileType}; use datafusion_expr::CreateExternalTable; use async_trait::async_trait;