From f1a3f86b198418121acedb6782de0eb31215acef Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Mon, 14 Sep 2020 16:28:31 +0200 Subject: [PATCH 1/2] ARROW-9859: [C++] Decode username and password in URIs Allow passing a %-encoded secret key in a S3 URI. Also, detect and report unrecognized options in S3 URIs. --- cpp/src/arrow/filesystem/s3fs.cc | 21 ++++++++++----------- cpp/src/arrow/filesystem/s3fs_test.cc | 3 +++ cpp/src/arrow/util/uri.cc | 17 +++++++++++++---- cpp/src/arrow/util/uri_test.cc | 13 +++++++++++++ 4 files changed, 39 insertions(+), 15 deletions(-) diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc index 9616bac71bf9..fbc4133c9550 100644 --- a/cpp/src/arrow/filesystem/s3fs.cc +++ b/cpp/src/arrow/filesystem/s3fs.cc @@ -271,17 +271,16 @@ Result S3Options::FromUri(const Uri& uri, std::string* out_path) { options.ConfigureDefaultCredentials(); } - auto it = options_map.find("region"); - if (it != options_map.end()) { - options.region = it->second; - } - it = options_map.find("scheme"); - if (it != options_map.end()) { - options.scheme = it->second; - } - it = options_map.find("endpoint_override"); - if (it != options_map.end()) { - options.endpoint_override = it->second; + for (const auto& kv : options_map) { + if (kv.first == "region") { + options.region = kv.second; + } else if (kv.first == "scheme") { + options.scheme = kv.second; + } else if (kv.first == "endpoint_override") { + options.endpoint_override = kv.second; + } else { + return Status::Invalid("Unexpected option in S3 URI: '", kv.first, "'"); + } } return options; diff --git a/cpp/src/arrow/filesystem/s3fs_test.cc b/cpp/src/arrow/filesystem/s3fs_test.cc index 9fd1b57cdc18..ac9fb31648e3 100644 --- a/cpp/src/arrow/filesystem/s3fs_test.cc +++ b/cpp/src/arrow/filesystem/s3fs_test.cc @@ -218,6 +218,9 @@ TEST_F(S3OptionsTest, FromUri) { // Missing bucket name ASSERT_RAISES(Invalid, S3Options::FromUri("s3:///foo/bar/", &path)); + + // Invalid option + ASSERT_RAISES(Invalid, S3Options::FromUri("s3://mybucket/?xxx=zzz", &path)); } TEST_F(S3OptionsTest, FromAccessKey) { diff --git a/cpp/src/arrow/util/uri.cc b/cpp/src/arrow/util/uri.cc index b116a1362776..795e3fa2c8b0 100644 --- a/cpp/src/arrow/util/uri.cc +++ b/cpp/src/arrow/util/uri.cc @@ -49,12 +49,21 @@ std::string TextRangeToString(const UriTextRangeStructA& range) { bool IsTextRangeSet(const UriTextRangeStructA& range) { return range.first != nullptr; } #ifdef _WIN32 -bool IsDriveSpec(util::string_view s) { +bool IsDriveSpec(const util::string_view s) { return (s.length() >= 2 && s[1] == ':' && ((s[0] >= 'A' && s[0] <= 'Z') || (s[0] >= 'a' && s[0] <= 'z'))); } #endif +std::string UriUnescape(const util::string_view s) { + std::string result(s); + if (!result.empty()) { + auto end = uriUnescapeInPlaceA(&result[0]); + result.resize(end - &result[0]); + } + return result; +} + } // namespace std::string UriEscape(const std::string& s) { @@ -125,9 +134,9 @@ std::string Uri::username() const { auto userpass = TextRangeToView(impl_->uri_.userInfo); auto sep_pos = userpass.find_first_of(':'); if (sep_pos == util::string_view::npos) { - return std::string(userpass); + return UriUnescape(userpass); } else { - return std::string(userpass.substr(0, sep_pos)); + return UriUnescape(userpass.substr(0, sep_pos)); } } @@ -137,7 +146,7 @@ std::string Uri::password() const { if (sep_pos == util::string_view::npos) { return std::string(); } else { - return std::string(userpass.substr(sep_pos + 1)); + return UriUnescape(userpass.substr(sep_pos + 1)); } } diff --git a/cpp/src/arrow/util/uri_test.cc b/cpp/src/arrow/util/uri_test.cc index f4d5d078ff46..44804704c436 100644 --- a/cpp/src/arrow/util/uri_test.cc +++ b/cpp/src/arrow/util/uri_test.cc @@ -212,6 +212,19 @@ TEST(Uri, ParseUserPass) { ASSERT_EQ(uri.host(), "localhost"); ASSERT_EQ(uri.username(), "someuser"); ASSERT_EQ(uri.password(), "somepass"); + + // With %-encoding + ASSERT_OK(uri.Parse("http://some%20user%2Fname:somepass@localhost")); + ASSERT_EQ(uri.scheme(), "http"); + ASSERT_EQ(uri.host(), "localhost"); + ASSERT_EQ(uri.username(), "some user/name"); + ASSERT_EQ(uri.password(), "somepass"); + + ASSERT_OK(uri.Parse("http://some%20user%2Fname:some%20pass%2Fword@localhost")); + ASSERT_EQ(uri.scheme(), "http"); + ASSERT_EQ(uri.host(), "localhost"); + ASSERT_EQ(uri.username(), "some user/name"); + ASSERT_EQ(uri.password(), "some pass/word"); } TEST(Uri, FileScheme) { From 91b3aea3aab1c14bc863500e8049289046cb5626 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Mon, 14 Sep 2020 17:09:29 +0200 Subject: [PATCH 2/2] Improve error message --- cpp/src/arrow/filesystem/s3fs.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc index fbc4133c9550..403f2788f95c 100644 --- a/cpp/src/arrow/filesystem/s3fs.cc +++ b/cpp/src/arrow/filesystem/s3fs.cc @@ -279,7 +279,7 @@ Result S3Options::FromUri(const Uri& uri, std::string* out_path) { } else if (kv.first == "endpoint_override") { options.endpoint_override = kv.second; } else { - return Status::Invalid("Unexpected option in S3 URI: '", kv.first, "'"); + return Status::Invalid("Unexpected query parameter in S3 URI: '", kv.first, "'"); } }