diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc index 9616bac71bf9..403f2788f95c 100644 --- a/cpp/src/arrow/filesystem/s3fs.cc +++ b/cpp/src/arrow/filesystem/s3fs.cc @@ -271,17 +271,16 @@ Result S3Options::FromUri(const Uri& uri, std::string* out_path) { options.ConfigureDefaultCredentials(); } - auto it = options_map.find("region"); - if (it != options_map.end()) { - options.region = it->second; - } - it = options_map.find("scheme"); - if (it != options_map.end()) { - options.scheme = it->second; - } - it = options_map.find("endpoint_override"); - if (it != options_map.end()) { - options.endpoint_override = it->second; + for (const auto& kv : options_map) { + if (kv.first == "region") { + options.region = kv.second; + } else if (kv.first == "scheme") { + options.scheme = kv.second; + } else if (kv.first == "endpoint_override") { + options.endpoint_override = kv.second; + } else { + return Status::Invalid("Unexpected query parameter in S3 URI: '", kv.first, "'"); + } } return options; diff --git a/cpp/src/arrow/filesystem/s3fs_test.cc b/cpp/src/arrow/filesystem/s3fs_test.cc index 9fd1b57cdc18..ac9fb31648e3 100644 --- a/cpp/src/arrow/filesystem/s3fs_test.cc +++ b/cpp/src/arrow/filesystem/s3fs_test.cc @@ -218,6 +218,9 @@ TEST_F(S3OptionsTest, FromUri) { // Missing bucket name ASSERT_RAISES(Invalid, S3Options::FromUri("s3:///foo/bar/", &path)); + + // Invalid option + ASSERT_RAISES(Invalid, S3Options::FromUri("s3://mybucket/?xxx=zzz", &path)); } TEST_F(S3OptionsTest, FromAccessKey) { diff --git a/cpp/src/arrow/util/uri.cc b/cpp/src/arrow/util/uri.cc index b116a1362776..795e3fa2c8b0 100644 --- a/cpp/src/arrow/util/uri.cc +++ b/cpp/src/arrow/util/uri.cc @@ -49,12 +49,21 @@ std::string TextRangeToString(const UriTextRangeStructA& range) { bool IsTextRangeSet(const UriTextRangeStructA& range) { return range.first != nullptr; } #ifdef _WIN32 -bool IsDriveSpec(util::string_view s) { +bool IsDriveSpec(const util::string_view s) { return (s.length() >= 2 && s[1] == ':' && ((s[0] >= 'A' && s[0] <= 'Z') || (s[0] >= 'a' && s[0] <= 'z'))); } #endif +std::string UriUnescape(const util::string_view s) { + std::string result(s); + if (!result.empty()) { + auto end = uriUnescapeInPlaceA(&result[0]); + result.resize(end - &result[0]); + } + return result; +} + } // namespace std::string UriEscape(const std::string& s) { @@ -125,9 +134,9 @@ std::string Uri::username() const { auto userpass = TextRangeToView(impl_->uri_.userInfo); auto sep_pos = userpass.find_first_of(':'); if (sep_pos == util::string_view::npos) { - return std::string(userpass); + return UriUnescape(userpass); } else { - return std::string(userpass.substr(0, sep_pos)); + return UriUnescape(userpass.substr(0, sep_pos)); } } @@ -137,7 +146,7 @@ std::string Uri::password() const { if (sep_pos == util::string_view::npos) { return std::string(); } else { - return std::string(userpass.substr(sep_pos + 1)); + return UriUnescape(userpass.substr(sep_pos + 1)); } } diff --git a/cpp/src/arrow/util/uri_test.cc b/cpp/src/arrow/util/uri_test.cc index f4d5d078ff46..44804704c436 100644 --- a/cpp/src/arrow/util/uri_test.cc +++ b/cpp/src/arrow/util/uri_test.cc @@ -212,6 +212,19 @@ TEST(Uri, ParseUserPass) { ASSERT_EQ(uri.host(), "localhost"); ASSERT_EQ(uri.username(), "someuser"); ASSERT_EQ(uri.password(), "somepass"); + + // With %-encoding + ASSERT_OK(uri.Parse("http://some%20user%2Fname:somepass@localhost")); + ASSERT_EQ(uri.scheme(), "http"); + ASSERT_EQ(uri.host(), "localhost"); + ASSERT_EQ(uri.username(), "some user/name"); + ASSERT_EQ(uri.password(), "somepass"); + + ASSERT_OK(uri.Parse("http://some%20user%2Fname:some%20pass%2Fword@localhost")); + ASSERT_EQ(uri.scheme(), "http"); + ASSERT_EQ(uri.host(), "localhost"); + ASSERT_EQ(uri.username(), "some user/name"); + ASSERT_EQ(uri.password(), "some pass/word"); } TEST(Uri, FileScheme) {