Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changes/next-release/feature-AmazonS3-92ece24.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"category": "Amazon S3",
"contributor": "",
"type": "feature",
"description": "Adding feature for parsing S3 URIs"
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,15 @@
import java.net.URI;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.function.Consumer;
import java.util.function.Supplier;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import software.amazon.awssdk.annotations.Immutable;
import software.amazon.awssdk.annotations.SdkInternalApi;
import software.amazon.awssdk.annotations.SdkPublicApi;
Expand All @@ -38,6 +43,7 @@
import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration;
import software.amazon.awssdk.core.client.config.SdkClientConfiguration;
import software.amazon.awssdk.core.client.config.SdkClientOption;
import software.amazon.awssdk.core.exception.SdkClientException;
import software.amazon.awssdk.core.exception.SdkException;
import software.amazon.awssdk.core.interceptor.ExecutionAttributes;
import software.amazon.awssdk.core.interceptor.ExecutionInterceptor;
Expand All @@ -62,6 +68,7 @@
import software.amazon.awssdk.services.s3.internal.endpoints.UseGlobalEndpointResolver;
import software.amazon.awssdk.services.s3.model.GetObjectRequest;
import software.amazon.awssdk.services.s3.model.GetUrlRequest;
import software.amazon.awssdk.services.s3.parsing.S3Uri;
import software.amazon.awssdk.utils.AttributeMap;
import software.amazon.awssdk.utils.Validate;

Expand Down Expand Up @@ -251,6 +258,103 @@ public URL getUrl(GetUrlRequest getUrlRequest) {
}
}

/**
* Parses the given URI into an {@link S3Uri} from which a user can easily retrieve the bucket, key, region, style, and
* query parameters of the URI. Only basic bucket endpoints are supported, i.e., path-style and virtual-hosted-style
* URLs, in addition to URIs that use the {@code s3} scheme. Encoded buckets, keys, and query parameters will be
* returned decoded.
*
* @param uri The URI to be parsed
* @return Parsed {@link S3Uri}
* @throws SdkClientException if the URI is null, is recognised as an access point or outposts endpoint, has the
* {@code s3} scheme but no authority, has no host, or has a host that does not match the S3 endpoint pattern
*/
public S3Uri parseUri(URI uri) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not a blocker, but we may want to support string, which we will encode for them in the future depending on customer ask.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I tried using preprocessUrlStr() from v1 to encode but it runs into issues in cases like:

  • "." in bucket name for virtual hosted style, URI.getHost() doesn't work properly since it only retrieves the portion to the left of the first "."
  • query parameters get parsed as part of the key, so the key is incorrect and query parameters are empty, due to the "?" getting encoded

Seems to be pretty complicated to fix these...

if (uri == null) {
throw SdkClientException.create("URI must not be null");
}

// NOTE(review): the two patterns below are anchored with ^ and $ and describe a bare host name, yet they are
// matched against the full URI string. A URI that carries a scheme or a path presumably never matches, so these
// guards may be ineffective; matching against uri.getHost() looks like the intent - confirm.
// NOTE(review): the dot in (.cn) is unescaped and both patterns are recompiled on every call.
Pattern accessPointPattern = Pattern.compile("^([a-zA-Z0-9\\-]+)\\.s3-accesspoint(-fips)?(\\.dualstack)?"
+ "\\.([a-zA-Z0-9\\-]+)\\.amazonaws\\.com(.cn)?$");
if (accessPointPattern.matcher(uri.toString()).find()) {
throw SdkClientException.create("AccessPoints URI parsing is not supported");
}

Pattern outpostPattern = Pattern.compile("^([a-zA-Z0-9\\-]+)\\.op\\-[0-9]+\\.s3-outposts\\.([a-zA-Z0-9\\-]+)"
+ "\\.amazonaws\\.com(.cn)?$");
if (outpostPattern.matcher(uri.toString()).find()) {
throw SdkClientException.create("Outposts URI parsing is not supported");
}

// Every component starts out absent; only the parts found in the URI are filled in below.
String bucket = null;
String key = null;
String region = null;
boolean isPathStyle = false;
Map<String, List<String>> queryParams = new HashMap<>();
String path = uri.getPath();

// s3 scheme: the authority is the bucket and the path without its leading slash is the key.
// Region stays null and isPathStyle stays false for this form.
if ("s3".equalsIgnoreCase(uri.getScheme())) {
if (uri.getAuthority() == null) {
throw SdkClientException.create("Invalid S3 URI: bucket not included");
}
bucket = uri.getAuthority();
if (path.length() > 1) {
key = path.substring(1);
}

} else {
if (uri.getHost() == null) {
throw SdkClientException.create("Invalid S3 URI: hostname not included");
}

// Group 1 is an optional prefix ending in a dot (the bucket for virtual-hosted-style hosts);
// group 2 is the label that follows the s3 separator (dot or hyphen).
Pattern endpointPattern = Pattern.compile("^(.+\\.)?s3[.-]([a-z0-9-]+)\\.");
Matcher matcher = endpointPattern.matcher(uri.getHost());
if (!matcher.find()) {
throw SdkClientException.create("Invalid S3 URI: hostname does not appear to be a valid S3 endpoint");
}

String prefix = matcher.group(1);
if (prefix == null || prefix.isEmpty()) {
// No host prefix means path-style: the first path segment is the bucket, the remainder is the key.
isPathStyle = true;

if (!path.isEmpty() && !"/".equals(path)) {
int index = path.indexOf('/', 1);

if (index == -1) {
bucket = path.substring(1);
} else {
bucket = path.substring(1, index);
// A slash that is the last character of the path leaves the key null.
if (index != path.length() - 1) {
key = path.substring(index + 1);
}
}
}
} else {
// Virtual-hosted-style: the host prefix without its trailing dot is the bucket.
bucket = prefix.substring(0, prefix.length() - 1);
if (path != null && !path.isEmpty() && !"/".equals(path)) {
key = path.substring(1);
}
}

// When group 2 is the literal amazonaws the host carries no region label, so region stays null.
if (!"amazonaws".equals(matcher.group(2))) {
region = matcher.group(2);
}
}

String queryPart = uri.getQuery();
if (queryPart != null) {
parseQuery(queryParams, queryPart);
}

Region uriRegion = region != null ? Region.of(region) : null;

return S3Uri.builder()
.uri(uri)
.bucket(bucket)
.key(key)
.region(uriRegion)
.isPathStyle(isPathStyle)
.queryParams(queryParams)
.build();
}

private Region resolveRegionForGetUrl(GetUrlRequest getUrlRequest) {
if (getUrlRequest.region() == null && this.region == null) {
throw new IllegalArgumentException("Region should be provided either in GetUrlRequest object or S3Utilities object");
Expand Down Expand Up @@ -368,6 +472,21 @@ private UseGlobalEndpointResolver createUseGlobalEndpointResolver() {
return new UseGlobalEndpointResolver(config);
}

/**
 * Splits a raw query string into its parameters and accumulates them into {@code queryParams}.
 *
 * <p>Parameters are separated by {@code &}; each parameter is split on its first {@code =} into a key and a value
 * part, and the value part is split on {@code ,} into individual values. Values for a repeated key are appended to
 * the list already stored for that key. Parameters with an empty key are ignored.
 *
 * <p>A parameter that has no {@code =} at all (for example {@code versions} in {@code ?versions}) is recorded with
 * its key and no values, instead of failing with an {@link ArrayIndexOutOfBoundsException}.
 *
 * @param queryParams mutable map that receives the parsed parameters
 * @param queryPart the query component of the URI, without the leading {@code ?}; must not be null
 */
private static void parseQuery(Map<String, List<String>> queryParams, String queryPart) {
    for (String param : queryPart.split("&")) {
        // Limit of 2 keeps any further '=' characters inside the value part.
        String[] keyValuePair = param.split("=", 2);
        String key = keyValuePair[0];
        if (key.isEmpty()) {
            continue;
        }

        List<String> paramValues = queryParams.computeIfAbsent(key, k -> new ArrayList<>());

        // A value-less flag such as "versions" yields a single-element array; there is nothing to add for it.
        if (keyValuePair.length > 1) {
            Collections.addAll(paramValues, keyValuePair[1].split(","));
        }
    }
}

/**
* Builder class to construct {@link S3Utilities} object
*/
Expand Down
Loading