Skip to content

Commit

Permalink
Fixes s3tools#1292 - cloudfront: Parse all aws possible urls in httpurl_to_s3uri
Browse files Browse the repository at this point in the history
  • Loading branch information
fviard committed Nov 19, 2022
1 parent f8eafbb commit 6f3e1ba
Showing 1 changed file with 21 additions and 10 deletions.
31 changes: 21 additions & 10 deletions S3/S3Uri.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,26 +109,37 @@ def compose_uri(bucket, object = ""):

@staticmethod
def httpurl_to_s3uri(http_url):
    """Convert an Amazon S3 HTTP(S) URL into an S3Uri.

    Both addressing styles are understood:

    * old-style (bucket in the path):     http://s3.amazonaws.com/bucket/object
    * new-style (bucket in the hostname): http://bucket.s3.amazonaws.com/object

    The hostname may also carry the optional endpoint parts accepted by the
    hostname pattern below, for example
    my.website.com.s3-fips.dualstack.us-west-1.amazonaws.com.cn

    :param http_url: URL to convert; the "http://" / "https://" prefix is
        optional.
    :return: S3Uri built from "s3://<bucket>/<object>".
    :raises ValueError: if http_url has no hostname part or the hostname is
        not matched by the S3 endpoint pattern.
    """
    m = re.match(r"(https?://)?([^/]+)/?(.*)", http_url, re.IGNORECASE | re.UNICODE)
    if not m:
        # No hostname at all (empty string, or a URL starting with "/"):
        # report it like any other unparseable URL rather than failing
        # with an AttributeError on None.
        raise ValueError("Unable to parse URL: %s" % http_url)
    # "key" rather than "object" so the builtin is not shadowed.
    hostname, key = m.groups()[1:]
    hostname = hostname.lower()

    # Worst case scenario, we would like to be able to match something like
    # my.website.com.s3-fips.dualstack.us-west-1.amazonaws.com.cn
    # Raw string: "\." and "\-" are invalid escapes in a normal literal.
    m = re.match(r"(.*\.)?s3(?:\-[^\.]*)?(?:\.dualstack)?(?:\.[^\.]*)?\.amazonaws\.com(?:\.cn)?$",
                 hostname, re.IGNORECASE | re.UNICODE)
    if not m:
        raise ValueError("Unable to parse URL: %s" % http_url)

    bucket = m.groups()[0]
    if not bucket:
        ## old-style url: http://s3.amazonaws.com/bucket/object
        if "/" not in key:
            ## no object given
            bucket = key
            key = ""
        else:
            ## bucket/object
            bucket, key = key.split("/", 1)
    else:
        ## new-style url: http://bucket.s3.amazonaws.com/object
        ## the captured prefix still ends with the "." that precedes "s3"
        bucket = bucket.rstrip('.')

    return S3Uri(
        u"s3://%(bucket)s/%(object)s" % {
            'bucket' : bucket,
            'object' : key
        }
    )

class S3UriS3FS(S3Uri):
type = "s3fs"
Expand Down

0 comments on commit 6f3e1ba

Please sign in to comment.