From ca7c0a564023b86c993865e76b164e7ff7524c33 Mon Sep 17 00:00:00 2001 From: Akash Sharma <2akash111998@gmail.com> Date: Sun, 25 Jun 2023 02:39:20 +0530 Subject: [PATCH 1/5] avoid UnboundLocalError --- airflow/providers/amazon/aws/hooks/s3.py | 2 ++ tests/providers/amazon/aws/hooks/test_s3.py | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/airflow/providers/amazon/aws/hooks/s3.py b/airflow/providers/amazon/aws/hooks/s3.py index 0489fa63128ad..dfe1428284922 100644 --- a/airflow/providers/amazon/aws/hooks/s3.py +++ b/airflow/providers/amazon/aws/hooks/s3.py @@ -229,6 +229,8 @@ def parse_s3_url(s3url: str) -> tuple[str, str]: elif temp_split[1] == "s3": bucket_name = temp_split[0] key = "/".join(format[1].split("/")[1:]) + else: + raise S3HookUriParseFailure(f'Please provide a bucket name using a valid format: "{s3url}"') else: raise S3HookUriParseFailure(f'Please provide a bucket name using a valid format: "{s3url}"') return bucket_name, key diff --git a/tests/providers/amazon/aws/hooks/test_s3.py b/tests/providers/amazon/aws/hooks/test_s3.py index 001584ccbacc3..63f0c554ee150 100644 --- a/tests/providers/amazon/aws/hooks/test_s3.py +++ b/tests/providers/amazon/aws/hooks/test_s3.py @@ -39,6 +39,7 @@ provide_bucket_name, unify_bucket_name_and_key, ) +from airflow.providers.amazon.aws.exceptions import S3HookUriParseFailure from airflow.utils.timezone import datetime @@ -94,6 +95,10 @@ def test_parse_s3_url_virtual_hosted_style(self): parsed = S3Hook.parse_s3_url("https://DOC-EXAMPLE-BUCKET1.s3.us-west-2.amazonaws.com/test.png") assert parsed == ("DOC-EXAMPLE-BUCKET1", "test.png"), "Incorrect parsing of the s3 url" + def test_parse_invalid_s3_url_virtual_hosted_style(self): + with pytest.raises(S3HookUriParseFailure, match='Please provide a bucket name using a valid format: "https://DOC-EXAMPLE-BUCKET1.us-west-2.amazonaws.com/test.png"'): + S3Hook.parse_s3_url("https://DOC-EXAMPLE-BUCKET1.us-west-2.amazonaws.com/test.png") + def test_parse_s3_object_directory(self): parsed = S3Hook.parse_s3_url("s3://test/this/is/not/a-real-s3-directory/") assert parsed == ("test", "this/is/not/a-real-s3-directory/"), "Incorrect parsing of the s3 url" From 7670885c0b96aa69030975dd50d23bc254bc159a Mon Sep 17 00:00:00 2001 From: Akash Sharma <2akash111998@gmail.com> Date: Sun, 25 Jun 2023 02:43:57 +0530 Subject: [PATCH 2/5] static-check --- tests/providers/amazon/aws/hooks/test_s3.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/providers/amazon/aws/hooks/test_s3.py b/tests/providers/amazon/aws/hooks/test_s3.py index 63f0c554ee150..d35c870af6bfb 100644 --- a/tests/providers/amazon/aws/hooks/test_s3.py +++ b/tests/providers/amazon/aws/hooks/test_s3.py @@ -96,9 +96,12 @@ def test_parse_s3_url_virtual_hosted_style(self): assert parsed == ("DOC-EXAMPLE-BUCKET1", "test.png"), "Incorrect parsing of the s3 url" def test_parse_invalid_s3_url_virtual_hosted_style(self): - with pytest.raises(S3HookUriParseFailure, match='Please provide a bucket name using a valid format: "https://DOC-EXAMPLE-BUCKET1.us-west-2.amazonaws.com/test.png"'): + with pytest.raises( + S3HookUriParseFailure, + match='Please provide a bucket name using a valid format: "https://DOC-EXAMPLE-BUCKET1.us-west-2.amazonaws.com/test.png"', + ): S3Hook.parse_s3_url("https://DOC-EXAMPLE-BUCKET1.us-west-2.amazonaws.com/test.png") - + def test_parse_s3_object_directory(self): parsed = S3Hook.parse_s3_url("s3://test/this/is/not/a-real-s3-directory/") assert parsed == ("test", "this/is/not/a-real-s3-directory/"), "Incorrect parsing of the s3 url" From 66adc78f1275681b7f61e13a5f3795163d7b6293 Mon Sep 17 00:00:00 2001 From: Akash Sharma <2akash111998@gmail.com> Date: Sun, 25 Jun 2023 02:54:24 +0530 Subject: [PATCH 3/5] Added valid url suggestion in the err --- airflow/providers/amazon/aws/hooks/s3.py | 4 +++- tests/providers/amazon/aws/hooks/test_s3.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/airflow/providers/amazon/aws/hooks/s3.py b/airflow/providers/amazon/aws/hooks/s3.py index dfe1428284922..f2919e62a9e41 100644 --- a/airflow/providers/amazon/aws/hooks/s3.py +++ b/airflow/providers/amazon/aws/hooks/s3.py @@ -230,7 +230,9 @@ def parse_s3_url(s3url: str) -> tuple[str, str]: bucket_name = temp_split[0] key = "/".join(format[1].split("/")[1:]) else: - raise S3HookUriParseFailure(f'Please provide a bucket name using a valid format: "{s3url}"') + raise S3HookUriParseFailure( + f'Please provide a bucket name using a valid virtually hosted format which should be of the form: https://.s3..amazonaws.com/ but provided: "{s3url}"' + ) else: raise S3HookUriParseFailure(f'Please provide a bucket name using a valid format: "{s3url}"') return bucket_name, key diff --git a/tests/providers/amazon/aws/hooks/test_s3.py b/tests/providers/amazon/aws/hooks/test_s3.py index d35c870af6bfb..1d2c1d69045f2 100644 --- a/tests/providers/amazon/aws/hooks/test_s3.py +++ b/tests/providers/amazon/aws/hooks/test_s3.py @@ -98,7 +98,7 @@ def test_parse_s3_url_virtual_hosted_style(self): def test_parse_invalid_s3_url_virtual_hosted_style(self): with pytest.raises( S3HookUriParseFailure, - match='Please provide a bucket name using a valid format: "https://DOC-EXAMPLE-BUCKET1.us-west-2.amazonaws.com/test.png"', + match='Please provide a bucket name using a valid virtually hosted format which should be of the form: https://.s3..amazonaws.com/ but provided: "https://DOC-EXAMPLE-BUCKET1.us-west-2.amazonaws.com/test.png"', ): S3Hook.parse_s3_url("https://DOC-EXAMPLE-BUCKET1.us-west-2.amazonaws.com/test.png") From 0ddfbc2b274269972df7aa55899f9be1ee2d984e Mon Sep 17 00:00:00 2001 From: Akash Sharma <2akash111998@gmail.com> Date: Sun, 25 Jun 2023 03:10:26 +0530 Subject: [PATCH 4/5] updated err --- airflow/providers/amazon/aws/hooks/s3.py | 12 +++++++++--- tests/providers/amazon/aws/hooks/test_s3.py | 2 +- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/airflow/providers/amazon/aws/hooks/s3.py b/airflow/providers/amazon/aws/hooks/s3.py index f2919e62a9e41..42ffa2ea9f614 100644 --- a/airflow/providers/amazon/aws/hooks/s3.py +++ b/airflow/providers/amazon/aws/hooks/s3.py @@ -212,11 +212,15 @@ def parse_s3_url(s3url: str) -> tuple[str, str]: :param s3url: The S3 Url to parse. :return: the parsed bucket name and key """ + valid_s3_format = "S3://bucket-name/key-name" + valid_s3_virtual_hosted_format = "https://bucket-name.s3.region-code.amazonaws.com/key-name" format = s3url.split("//") if re.match(r"s3[na]?:", format[0], re.IGNORECASE): parsed_url = urlsplit(s3url) if not parsed_url.netloc: - raise S3HookUriParseFailure(f'Please provide a bucket name using a valid format: "{s3url}"') + raise S3HookUriParseFailure( + f'Please provide a bucket name using a valid format of the form: {valid_s3_format} or {valid_s3_virtual_hosted_format} but provided: "{s3url}"' + ) bucket_name = parsed_url.netloc key = parsed_url.path.lstrip("/") @@ -231,10 +235,12 @@ def parse_s3_url(s3url: str) -> tuple[str, str]: key = "/".join(format[1].split("/")[1:]) else: raise S3HookUriParseFailure( - f'Please provide a bucket name using a valid virtually hosted format which should be of the form: https://.s3..amazonaws.com/ but provided: "{s3url}"' + f'Please provide a bucket name using a valid virtually hosted format which should be of the form: {valid_s3_virtual_hosted_format} but provided: "{s3url}"' ) else: - raise S3HookUriParseFailure(f'Please provide a bucket name using a valid format: "{s3url}"') + raise S3HookUriParseFailure( + f'Please provide a bucket name using a valid format of the form: {valid_s3_format} or {valid_s3_virtual_hosted_format} but provided: "{s3url}"' + ) return bucket_name, key @staticmethod diff --git a/tests/providers/amazon/aws/hooks/test_s3.py b/tests/providers/amazon/aws/hooks/test_s3.py index 1d2c1d69045f2..7f904540a871e 100644 --- a/tests/providers/amazon/aws/hooks/test_s3.py +++ b/tests/providers/amazon/aws/hooks/test_s3.py @@ -98,7 +98,7 @@ def test_parse_s3_url_virtual_hosted_style(self): def test_parse_invalid_s3_url_virtual_hosted_style(self): with pytest.raises( S3HookUriParseFailure, - match='Please provide a bucket name using a valid virtually hosted format which should be of the form: https://.s3..amazonaws.com/ but provided: "https://DOC-EXAMPLE-BUCKET1.us-west-2.amazonaws.com/test.png"', + match='Please provide a bucket name using a valid virtually hosted format which should be of the form: https://bucket-name.s3.region-code.amazonaws.com/key-name but provided: "https://DOC-EXAMPLE-BUCKET1.us-west-2.amazonaws.com/test.png"', ): S3Hook.parse_s3_url("https://DOC-EXAMPLE-BUCKET1.us-west-2.amazonaws.com/test.png") From 6cceecef5c31f764896f26f4727beae96759a0f8 Mon Sep 17 00:00:00 2001 From: Akash Sharma <2akash111998@gmail.com> Date: Sun, 25 Jun 2023 04:04:03 +0530 Subject: [PATCH 5/5] fix static checks' --- airflow/providers/amazon/aws/hooks/s3.py | 9 ++++++--- tests/providers/amazon/aws/hooks/test_s3.py | 6 ++++-- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/airflow/providers/amazon/aws/hooks/s3.py b/airflow/providers/amazon/aws/hooks/s3.py index 42ffa2ea9f614..044dc2c9a9eaa 100644 --- a/airflow/providers/amazon/aws/hooks/s3.py +++ b/airflow/providers/amazon/aws/hooks/s3.py @@ -219,7 +219,8 @@ def parse_s3_url(s3url: str) -> tuple[str, str]: parsed_url = urlsplit(s3url) if not parsed_url.netloc: raise S3HookUriParseFailure( - f'Please provide a bucket name using a valid format of the form: {valid_s3_format} or {valid_s3_virtual_hosted_format} but provided: "{s3url}"' + "Please provide a bucket name using a valid format of the form: " + + f'{valid_s3_format} or {valid_s3_virtual_hosted_format} but provided: "{s3url}"' ) bucket_name = parsed_url.netloc @@ -235,11 +236,13 @@ def parse_s3_url(s3url: str) -> tuple[str, str]: key = "/".join(format[1].split("/")[1:]) else: raise S3HookUriParseFailure( - f'Please provide a bucket name using a valid virtually hosted format which should be of the form: {valid_s3_virtual_hosted_format} but provided: "{s3url}"' + "Please provide a bucket name using a valid virtually hosted format which should" + + f' be of the form: {valid_s3_virtual_hosted_format} but provided: "{s3url}"' ) else: raise S3HookUriParseFailure( - f'Please provide a bucket name using a valid format of the form: {valid_s3_format} or {valid_s3_virtual_hosted_format} but provided: "{s3url}"' + "Please provide a bucket name using a valid format of the form: " + + f'{valid_s3_format} or {valid_s3_virtual_hosted_format} but provided: "{s3url}"' ) return bucket_name, key diff --git a/tests/providers/amazon/aws/hooks/test_s3.py b/tests/providers/amazon/aws/hooks/test_s3.py index 7f904540a871e..4a42d828da8ba 100644 --- a/tests/providers/amazon/aws/hooks/test_s3.py +++ b/tests/providers/amazon/aws/hooks/test_s3.py @@ -34,12 +34,12 @@ from airflow.exceptions import AirflowException from airflow.models import Connection +from airflow.providers.amazon.aws.exceptions import S3HookUriParseFailure from airflow.providers.amazon.aws.hooks.s3 import ( S3Hook, provide_bucket_name, unify_bucket_name_and_key, ) -from airflow.providers.amazon.aws.exceptions import S3HookUriParseFailure from airflow.utils.timezone import datetime @@ -98,7 +98,9 @@ def test_parse_s3_url_virtual_hosted_style(self): def test_parse_invalid_s3_url_virtual_hosted_style(self): with pytest.raises( S3HookUriParseFailure, - match='Please provide a bucket name using a valid virtually hosted format which should be of the form: https://bucket-name.s3.region-code.amazonaws.com/key-name but provided: "https://DOC-EXAMPLE-BUCKET1.us-west-2.amazonaws.com/test.png"', + match="Please provide a bucket name using a valid virtually hosted format which should" + + " be of the form: https://bucket-name.s3.region-code.amazonaws.com/key-name but " + + 'provided: "https://DOC-EXAMPLE-BUCKET1.us-west-2.amazonaws.com/test.png"', ): S3Hook.parse_s3_url("https://DOC-EXAMPLE-BUCKET1.us-west-2.amazonaws.com/test.png")