diff --git a/hadoop-hdds/docs/content/recipe/BotoClient.md b/hadoop-hdds/docs/content/recipe/BotoClient.md
new file mode 100644
index 000000000000..d8dc02d4bc51
--- /dev/null
+++ b/hadoop-hdds/docs/content/recipe/BotoClient.md
@@ -0,0 +1,189 @@
---
title: Access Ozone object store with Amazon Boto3 client
linktitle: Ozone with Boto3 Client
summary: How to access the Ozone object store with the Boto3 client.
---

This recipe shows how the Ozone object store can be accessed from a Boto3 client. The following APIs were verified:

 - Create bucket
 - List bucket
 - Head bucket
 - Delete bucket
 - Upload file
 - Download file
 - Delete objects (keys)
 - Head object
 - Multipart upload

## Requirements

You will need a recent version of Python 3 for the Boto3 client, as stated in the Boto3 installation requirements:
https://boto3.amazonaws.com/v1/documentation/api/latest/index.html

## Obtain a resource to Ozone
See the Amazon Boto3 documentation on creating 's3' resources:
https://boto3.amazonaws.com/v1/documentation/api/latest/guide/resources.html

    s3 = boto3.resource('s3',
                        endpoint_url='http://localhost:9878',
                        aws_access_key_id='testuser/scm@EXAMPLE.COM',
                        aws_secret_access_key='c261b6ecabf7d37d5f9ded654b1c724adac9bd9f13e247a235e567e8296d2999'
    )

'endpoint_url' points to the Ozone S3 gateway endpoint.

## Obtain a client to Ozone via session
See the Amazon Boto3 documentation on sessions:
https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html

Create a session:

    session = boto3.session.Session()

Obtain an s3 client to Ozone via the session:

    s3_client = session.client(
        service_name='s3',
        aws_access_key_id='testuser/scm@EXAMPLE.COM',
        aws_secret_access_key='c261b6ecabf7d37d5f9ded654b1c724adac9bd9f13e247a235e567e8296d2999',
        endpoint_url='http://localhost:9878',
    )

'endpoint_url' points to the Ozone S3 gateway endpoint.

The code samples below demonstrate the usage of both 's3' and 's3_client'.

There are multiple ways to configure Boto3 client credentials when connecting to a secured cluster, for example environment variables or a shared credentials file. In those cases, skip the 'aws_access_key_id' and 'aws_secret_access_key' arguments shown above when creating the Ozone s3 client.

Please refer to the Boto3 credentials documentation for details:
https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html

### Create a bucket

    response = s3_client.create_bucket(Bucket='bucket1')
    print(response)

This will create the bucket 'bucket1' in Ozone volume 's3v'.

### List buckets

    response = s3_client.list_buckets()
    print('Existing buckets:')
    for bucket in response['Buckets']:
        print(f'  {bucket["Name"]}')

This will list all buckets in Ozone volume 's3v'.

### Head a bucket

    response = s3_client.head_bucket(Bucket='bucket1')
    print(response)

This will head the bucket 'bucket1' in Ozone volume 's3v'.

### Delete a bucket

    response = s3_client.delete_bucket(Bucket='bucket1')
    print(response)

This will delete the bucket 'bucket1' from Ozone volume 's3v'.

### Upload a file

    response = s3.Bucket('bucket1').upload_file('./README.md','README.md')
    print(response)

This will upload 'README.md' to Ozone and create the key 'README.md' under bucket 'bucket1' in volume 's3v'.

### Download a file

    response = s3.Bucket('bucket1').download_file('README.md', 'download.md')
    print(response)

This will download 'README.md' from Ozone volume 's3v' to the local file system as a file named 'download.md'.
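
Note that 'upload_file' and 'download_file' return None on success, so printing their response is mainly useful for the client-level calls; failures are reported as exceptions. Below is a minimal sketch of how such errors could be handled, following the try/except pattern used by the smoketest script included in this patch; the bucket and key names are only illustrative.

    from botocore.exceptions import ClientError

    try:
        # download the key 'README.md' from the illustrative bucket 'bucket1'
        s3.Bucket('bucket1').download_file('README.md', 'download.md')
    except ClientError as e:
        # e.response['Error']['Code'] carries the S3 error code, e.g. 'NoSuchKey'
        print('Download failed:', e)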

### Head an object

    response = s3_client.head_object(Bucket='bucket1', Key='README.md')
    print(response)

This will head the object 'README.md' in bucket 'bucket1' of Ozone volume 's3v'.

### Delete objects

    response = s3_client.delete_objects(
        Bucket='bucket1',
        Delete={
            'Objects': [
                {
                    'Key': 'README4.md',
                },
                {
                    'Key': 'README3.md',
                },
            ],
            'Quiet': False,
        },
    )

This will delete the objects 'README3.md' and 'README4.md' from bucket 'bucket1' in Ozone volume 's3v'.

### Multipart upload

    response = s3_client.create_multipart_upload(Bucket='bucket1', Key='key1')
    print(response)
    uid = response['UploadId']
    print(uid)

    response = s3_client.upload_part_copy(
        Bucket='bucket1',
        CopySource='/bucket1/maven.gz',
        Key='key1',
        PartNumber=1,
        UploadId=str(uid)
    )
    print(response)
    etag1 = response.get('CopyPartResult').get('ETag')
    print(etag1)

    response = s3_client.upload_part_copy(
        Bucket='bucket1',
        CopySource='/bucket1/maven1.gz',
        Key='key1',
        PartNumber=2,
        UploadId=str(uid)
    )
    print(response)
    etag2 = response.get('CopyPartResult').get('ETag')
    print(etag2)

    response = s3_client.complete_multipart_upload(
        Bucket='bucket1',
        Key='key1',
        MultipartUpload={
            'Parts': [
                {
                    'ETag': str(etag1),
                    'PartNumber': 1,
                },
                {
                    'ETag': str(etag2),
                    'PartNumber': 2,
                },
            ],
        },
        UploadId=str(uid),
    )
    print(response)

This uses the existing objects 'maven.gz' and 'maven1.gz' in Ozone volume 's3v' as copy sources to create a new object 'key1' in the same volume. Please note that the 'ETag' values are required for the complete_multipart_upload call.
diff --git a/hadoop-ozone/dist/pom.xml b/hadoop-ozone/dist/pom.xml
index c1da927cd4d0..5454799d12ee 100644
--- a/hadoop-ozone/dist/pom.xml
+++ b/hadoop-ozone/dist/pom.xml
@@ -28,7 +28,7 @@
     UTF-8
     true
-    20200625-1
+    20210226-1
diff --git a/hadoop-ozone/dist/src/main/smoketest/s3/boto3.robot b/hadoop-ozone/dist/src/main/smoketest/s3/boto3.robot
new file mode 100644
index 000000000000..6a575f39efbd
--- /dev/null
+++ b/hadoop-ozone/dist/src/main/smoketest/s3/boto3.robot
@@ -0,0 +1,34 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
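
# Note: this suite shells out to the boto_client.py script added below, so
# python3 and the boto3 package must be available in the environment that
# runs the smoketests.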

*** Settings ***
Documentation       S3 gateway test with Boto3 Client
Library             OperatingSystem
Library             String
Library             Process
Library             BuiltIn
Resource            ../commonlib.robot
Resource            ./commonawslib.robot
Test Timeout        15 minutes
Suite Setup         Setup s3 tests

*** Variables ***
${ENDPOINT_URL}     http://s3g:9878
${BUCKET}           generated

*** Test Cases ***

Boto3 Client Test
    ${result} =     Execute    python3 /opt/hadoop/smoketest/s3/boto_client.py ${ENDPOINT_URL} ${BUCKET}
diff --git a/hadoop-ozone/dist/src/main/smoketest/s3/boto_client.py b/hadoop-ozone/dist/src/main/smoketest/s3/boto_client.py
new file mode 100755
index 000000000000..5185271cded6
--- /dev/null
+++ b/hadoop-ozone/dist/src/main/smoketest/s3/boto_client.py
@@ -0,0 +1,264 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import sys
import random
import string
import logging
import json
import unittest
import boto3
from botocore.client import Config
from botocore.exceptions import ClientError
import os.path
from os import path


class TestBotoClient(unittest.TestCase):

    s3 = None
    s3_client = None
    setup_done = False
    target_bucket = None
    ozone_endpoint_url = None

    def setUp(self):
        if TestBotoClient.setup_done:
            return

        TestBotoClient.ozone_endpoint_url = sys.argv[1]
        TestBotoClient.target_bucket = sys.argv[2]
        TestBotoClient.setup_done = True

        TestBotoClient.s3 = boto3.resource('s3',
            endpoint_url=self.ozone_endpoint_url
        )

        TestBotoClient.s3_client = boto3.session.Session().client(
            service_name='s3',
            endpoint_url=self.ozone_endpoint_url
        )

        try:
            response = self.s3_client.create_bucket(Bucket='boto-bucket999')
            print(response)

            response = self.s3_client.upload_file("README.md", str(self.target_bucket), "README3.md")
            print(response)

            response = self.s3.Bucket(str(self.target_bucket)).upload_file('README.md', 'README4.md')
            print(response)

            self.s3.Bucket(str(self.target_bucket)).upload_file('README.md', 'README10.md')
            print(response)
        except ClientError as e:
            logging.error(e)
            print(e)

        f = open('multiUpload.gz', "wb")
        f.seek(10485760)
        f.write(b"\0")
        f.close()
        self.s3.Bucket(str(self.target_bucket)).upload_file('./multiUpload.gz', 'multiUpload.1.gz')
        self.s3.Bucket(str(self.target_bucket)).upload_file('./multiUpload.gz', 'multiUpload.2.gz')

    def test_create_bucket(self):
        self.assertTrue(self.s3_client is not None)
        try:
            letters = string.ascii_lowercase
            bucket_name = ''.join(random.choice(letters) for i in range(10))
            response = self.s3_client.create_bucket(Bucket='bucket-' + str(bucket_name))
            print(response)
            self.assertTrue(str(bucket_name) in response.get('Location'))
            self.assertTrue(response.get('ResponseMetadata').get('HTTPStatusCode') == 200)
        except ClientError as e:
            print(e)
            logging.error(e)
            return False
        return True

    def test_list_bucket(self):
        self.assertTrue(self.s3_client is not None)
        try:
            response = self.s3_client.list_buckets()
            self.assertTrue(response.get('ResponseMetadata').get('HTTPStatusCode') == 200)
            print(response)
        except ClientError as e:
            print(e)
            logging.error(e)
            return False
        return True

    def test_head_bucket(self):
        self.assertTrue(self.s3_client is not None)
        try:
            response = self.s3_client.head_bucket(Bucket=self.target_bucket)
            self.assertTrue(response.get('ResponseMetadata').get('HTTPStatusCode') == 200)
            print(response)
        except ClientError as e:
            print(e)
            logging.error(e)
            return False
        return True

    def test_bucket_delete(self):
        self.assertTrue(self.s3_client is not None)
        try:
            response = self.s3_client.delete_bucket(Bucket='boto-bucket999')
            self.assertTrue(response.get('ResponseMetadata').get('HTTPStatusCode') == 204)
            print(response)
        except ClientError as e:
            logging.error(e)
            return False
        return True

    def test_upload_file(self):
        self.assertTrue(self.s3 is not None)
        try:
            self.s3.Bucket(str(self.target_bucket)).upload_file('./README.md', 'README1.md')
            response = self.s3_client.head_object(Bucket=str(self.target_bucket), Key='README1.md')
            self.assertTrue(response.get('ResponseMetadata').get('HTTPStatusCode') == 200)
            print(response)
        except ClientError as e:
            print(e)
            logging.error(e)
            return False
        return True

    def test_download_file(self):
        self.assertTrue(self.s3 is not None)
        try:
            self.s3.Bucket(str(self.target_bucket)).download_file('README10.md', 'download.md')
            self.assertTrue(path.exists("./download.md"))
        except ClientError as e:
            print(e)
            logging.error(e)
            return False
        return True

    def test_delete_objects(self):
        self.assertTrue(self.s3_client is not None)
        try:
            response = self.s3_client.delete_objects(
                Bucket=str(self.target_bucket),
                Delete={
                    'Objects': [
                        {
                            'Key': 'README4.md',
                        },
                        {
                            'Key': 'README3.md',
                        },
                    ],
                    'Quiet': False,
                },
            )
            self.assertTrue(response.get('ResponseMetadata').get('HTTPStatusCode') == 200)
            print(response)
        except ClientError as e:
            print(e)
            logging.error(e)
            return False
        return True

    def test_head_object(self):
        self.assertTrue(self.s3_client is not None)
        try:
            response = self.s3_client.head_object(Bucket=str(self.target_bucket), Key='README10.md')
            self.assertTrue(response.get('ResponseMetadata').get('HTTPStatusCode') == 200)
            print(response)
        except ClientError as e:
            print(e)
            logging.error(e)
            return False
        return True

    def test_multi_uploads(self):
        self.assertTrue(self.s3_client is not None)
        try:
            lts = string.ascii_lowercase
            key_name = ''.join(random.choice(lts) for i in range(10))
            response = self.s3_client.create_multipart_upload(Bucket=str(self.target_bucket), Key=str(key_name))
            print(response)
            uid = response['UploadId']

            copy1 = self.target_bucket + "/multiUpload.1.gz"
            response = self.s3_client.upload_part_copy(
                Bucket=str(self.target_bucket),
                CopySource=str(copy1),
                Key=str(key_name),
                PartNumber=1,
                UploadId=str(uid)
            )
            etag1 = response.get('CopyPartResult').get('ETag')

            copy2 = self.target_bucket + "/multiUpload.2.gz"
            response = self.s3_client.upload_part_copy(
                Bucket=str(self.target_bucket),
                CopySource=str(copy2),
                Key=str(key_name),
                PartNumber=2,
                UploadId=str(uid)
            )
            etag2 = response.get('CopyPartResult').get('ETag')

            response = self.s3_client.complete_multipart_upload(
                Bucket=str(self.target_bucket),
                Key=str(key_name),
                MultipartUpload={
                    'Parts': [
                        {
                            'ETag': str(etag1),
                            'PartNumber': 1,
                        },
                        {
                            'ETag': str(etag2),
                            'PartNumber': 2,
                        },
                    ],
                },
                UploadId=str(uid),
            )
            self.assertTrue(response.get('ResponseMetadata').get('HTTPStatusCode') == 200)
            print(response)
        except ClientError as e:
            print(e)
            logging.error(e)
            return False
        return True


if __name__ == '__main__':
    # unittest.main()
    suite = unittest.TestSuite()

    suite.addTest(TestBotoClient('test_create_bucket'))
    suite.addTest(TestBotoClient('test_list_bucket'))
    suite.addTest(TestBotoClient('test_head_bucket'))
    suite.addTest(TestBotoClient('test_bucket_delete'))
    suite.addTest(TestBotoClient('test_upload_file'))
    suite.addTest(TestBotoClient('test_download_file'))
    suite.addTest(TestBotoClient('test_delete_objects'))
    suite.addTest(TestBotoClient('test_head_object'))
    suite.addTest(TestBotoClient('test_multi_uploads'))

    result = unittest.TextTestRunner(verbosity=2).run(suite)

    if result.wasSuccessful():
        print("Boto3 Client Test PASSED!")
        exit(0)
    else:
        print("Boto3 Client Test FAILED!")
        exit(1)
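
For reference, the script reads the endpoint URL and target bucket as its two positional arguments (see setUp), so it can also be run by hand against a reachable S3 gateway, assuming the target bucket already exists and a README.md file is present in the working directory; the values below are only illustrative:

    python3 boto_client.py http://localhost:9878 bucket1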