diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
new file mode 100644
index 0000000..2cbddce
--- /dev/null
+++ b/.github/workflows/publish.yml
@@ -0,0 +1,30 @@
+name: Publish PyPI Package
+
+on:
+  release:
+    types: [published]
+
+jobs:
+  release_package:
+    permissions:
+      contents: write
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+        with:
+          fetch-depth: 0
+
+      - name: Set up Python 3.8
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.8
+
+      - name: Build a binary wheel and a source tarball
+        run: pip install wheel && python setup.py sdist bdist_wheel
+
+      - name: Publish distribution 📦 to PyPI
+        if: startsWith(github.ref, 'refs/tags')
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          password: ${{ secrets.PYPI_API_TOKEN }}
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..1776f2b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,164 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+.vscode/
\ No newline at end of file
diff --git a/LICENCE b/LICENCE
new file mode 100644
index 0000000..8c2810d
--- /dev/null
+++ b/LICENCE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2023 Code Lighthouse
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..e0b8945
--- /dev/null
+++ b/README.md
@@ -0,0 +1,95 @@
+# s3hive
+
+A tool built on top of boto3 that allows you to easily manage your S3 buckets.
+
+[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
+
+## Overview
+
+s3hive is a Python tool that provides a high-level interface for working with S3 buckets. With it, you can easily perform common operations such as creating and deleting buckets, listing their contents, and uploading, downloading, and deleting files.
+
+It uses the popular boto3 library to interact with the S3 API, making it simple and intuitive to use.
+
+s3hive is designed to be easy to use, with a simple and consistent API that abstracts away many of the complexities of working with S3 buckets. Whether you're a seasoned developer or just getting started, s3hive can help you streamline your S3 operations and save time.
+
+## Features
+
+- Create a new S3 bucket
+- Delete an existing S3 bucket
+- Generate a presigned URL to share an S3 object
+- List all S3 buckets
+- Upload files to an S3 bucket
+- Download files from an S3 bucket
+- List files in an S3 bucket
+- Delete files from an S3 bucket
+
+## Getting Started
+
+### Installation
+
+You can install s3hive using pip:
+
+```bash
+$ pip install s3hive
+```
+
+### Usage
+
+Here's an example of how to use s3hive to list all your S3 buckets:
+
+```python
+import s3hive as s3
+import os
+
+ENDPOINT_URL = os.environ.get('ENDPOINT_URL')
+REGION = os.environ.get('REGION')
+AWS_ACCESS_KEY_ID = os.environ.get('AWS_ACCESS_KEY_ID')
+AWS_SECRET_ACCESS_KEY = os.environ.get('AWS_SECRET_ACCESS_KEY')
+
+s3hive = s3.Bucket(
+    endpoint_url=ENDPOINT_URL,
+    region=REGION,
+    aws_access_key_id=AWS_ACCESS_KEY_ID,
+    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
+)
+
+buckets = s3hive.list_buckets()
+
+print(buckets)
+
+# Output:
+# [{
+#     'Name': 'bucket1',
+#     'CreationDate': datetime.datetime(2020, 5, 1, 12, 0, 0, tzinfo=tzutc())
+# }]
+```
+
+For more examples and detailed documentation, please see [`example.py`](https://github.com/sotberd/s3hive/blob/main/example.py) in the repository.
+
+### Methods
+
+| Method | Description |
+| :--- | :--- |
+| `_get_client()` | Get the S3 client. Returns a boto3 client object for the S3 service. |
+| `create_bucket(bucket: str, acl: str = "private")` | Create an S3 bucket in the configured region. `bucket` is the name of the bucket to create, and `acl` is the access control list. Returns `True` if the bucket was created successfully, or raises an exception if an error occurs. |
+| `delete_bucket(bucket: str)` | Delete an S3 bucket. `bucket` is the name of the bucket to delete. Returns `True` if the bucket was deleted successfully, or raises an exception if an error occurs. |
+| `list_buckets(names_only: bool = False)` | List all buckets in the S3 account. If `names_only` is `True`, return only the bucket names. Otherwise, return a list of dictionaries, each containing the bucket name and creation date. Raises an exception if an error occurs. |
+| `list_objects(bucket: str, keys_only: bool = False)` | List all objects in the specified bucket. If `keys_only` is `True`, return only the object keys. Otherwise, return a list of dictionaries, each containing the object key, size, and last modified date. Raises an exception if an error occurs. |
+| `create_presigned_url(bucket: str, key: str, expiration: int = 3600)` | Generate a presigned URL to share an S3 object. `bucket` is the name of the bucket containing the object, `key` is the object key, and `expiration` is the time in seconds for the presigned URL to remain valid. Returns the presigned URL as a string, or raises an exception if an error occurs. |
+| `upload(bucket: str, file_name: str, key: str = None, extra_args: dict = None, filesize: int = None)` | Upload an object to an S3 bucket. `file_name` is the path to the file to upload, `bucket` is the name of the bucket to upload to, and `key` is the S3 object name (if not specified, the basename of `file_name` is used). `extra_args` is a dictionary of extra arguments passed to the S3 API, and `filesize` is an optional size in bytes used for the progress bar instead of reading the size from disk. Returns `True` if the file was uploaded successfully, or raises an exception if an error occurs. |
+| `download(bucket: str, key: str, local_dir: str = ROOT_DIR)` | Download an object from an S3 bucket to a local directory. `key` is the S3 object key, and `local_dir` is the local directory to download the file to (if `local_dir` is not provided, the object is stored in the project root). Returns `True` if the file was downloaded successfully, or raises an exception if an error occurs. |
+| `delete(bucket: str, key: str)` | Delete an object from an S3 bucket. `bucket` is the name of the bucket containing the object, and `key` is the object key. Returns a `(marker, metadata)` tuple if the object was deleted successfully, or raises an exception if an error occurs. |
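+
+All of these methods raise a plain `Exception` wrapping the underlying `botocore` error response, so calls can share a single `try/except` block. The snippet below is a minimal sketch of a create-upload-share round trip, reusing the `s3hive` instance from the Usage example; the bucket name `my-new-bucket`, the file `report.pdf`, and the `ContentType` value are placeholders, and bucket names must be globally unique for `create_bucket` to succeed:
+
+```python
+try:
+    # Create a bucket, upload a file with extra S3 arguments,
+    # then share it via a presigned URL valid for 10 minutes.
+    s3hive.create_bucket('my-new-bucket', acl='private')
+    s3hive.upload(
+        'my-new-bucket',
+        'report.pdf',
+        key='reports/report.pdf',
+        extra_args={'ContentType': 'application/pdf'},
+    )
+    url = s3hive.create_presigned_url('my-new-bucket', 'reports/report.pdf', expiration=600)
+    print(url)
+except Exception as e:
+    print(f'S3 operation failed: {e}')
+```
+
+Because presigned URLs expire after `expiration` seconds, they can be handed to clients without sharing your credentials.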
+
+## License
+
+s3hive is licensed under the [MIT License](https://opensource.org/license/mit/).
diff --git a/example.py b/example.py
new file mode 100644
index 0000000..47e5114
--- /dev/null
+++ b/example.py
@@ -0,0 +1,45 @@
+import s3hive as s3
+import os
+from dotenv import load_dotenv
+
+load_dotenv()
+
+# Environment variables
+ENDPOINT_URL = os.getenv('ENDPOINT_URL')
+REGION = os.getenv('REGION')
+AWS_ACCESS_KEY_ID = os.getenv('AWS_ACCESS_KEY_ID')
+AWS_SECRET_ACCESS_KEY = os.getenv('AWS_SECRET_ACCESS_KEY')
+
+# Instantiate a Bucket object
+s3hive = s3.Bucket(
+    endpoint_url=ENDPOINT_URL,
+    region=REGION,
+    aws_access_key_id=AWS_ACCESS_KEY_ID,
+    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
+)
+
+# List all buckets
+buckets = s3hive.list_buckets()
+print(buckets)
+
+# List all objects in a bucket
+objects = s3hive.list_objects('my-bucket')
+print(objects)
+
+# Create a presigned URL
+url = s3hive.create_presigned_url('my-bucket', 'my-object')
+print(url)
+
+# Upload the local file 'my-object.yml' under the key 'my-file.yml'
+uploaded = s3hive.upload('my-bucket', 'my-object.yml', 'my-file.yml')
+print(uploaded)
+
+# Download the object 'my-file.yml' to the default local directory
+downloaded = s3hive.download('my-bucket', 'my-file.yml')
+print(downloaded)
+
+# Delete an object; returns a (delete marker, response metadata) tuple
+marker, metadata = s3hive.delete('my-bucket', 'my-file.yml')
+print(marker, metadata)
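+
+# The calls below are a minimal sketch of the bucket-level methods not shown
+# above. 'my-new-bucket' is a placeholder: bucket names must be globally
+# unique, and delete_bucket only succeeds once the bucket is empty.
+created = s3hive.create_bucket('my-new-bucket', acl='private')
+print(created)
+
+deleted = s3hive.delete_bucket('my-new-bucket')
+print(deleted)
+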
+ """ + self.endpoint_url = endpoint_url + self.region = region + self.aws_access_key_id = aws_access_key_id + self.aws_secret_access_key = aws_secret_access_key + + def _get_client(self) -> boto3.client: + """ + Returns a boto3 client object for the S3 service. + + :return: A boto3 client object. + """ + return boto3.client( + "s3", + endpoint_url=self.endpoint_url, + region_name=self.region, + aws_access_key_id=self.aws_access_key_id, + aws_secret_access_key=self.aws_secret_access_key, + ) + + def _client_error(self, e: ClientError)->None: + """ + Raise exception if error occurs in boto3 client + + :param e: ClientError + :return: None + """ + raise Exception(e.response) + + def create_bucket(self, bucket:str, acl:str="private") -> bool: + """ + Create an S3 bucket in a specified region + + :param bucket: Bucket to create + :param acl: Access control list. Default is private + :return: True if bucket was created, else raise an exception + """ + s3_client = self._get_client() + try: + response = s3_client.create_bucket( + Bucket=bucket, + CreateBucketConfiguration={"LocationConstraint": self.region}, + ACL=acl, + ) + if response["ResponseMetadata"]["HTTPStatusCode"] == 200: + return True + + except ClientError as e: + self._client_error(e) + + def delete_bucket(self, bucket:str) -> bool: + """ + Delete an S3 bucket + + :param bucket: Bucket to delete + :return: True if bucket was deleted, else raise an exception + """ + + s3_client = self._get_client() + try: + reponse = s3_client.delete_bucket(Bucket=bucket) + if reponse["ResponseMetadata"]["HTTPStatusCode"] == 204: + return True + + except ClientError as e: + self._client_error(e) + + def list_buckets(self, names_only:bool = False) -> List: + """ + List buckets in S3 account that you have access to by your credentials + + :param names_only: if True return only bucket names + :return: list of buckets. If error, else raise an exception + """ + + s3_client = self._get_client() + try: + response = s3_client.list_buckets() + if not names_only: + return response["Buckets"] + return [bucket["Name"] for bucket in response["Buckets"]] + + except ClientError as e: + self._client_error(e) + + def list_objects(self,bucket:str, keys_only=False) -> List: + """ + List objects in bucket + + :param keys_only: if True return only keys of objects + :return: list of objects. If error, raise an exception + """ + client = self._get_client() + try: + response = client.list_objects_v2(Bucket=bucket) + + if not keys_only: + return response["Contents"] + + return [obj["Key"] for obj in response["Contents"]] + + except ClientError as e: + self._client_error(e) + + def create_presigned_url(self,bucket:str, key:str, expiration:int=3600) -> str: + """ + Generate a presigned URL to share an S3 object + + :param bucket: The bucket name. + :param key: The object key. + :param expiration: Time in seconds for the presigned URL to remain valid. Default is 3600 seconds. + :return: Presigned URL as string. else raise an exception + """ + + # Generate a presigned URL for the S3 object + s3_client = self._get_client() + try: + return s3_client.generate_presigned_url('get_object', + Params={'Bucket': bucket, + 'Key': key}, + ExpiresIn=expiration) + except ClientError as e: + self._client_error(e) + + def upload(self,bucket:str, file_name:str, key:str=None, extra_args: Dict[str, str] = None,filesize:int=None) -> bool: + """ + Upload an object to an S3 bucket + + :param file_name: File to upload + :param bucket: Bucket to upload to + :param key: S3 object name. 
+    def upload(
+        self,
+        bucket: str,
+        file_name: str,
+        key: str = None,
+        extra_args: Dict[str, str] = None,
+        filesize: int = None,
+    ) -> bool:
+        """
+        Upload an object to an S3 bucket.
+
+        :param bucket: Bucket to upload to.
+        :param file_name: File to upload.
+        :param key: S3 object name. If not specified, the basename of file_name is used.
+        :param extra_args: Extra arguments that may be passed to the S3 API.
+        :param filesize: Size of the file in bytes. If not specified, it is read from disk.
+        :return: True if the file was uploaded, else raise an exception.
+        """
+        s3_client = self._get_client()
+
+        # If an S3 key was not specified, use the file name
+        if key is None:
+            key = os.path.basename(file_name)
+
+        # Use the provided size, or read it from disk for the progress bar
+        file_size = filesize if filesize is not None else os.path.getsize(file_name)
+
+        # Upload the file
+        try:
+            with tqdm(
+                total=file_size,
+                desc=f"Uploading {file_name} to {bucket}",
+                unit="B",
+                unit_scale=True,
+                unit_divisor=1024,
+                colour="cyan",
+                bar_format="{l_bar}{bar:10}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}{postfix}]",
+            ) as pbar:
+                response = s3_client.upload_file(
+                    file_name,
+                    bucket,
+                    key,
+                    Callback=lambda bytes_transferred: pbar.update(bytes_transferred),
+                    ExtraArgs=extra_args,
+                )
+
+            if response is None:
+                return True
+
+        except ClientError as e:
+            self._client_error(e)
+
+    def download(self, bucket: str, key: str, local_dir: str = ROOT_DIR) -> bool:
+        """
+        Download an object from an S3 bucket to a local directory.
+
+        :param bucket: Bucket to download from.
+        :param key: S3 object key.
+        :param local_dir: Local directory to download the file to. Defaults to the project root.
+        :return: True if the file was downloaded, else raise an exception.
+        """
+        client = self._get_client()
+
+        # Create the local directory if it does not exist
+        if not os.path.exists(local_dir):
+            os.makedirs(local_dir)
+
+        try:
+            response = client.head_object(Bucket=bucket, Key=key)
+            total_size = response["ContentLength"] / 1024 / 1024
+
+            # Local file path to download to
+            local_file = os.path.join(local_dir, os.path.basename(key))
+
+            with tqdm(
+                total=round(total_size, 2),
+                desc=f"Downloading to {local_file}",
+                unit="MB",
+                colour="cyan",
+                bar_format="{l_bar}{bar:10}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}{postfix}]",
+            ) as pbar:
+                response = client.download_file(
+                    bucket,
+                    key,
+                    local_file,
+                    Callback=lambda bytes_transferred: pbar.update(
+                        round(bytes_transferred / 1024 / 1024, 2)
+                    ),
+                )
+
+            if response is None:
+                return True
+
+        except ClientError as e:
+            self._client_error(e)
+
+    def delete(self, bucket: str, key: str) -> Tuple[bool, dict]:
+        """
+        Delete an object from an S3 bucket.
+
+        :param bucket: Bucket containing the object.
+        :param key: S3 object key.
+        :return: A tuple of (marker, metadata) if the object was deleted successfully, else raise an exception.
+        """
+        s3_client = self._get_client()
+
+        try:
+            response = s3_client.delete_object(Bucket=bucket, Key=key)
+            if response["ResponseMetadata"]["HTTPStatusCode"] == 204:
+                marker = response.get("DeleteMarker", False)
+                metadata = response["ResponseMetadata"]
+                return marker, metadata
+
+        except ClientError as e:
+            self._client_error(e)
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..f3de66f
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,29 @@
+import setuptools
+
+with open("README.md", "r") as fh:
+    long_description = fh.read()
+
+setuptools.setup(
+    name="s3hive",
+    version="1.0.0",
+    author="sotberd",
+    author_email="sotiriosn.berdes@gmail.com",
+    description="A tool built on top of boto3 that allows you to easily manage your S3 buckets.",
+    long_description=long_description,
+    long_description_content_type="text/markdown",
+    url="https://github.com/sotberd/s3hive",
+    packages=setuptools.find_packages(),
+    classifiers=[
+        "Programming Language :: Python :: 3",
+        "License :: OSI Approved :: MIT License",
+        "Operating System :: OS Independent",
+    ],
+    keywords=["s3", "boto3", "aws", "bucket", "s3hive"],
+    python_requires=">=3.8",
+    license="MIT",
+    install_requires=[
+        "boto3",
+        "tqdm",
+    ],
+)