Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move data from sto2 to sto4 location #1448

Merged
merged 43 commits into from
Aug 9, 2023
Merged
Show file tree
Hide file tree
Changes from 42 commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
ef6dc19
add sto4 columns
i-oden Jul 26, 2023
37f9ce2
migration
i-oden Jul 26, 2023
27283af
new minio
i-oden Jul 26, 2023
4ee856b
creating new unit should place storage in sto4
i-oden Jul 26, 2023
815e8c2
update migration to include date start
i-oden Jul 26, 2023
72e2285
new command for updating unit info
i-oden Jul 26, 2023
bf86a51
update unit - get unit object
i-oden Jul 27, 2023
5ba548b
prompt to ask for overwrite if sto4 info already exists.
i-oden Jul 27, 2023
0d68138
save sto4 info
i-oden Jul 27, 2023
f5f50f8
tests for new command update_unit
i-oden Jul 27, 2023
31bfe17
black
i-oden Jul 27, 2023
2c770d8
uncomment other tests
i-oden Jul 27, 2023
008aea8
black
i-oden Jul 27, 2023
4e8d0b2
Delete sto4 variables from memory
i-oden Jul 27, 2023
9c0c54c
add comment for cleaning up
i-oden Jul 27, 2023
1a0e15d
update tests for listing lost files
i-oden Jul 27, 2023
fe23265
tests updated for listing missing files when not specifying project
i-oden Jul 27, 2023
89167b6
black
i-oden Jul 27, 2023
944d627
move use_sto4 to utils and add test
i-oden Jul 27, 2023
6f52c2e
fixed command test
i-oden Jul 27, 2023
8b60330
fixed test
i-oden Jul 27, 2023
9feb424
tests for missing bucket
i-oden Jul 28, 2023
ae26721
delete lost files and tests
i-oden Jul 28, 2023
87d3bae
black
i-oden Jul 28, 2023
c887f1e
uncommented tests
i-oden Jul 28, 2023
e7d4e58
check if to use sto4 in get_s3_info
i-oden Jul 28, 2023
b287046
black
i-oden Jul 28, 2023
ec4426e
refactored commands
i-oden Jul 28, 2023
a7e7984
sprintlog
i-oden Jul 28, 2023
5686e10
update docker compose
i-oden Jul 28, 2023
77dcfe3
vlack
i-oden Jul 28, 2023
fd9824e
uncomment tests in utils
i-oden Jul 28, 2023
4267827
indent
i-oden Jul 28, 2023
1912e8f
black
i-oden Jul 28, 2023
f8493ea
placeholder for tests
i-oden Jul 31, 2023
43ac4b0
black
i-oden Jul 31, 2023
8b26005
logging in utils function instead
i-oden Jul 31, 2023
817bbf4
refactor logging
i-oden Jul 31, 2023
74d5be9
tests to check that project created in correct location
i-oden Jul 31, 2023
0cac898
Merge branch 'dev' into DDS-1419-move-data-from-sto-2-to-sto-4
i-oden Aug 8, 2023
154c908
Merge branch 'dev' into DDS-1419-move-data-from-sto-2-to-sto-4
i-oden Aug 8, 2023
ffb7cf5
Merge branch 'dev' into DDS-1419-move-data-from-sto-2-to-sto-4
i-oden Aug 8, 2023
6034d5c
Update dds_web/api/api_s3_connector.py
i-oden Aug 9, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions SPRINTLOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -276,3 +276,4 @@ _Nothing merged in CLI during this sprint_
- Empty endpoint: `ProjectBusy` ([#1446](https://github.com/ScilifelabDataCentre/dds_web/pull/1446))
- Rename storage-related columns in `Unit` table ([#1447](https://github.com/ScilifelabDataCentre/dds_web/pull/1447))
- Dependency: Bump `cryptography` to 41.0.3 due to security vulnerability alert(s) ([#1451](https://github.com/ScilifelabDataCentre/dds_web/pull/1451))
- Allow for change of storage location ([#1448](https://github.com/ScilifelabDataCentre/dds_web/pull/1448))
2 changes: 2 additions & 0 deletions dds_web/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,11 +277,13 @@ def load_user(user_id):
quarterly_usage,
collect_stats,
monitor_usage,
update_unit,
)

# Add flask commands - general
app.cli.add_command(fill_db_wrapper)
app.cli.add_command(create_new_unit)
app.cli.add_command(update_unit)
app.cli.add_command(update_uploaded_file_with_log)
app.cli.add_command(lost_files_s3_db)

Expand Down
14 changes: 10 additions & 4 deletions dds_web/api/api_s3_connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
)

from dds_web.database import models
import dds_web.utils


####################################################################################################
Expand Down Expand Up @@ -52,13 +53,18 @@ def __exit__(self, exc_type, exc_value, tb):

def get_s3_info(self):
"""Get information required to connect to cloud."""
i-oden marked this conversation as resolved.
Show resolved Hide resolved
# Check whether to use sto4
use_sto4 = dds_web.utils.use_sto4(
unit_object=self.project.responsible_unit, project_object=self.project
)

endpoint, name, accesskey, secretkey = (
models.Unit.query.filter_by(id=self.project.responsible_unit.id)
.with_entities(
models.Unit.sto2_endpoint,
models.Unit.sto2_name,
models.Unit.sto2_access,
models.Unit.sto2_secret,
models.Unit.sto4_endpoint if use_sto4 else models.Unit.sto2_endpoint,
models.Unit.sto4_name if use_sto4 else models.Unit.sto2_name,
models.Unit.sto4_access if use_sto4 else models.Unit.sto2_access,
models.Unit.sto4_secret if use_sto4 else models.Unit.sto2_secret,
)
.one_or_none()
)
Expand Down
161 changes: 137 additions & 24 deletions dds_web/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import sys
import datetime
from dateutil.relativedelta import relativedelta
import gc

# Installed
import click
Expand Down Expand Up @@ -130,10 +131,10 @@ def create_new_unit(
external_display_name=external_display_name,
contact_email=contact_email,
internal_ref=internal_ref or public_id,
sto2_endpoint=safespring_endpoint,
sto2_name=safespring_name,
sto2_access=safespring_access,
sto2_secret=safespring_secret,
sto4_endpoint=safespring_endpoint,
sto4_name=safespring_name,
sto4_access=safespring_access,
sto4_secret=safespring_secret,
days_in_available=days_in_available,
days_in_expired=days_in_expired,
quota=quota,
Expand All @@ -144,6 +145,61 @@ def create_new_unit(

flask.current_app.logger.info(f"Unit '{name}' created")

# Clean up information
del safespring_endpoint
del safespring_name
del safespring_access
del safespring_secret
gc.collect()


@click.command("update-unit")
@click.option("--unit-id", "-u", type=str, required=True)
@click.option("--sto4-endpoint", "-se", type=str, required=True)
@click.option("--sto4-name", "-sn", type=str, required=True)
@click.option("--sto4-access", "-sa", type=str, required=True)
@click.option("--sto4-secret", "-ss", type=str, required=True)
@flask.cli.with_appcontext
def update_unit(unit_id, sto4_endpoint, sto4_name, sto4_access, sto4_secret):
    """Save sto4 connection details on an existing unit.

    The unit is looked up by its public ID. If the unit already has a sto4
    start time recorded, the operator is asked to confirm the overwrite.
    On success the endpoint, name, access key and secret key are stored
    together with the current time as the sto4 start time.
    """
    # Imports
    import rich.prompt
    from dds_web import db
    from dds_web.utils import current_time
    from dds_web.database import models

    # Look up the unit; nothing to do if it does not exist
    target_unit: models.Unit = models.Unit.query.filter_by(public_id=unit_id).one_or_none()
    if not target_unit:
        flask.current_app.logger.error(f"There is no unit with the public ID '{unit_id}'.")
        return

    # Existing sto4 info is only overwritten after explicit confirmation
    if target_unit.sto4_start_time and not rich.prompt.Confirm.ask(
        f"Unit '{unit_id}' appears to have sto4 variables set already. Are you sure you want to overwrite them?"
    ):
        flask.current_app.logger.info(f"Cancelling sto4 update for unit '{unit_id}'.")
        return

    # Persist the new sto4 info, stamping the moment it became active
    target_unit.sto4_start_time = current_time()
    target_unit.sto4_endpoint = sto4_endpoint
    target_unit.sto4_name = sto4_name
    target_unit.sto4_access = sto4_access
    target_unit.sto4_secret = sto4_secret
    db.session.commit()

    flask.current_app.logger.info(f"Unit '{unit_id}' updated successfully")

    # Drop the local references to the connection details
    del sto4_endpoint, sto4_name, sto4_access, sto4_secret
    gc.collect()


@click.command("update-uploaded-file")
@click.option("--project", "-p", type=str, required=True)
Expand Down Expand Up @@ -235,7 +291,8 @@ def list_lost_files(project_id: str):
# Imports
import boto3
from dds_web.database import models
from dds_web.utils import list_lost_files_in_project
from dds_web.utils import list_lost_files_in_project, use_sto4
from dds_web.errors import S3InfoNotFoundError

if project_id:
flask.current_app.logger.debug(f"Searching for lost files in project '{project_id}'.")
Expand All @@ -248,12 +305,26 @@ def list_lost_files(project_id: str):
# Start s3 session
session = boto3.session.Session()

# Check which Safespring storage location to use
# Use sto4 if project created after sto4 info added
try:
sto4: bool = use_sto4(unit_object=project.responsible_unit, project_object=project)
except S3InfoNotFoundError as err:
flask.current_app.logger.error(str(err))
sys.exit(1)

# Connect to S3
resource = session.resource(
service_name="s3",
endpoint_url=project.responsible_unit.sto2_endpoint,
aws_access_key_id=project.responsible_unit.sto2_access,
aws_secret_access_key=project.responsible_unit.sto2_secret,
endpoint_url=project.responsible_unit.sto4_endpoint
if sto4
else project.responsible_unit.sto2_endpoint,
aws_access_key_id=project.responsible_unit.sto4_access
if sto4
else project.responsible_unit.sto2_access,
aws_secret_access_key=project.responsible_unit.sto4_secret
if sto4
else project.responsible_unit.sto2_secret,
)

# List the lost files
Expand Down Expand Up @@ -291,20 +362,34 @@ def list_lost_files(project_id: str):
# Start s3 session
session = boto3.session.Session()

# Connect to S3
resource_unit = session.resource(
service_name="s3",
endpoint_url=unit.sto2_endpoint,
aws_access_key_id=unit.sto2_access,
aws_secret_access_key=unit.sto2_secret,
)

# Counts
in_db_but_not_in_s3_count: int = 0
in_s3_but_not_in_db_count: int = 0

# List files in all projects
for proj in unit.projects:
# Check which Safespring storage location to use
# Use sto4 if project created after sto4 info added
try:
sto4: bool = use_sto4(unit_object=unit, project_object=proj)
except S3InfoNotFoundError as err:
flask.current_app.logger.error(str(err))
continue

# Connect to S3
resource_unit = session.resource(
service_name="s3",
endpoint_url=proj.responsible_unit.sto4_endpoint
if sto4
else proj.responsible_unit.sto2_endpoint,
aws_access_key_id=proj.responsible_unit.sto4_access
if sto4
else proj.responsible_unit.sto2_access,
aws_secret_access_key=proj.responsible_unit.sto4_secret
if sto4
else proj.responsible_unit.sto2_secret,
)

# List the lost files
try:
in_db_but_not_in_s3, in_s3_but_not_in_db = list_lost_files_in_project(
Expand Down Expand Up @@ -338,7 +423,8 @@ def add_missing_bucket(project_id: str):
import boto3
from botocore.client import ClientError
from dds_web.database import models
from dds_web.utils import bucket_is_valid
from dds_web.utils import bucket_is_valid, use_sto4
from dds_web.errors import S3InfoNotFoundError

# Get project object
project: models.Project = models.Project.query.filter_by(public_id=project_id).one_or_none()
Expand All @@ -354,12 +440,25 @@ def add_missing_bucket(project_id: str):
# Start s3 session
session = boto3.session.Session()

# Use sto4 if project created after sto4 info added
try:
sto4 = use_sto4(unit_object=project.responsible_unit, project_object=project)
except S3InfoNotFoundError as err:
flask.current_app.logger.error(str(err))
sys.exit(1)

# Connect to S3
resource = session.resource(
service_name="s3",
endpoint_url=project.responsible_unit.sto2_endpoint,
aws_access_key_id=project.responsible_unit.sto2_access,
aws_secret_access_key=project.responsible_unit.sto2_secret,
endpoint_url=project.responsible_unit.sto4_endpoint
if sto4
else project.responsible_unit.sto2_endpoint,
aws_access_key_id=project.responsible_unit.sto4_access
if sto4
else project.responsible_unit.sto2_access,
aws_secret_access_key=project.responsible_unit.sto4_secret
if sto4
else project.responsible_unit.sto2_secret,
)

# Check if bucket exists
Expand Down Expand Up @@ -392,7 +491,8 @@ def delete_lost_files(project_id: str):
# Imports
import boto3
from dds_web.database import models
from dds_web.utils import list_lost_files_in_project
from dds_web.utils import list_lost_files_in_project, use_sto4
from dds_web.errors import S3InfoNotFoundError

# Get project object
project: models.Project = models.Project.query.filter_by(public_id=project_id).one_or_none()
Expand All @@ -403,12 +503,25 @@ def delete_lost_files(project_id: str):
# Start s3 session
session = boto3.session.Session()

# Use sto4 if project created after sto4 info added
try:
sto4: bool = use_sto4(unit_object=project.responsible_unit, project_object=project)
except S3InfoNotFoundError as err:
flask.current_app.logger.error(str(err))
sys.exit(1)

# Connect to S3
resource = session.resource(
service_name="s3",
endpoint_url=project.responsible_unit.sto2_endpoint,
aws_access_key_id=project.responsible_unit.sto2_access,
aws_secret_access_key=project.responsible_unit.sto2_secret,
endpoint_url=project.responsible_unit.sto4_endpoint
if sto4
else project.responsible_unit.sto2_endpoint,
aws_access_key_id=project.responsible_unit.sto4_access
if sto4
else project.responsible_unit.sto2_access,
aws_secret_access_key=project.responsible_unit.sto4_secret
if sto4
else project.responsible_unit.sto2_secret,
)

# Get list of lost files
Expand Down
7 changes: 7 additions & 0 deletions dds_web/database/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,13 @@ class Unit(db.Model):
sto2_access = db.Column(db.String(255), unique=False, nullable=False) # unique=True later
sto2_secret = db.Column(db.String(255), unique=False, nullable=False) # unique=True later

# New safespring storage
sto4_start_time = db.Column(db.DateTime(), nullable=True)
sto4_endpoint = db.Column(db.String(255), unique=False, nullable=True) # unique=True later
sto4_name = db.Column(db.String(255), unique=False, nullable=True) # unique=True later
sto4_access = db.Column(db.String(255), unique=False, nullable=True) # unique=True later
sto4_secret = db.Column(db.String(255), unique=False, nullable=True) # unique=True later

days_in_available = db.Column(db.Integer, unique=False, nullable=False, default=90)
counter = db.Column(db.Integer, unique=False, nullable=True)
days_in_expired = db.Column(db.Integer, unique=False, nullable=False, default=30)
Expand Down
24 changes: 24 additions & 0 deletions dds_web/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
DDSArgumentError,
NoSuchProjectError,
MaintenanceOngoingException,
S3InfoNotFoundError,
)
import flask_mail
import flask_login
Expand Down Expand Up @@ -725,3 +726,26 @@ def list_lost_files_in_project(project, s3_resource):
)

return diff_db, diff_s3


def use_sto4(unit_object, project_object) -> bool:
    """Decide whether a project's data lives in the sto4 storage location.

    Returns True when the unit has a sto4 start time and the project was
    created after it; otherwise returns False (sto2). The chosen location
    is logged at info level.

    Raises S3InfoNotFoundError if sto4 should be used but any of the unit's
    sto4 endpoint, name, access key or secret key is missing.
    """
    log_prefix: str = f"Safespring location for project '{project_object.public_id}': "
    sto4_active_since = unit_object.sto4_start_time

    # No sto4 start time, or project not created after it: stay on sto2
    if not sto4_active_since or not (project_object.date_created > sto4_active_since):
        flask.current_app.logger.info(f"{log_prefix}sto2")
        return False

    # sto4 applies, so every connection detail must be present
    required_values = (
        unit_object.sto4_endpoint,
        unit_object.sto4_name,
        unit_object.sto4_access,
        unit_object.sto4_secret,
    )
    if any(not value for value in required_values):
        raise S3InfoNotFoundError(
            message=f"One or more sto4 variables are missing for unit {unit_object.public_id}."
        )

    flask.current_app.logger.info(f"{log_prefix}sto4")
    return True
21 changes: 21 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,27 @@ services:
# source: ./minio-data
# target: /data

minio2: # Added in order to be able to test the new sto4 move
container_name: dds_minio_2
image: minio/minio:RELEASE.2022-02-24T22-12-01Z
profiles:
- s3
- full-dev
- cli
command: server /data --console-address ":9003"
ports:
- 127.0.0.1:9002:9000
- 127.0.0.1:9003:9003
environment:
MINIO_ROOT_USER: minio2 # access key
MINIO_ROOT_PASSWORD: minioPassword2 # secret key
# NOTE: Uncomment if you want to keep your data.
# Mounts a folder into the container to make uploaded data persistent.
# volumes:
# - type: bind
# source: ./minio-data
# target: /data

mailcatcher:
container_name: dds_mailcatcher
image: sj26/mailcatcher:latest
Expand Down
Loading