Skip to content

Commit

Permalink
fix environment management for upload and download (#32)
Browse files Browse the repository at this point in the history
* fix environment management for upload and download

* add log for environment used
  • Loading branch information
NicolasDuchenne authored Feb 12, 2025
1 parent 12e6631 commit 7d14f24
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 20 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ dist/
*.sqlite3
*.db
*.duckdb
*.duckdb.*
*.csv
*.parquet
*.xlsx
Expand Down
26 changes: 17 additions & 9 deletions pipelines/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import click

# Importer et charger les variables d'environnement depuis config.py
from pipelines.config.config import load_env_variables
from pipelines.config.config import get_environment, load_env_variables

load_env_variables()

Expand All @@ -15,6 +15,8 @@
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)

logger = logging.getLogger(__name__)


@click.group()
def cli():
Expand Down Expand Up @@ -88,30 +90,36 @@ def run_build_database(refresh_type, custom_years, drop_tables):
@click.option(
"--env",
type=click.Choice(["dev", "prod"]),
default="prod",
help="Environment to download from",
default=None,
help="Environment to download from. It will override environment defined in .env",
)
def run_download_database(env):
"""Download database from S3."""
os.environ["ENVIRONMENT"] = env
if env is not None:
os.environ["ENV"] = env
env = get_environment(default="prod")
logger.info(f"Running on env {env}")
module = importlib.import_module("tasks.download_database")
task_func = getattr(module, "execute")
task_func()
task_func(env)


@run.command("upload_database")
@click.option(
"--env",
type=click.Choice(["dev", "prod"]),
default="dev",
help="Environment to upload to",
default=None,
help="Environment to upload to. It will override environment defined in .env",
)
def run_upload_database(env):
"""Upload database to S3."""
os.environ["ENVIRONMENT"] = env
if env is not None:
os.environ["ENV"] = env
env = get_environment(default="dev")
logger.info(f"Running on env {env}")
module = importlib.import_module("tasks.upload_database")
task_func = getattr(module, "execute")
task_func()
task_func(env)


if __name__ == "__main__":
Expand Down
10 changes: 4 additions & 6 deletions pipelines/tasks/download_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,21 +11,19 @@

import logging

from pipelines.config.config import get_environment, get_s3_path
from pipelines.config.config import get_s3_path
from pipelines.tasks._common import DUCKDB_FILE
from pipelines.utils.storage_client import ObjectStorageClient

logger = logging.getLogger(__name__)


def download_database_from_storage():
def download_database_from_storage(env):
"""
Download the database from Storage Object depending on the environment
This requires setting the correct environment variables for the Scaleway credentials
"""
s3 = ObjectStorageClient()

env = get_environment(default="prod")
remote_s3_path = get_s3_path(env)
local_db_path = DUCKDB_FILE

Expand All @@ -35,5 +33,5 @@ def download_database_from_storage():
)


def execute():
download_database_from_storage()
def execute(env):
    """Task entry point called by the pipeline runner.

    Delegates to the storage helper, downloading the database for the
    given environment ("dev" or "prod").
    """
    return download_database_from_storage(env)
9 changes: 4 additions & 5 deletions pipelines/tasks/upload_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,27 +11,26 @@

import logging

from pipelines.config.config import get_environment, get_s3_path
from pipelines.config.config import get_s3_path
from pipelines.tasks._common import DUCKDB_FILE
from pipelines.utils.storage_client import ObjectStorageClient

logger = logging.getLogger(__name__)


def upload_database_to_storage():
def upload_database_to_storage(env):
"""
Upload the database built locally to Storage Object depending on the environment
This requires setting the correct environment variables for the Scaleway credentials
"""
s3 = ObjectStorageClient()

db_path = DUCKDB_FILE # Fichier local
env = get_environment(default="dev")
s3_path = get_s3_path(env) # Destination sur S3

s3.upload_object(db_path, s3_path)
logger.info(f"✅ Base uploadée sur s3://{s3.bucket_name}/{s3_path}")


def execute():
upload_database_to_storage()
def execute(env):
    """Task entry point called by the pipeline runner.

    Delegates to the storage helper, uploading the local database for the
    given environment ("dev" or "prod").
    """
    return upload_database_to_storage(env)

0 comments on commit 7d14f24

Please sign in to comment.