"""Clear the description of offers that are linked to a product.

Offers are processed by id range, in batches of ``BATCH_SIZE`` ids. When not
in dry-run mode, the ``offer`` table is vacuumed and analyzed periodically
along the way. The script is a dry run unless ``--not-dry`` is given.
"""

import argparse
import collections
import datetime
import logging
import math
import statistics
import time
import typing

import pytz
from sqlalchemy import func
from sqlalchemy import text

import pcapi.core.offers.models as offers_models
from pcapi.models import db


logger = logging.getLogger(__name__)

BATCH_SIZE = 1_000
REPORT_EVERY = 1_000
# Fraction of the highest offer id that is processed between two VACUUM runs.
VACUUM_RATIO = 0.19
# Number of most recent batch durations used to estimate the ETA.
ETA_WINDOW = 100


def _get_eta(end: int, current: int, elapsed_per_batch: typing.Iterable[float]) -> str:
    """Return the estimated completion time, formatted in Europe/Paris time.

    Args:
        end: last offer id to process.
        current: last offer id already processed.
        elapsed_per_batch: durations (in seconds) of previous batches; must
            not be empty.

    Returns:
        The estimated end date as ``dd/mm/YYYY HH:MM:SS``.

    Raises:
        statistics.StatisticsError: if ``elapsed_per_batch`` is empty.
    """
    left_to_do = max(end - current, 0)
    seconds_eta = left_to_do / BATCH_SIZE * statistics.mean(elapsed_per_batch)
    # Use an aware UTC datetime: calling astimezone() on a naive datetime
    # would interpret it as the host's local time instead of UTC.
    now = datetime.datetime.now(datetime.timezone.utc)
    eta = (now + datetime.timedelta(seconds=seconds_eta)).astimezone(pytz.timezone("Europe/Paris"))
    return eta.strftime("%d/%m/%Y %H:%M:%S")


def _vacuum_and_analyze_offers() -> None:
    """Run VACUUM then ANALYZE on the ``offer`` table and log how long it took."""
    start_maintenance_time = time.perf_counter()
    # VACUUM must run outside a transaction context.
    # Issuing a raw COMMIT first is the only way found to make it work.
    logger.info("Starting vacuum and analyze offers")
    db.session.execute(text("COMMIT"))
    db.session.execute(text("VACUUM offer"))
    db.session.execute(text("ANALYZE offer"))
    logger.info("Ending vacuum and analyze offers - %s", int(time.perf_counter() - start_maintenance_time))


def delete_description(starting_id: int, ending_id: int, not_dry: bool = False) -> None:
    """Set ``description`` to NULL on offers linked to a product.

    Offers whose id lies in ``[starting_id, ending_id]`` (both inclusive) and
    whose ``productId`` is not null are updated, ``BATCH_SIZE`` ids at a time.

    Args:
        starting_id: first offer id to process.
        ending_id: last offer id to process.
        not_dry: when true, each batch is committed and the table is
            periodically vacuumed; otherwise each batch is rolled back.
    """
    # ``max_id`` is None when the table is empty.
    max_id = db.session.query(func.max(offers_models.Offer.id)).scalar() or 0
    vacuum_threshold = math.ceil(int(max_id * VACUUM_RATIO) / BATCH_SIZE) * BATCH_SIZE
    # Never go below one batch, so that a small table cannot yield a zero threshold.
    vacuum_threshold = max(vacuum_threshold, BATCH_SIZE)
    logger.info("Run a vacuum every %s offers", vacuum_threshold)

    update_statement = text(
        """
        update offer set "description" = null
        where id between :start and :end and "productId" is not null
        """
    )
    # Bounded window: keeps the ETA computation cheap and reactive to the
    # current speed, however many batches have been run.
    elapsed_per_batch: typing.Deque[float] = collections.deque(maxlen=ETA_WINDOW)
    to_report = 0
    processed_since_vacuum = 0

    for batch_start in range(starting_id, ending_id + 1, BATCH_SIZE):
        # BETWEEN is inclusive on both sides: stop one id before the next
        # batch, and never go past the requested ending id.
        batch_end = min(batch_start + BATCH_SIZE - 1, ending_id)
        start_time = time.perf_counter()
        db.session.execute(update_statement, params={"start": batch_start, "end": batch_end})
        if not_dry:
            db.session.commit()
            # Count processed ids rather than testing the absolute id, so the
            # schedule does not depend on how ``starting_id`` is aligned.
            processed_since_vacuum += BATCH_SIZE
            if processed_since_vacuum >= vacuum_threshold:
                processed_since_vacuum = 0
                _vacuum_and_analyze_offers()
        else:
            db.session.rollback()
        # Keep sub-second precision: truncating to whole seconds would make
        # fast batches count as zero and collapse the ETA to "now".
        elapsed_per_batch.append(time.perf_counter() - start_time)

        to_report += BATCH_SIZE
        if to_report >= REPORT_EVERY:
            to_report = 0
            eta = _get_eta(ending_id, batch_end, elapsed_per_batch)
            logger.info("BATCH : id from %s | eta = %s", batch_start, eta)


if __name__ == "__main__":
    from pcapi.flask_app import app

    app.app_context().push()

    parser = argparse.ArgumentParser(description="Delete description for offers linked to a product")
    parser.add_argument("--starting-id", type=int, default=0, help="starting offer id")
    parser.add_argument("--ending-id", type=int, default=0, help="ending offer id")
    parser.add_argument("--not-dry", action="store_true", help="set to really process (dry-run by default)")
    args = parser.parse_args()

    if args.starting_id > args.ending_id:
        raise ValueError('"start" must be less than "end"')

    delete_description(args.starting_id, args.ending_id, args.not_dry)