Skip to content

Commit

Permalink
feat(Moderation): script to remove Products with long barcodes (#700)
Browse files Browse the repository at this point in the history
  • Loading branch information
raphodn authored Feb 2, 2025
1 parent 3e9c58c commit 33ff94d
Show file tree
Hide file tree
Showing 7 changed files with 90 additions and 2 deletions.
6 changes: 6 additions & 0 deletions open_prices/common/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from open_prices.common.openfoodfacts import import_product_db
from open_prices.common.utils import export_model_to_jsonl_gz
from open_prices.locations.models import Location
from open_prices.moderation import rules as moderation_rules
from open_prices.prices.models import Price
from open_prices.products.models import Product
from open_prices.proofs.models import Proof
Expand Down Expand Up @@ -101,6 +102,10 @@ def fix_proof_fields_task():
proof.set_missing_fields_from_prices()


def moderation_tasks():
    """
    Run the data moderation rules

    Currently a single rule: remove products with long barcodes
    """
    moderation_rules.cleanup_products_with_long_barcodes()


def dump_db_task():
"""
Dump the database as JSONL files to the data directory
Expand All @@ -123,6 +128,7 @@ def dump_db_task():
"import_off_db_task": "30 15 * * *", # daily at 15:30
"update_total_stats_task": "0 1 * * *", # daily at 01:00
"fix_proof_fields_task": "10 1 * * *", # daily at 01:10
"moderation_tasks": "20 1 * * *", # daily at 01:20
"update_user_counts_task": "0 2 * * 1", # every start of the week
"update_location_counts_task": "10 2 * * 1", # every start of the week
"update_product_counts_task": "20 2 * * 1", # every start of the week
Expand Down
Empty file.
46 changes: 46 additions & 0 deletions open_prices/moderation/rules.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
"""
A list of rules, to clean up the data
"""

from django.db.models import Q
from django.db.models.functions import Length

from open_prices.prices import constants as price_constants
from open_prices.products.models import Product


def cleanup_products_with_long_barcodes():
    """
    Delete products with long barcodes, together with their prices
    - a barcode is "long" when it has more than 13 characters
    - only products without a source are considered
    - only prices whose source matches one of the price tag validation
      workflows are deleted
    - a product is deleted once it has no prices left

    Progress is reported on stdout; nothing is returned
    """
    # candidate products: no source, code longer than 13 characters
    long_code_products = Product.objects.annotate(
        code_length_annotated=Length("code")
    ).filter(code_length_annotated__gt=13, source=None)
    print(f"Found {long_code_products.count()} products with long barcodes")

    # OR together one "source contains ..." condition per validation workflow
    validation_source_filter = Q()
    for workflow_source in price_constants.PRICE_CREATED_FROM_PRICE_TAG_VALIDATION_SOURCE_LIST:
        validation_source_filter |= Q(source__contains=workflow_source)

    deleted_prices = 0
    deleted_products = 0

    for product in long_code_products:
        # delete prices 1 by 1 (instead of a bulk delete) to trigger signals
        for price in product.prices.filter(validation_source_filter):
            price.delete()
            deleted_prices += 1
        # NOTE(review): assumes this check applies to every long-barcode
        # product, including those that had no matching prices - confirm
        if not product.prices.exists():
            product.delete()
            deleted_products += 1

    # recap
    print(f"Deleted {deleted_prices} prices and {deleted_products} products")
30 changes: 30 additions & 0 deletions open_prices/moderation/tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from django.test import TestCase

from open_prices.moderation.rules import cleanup_products_with_long_barcodes
from open_prices.prices.factories import PriceFactory
from open_prices.prices.models import Price
from open_prices.products.factories import ProductFactory
from open_prices.products.models import Product


class ModerationRulesTest(TestCase):
    """Tests for the clean-up rules of open_prices.moderation.rules"""

    @classmethod
    def setUpTestData(cls):
        validation_source = "web - /experiments/price-validation-assistant"

        # 13-character barcode, with 2 prices (1 of them from a validation workflow)
        cls.product_ok = ProductFactory(code="0123456789100")
        PriceFactory(product_code=cls.product_ok.code, source="mobile")
        PriceFactory(product_code=cls.product_ok.code, source=validation_source)

        # 14-character barcode, with 1 price from a validation workflow
        cls.product_not_ok = ProductFactory(code="01234567891000")
        PriceFactory(product_code=cls.product_not_ok.code, source=validation_source)

    def test_cleanup_products_with_long_barcodes(self):
        # before: 2 products, 2 + 1 prices
        self.assertEqual(Product.objects.count(), 2)
        self.assertEqual(Price.objects.count(), 3)

        cleanup_products_with_long_barcodes()

        # after: 1 product and 1 price deleted
        self.assertEqual(Product.objects.count(), 1)
        self.assertEqual(Price.objects.count(), 2)
6 changes: 6 additions & 0 deletions open_prices/prices/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,9 @@
PRICE_PER_KILOGRAM = "KILOGRAM"
PRICE_PER_LIST = [PRICE_PER_UNIT, PRICE_PER_KILOGRAM]
PRICE_PER_CHOICES = [(key, key) for key in PRICE_PER_LIST]


# Fragments identifying prices created from the price tag validation workflows.
# The moderation rules match them as substrings of Price.source
# (source__contains), so entries do not need to be the full source value.
PRICE_CREATED_FROM_PRICE_TAG_VALIDATION_SOURCE_LIST = [
    "/experiments/price-validation-assistant",
    "/experiments/contribution-assistant",
]
2 changes: 1 addition & 1 deletion open_prices/proofs/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
(PRICE_TAG_EXTRACTION_TYPE, PRICE_TAG_EXTRACTION_TYPE)
]

PROOF_READY_FOR_PRICE_TAG_VALIDATION_SOURCES = [
PROOF_READY_FOR_PRICE_TAG_VALIDATION_SOURCE_LIST = [
"/proofs/add/single",
"/proofs/add/multiple",
]
Expand Down
2 changes: 1 addition & 1 deletion open_prices/proofs/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ def set_ready_for_price_tag_validation(self):
and self.source
and any(
source in self.source
for source in proof_constants.PROOF_READY_FOR_PRICE_TAG_VALIDATION_SOURCES
for source in proof_constants.PROOF_READY_FOR_PRICE_TAG_VALIDATION_SOURCE_LIST
)
):
self.ready_for_price_tag_validation = True
Expand Down

0 comments on commit 33ff94d

Please sign in to comment.