|
| 1 | +"""User ID anonymization utilities.""" |
| 2 | + |
| 3 | +import hashlib |
| 4 | +import logging |
| 5 | +from typing import Optional |
| 6 | + |
| 7 | +from sqlalchemy.exc import IntegrityError |
| 8 | + |
| 9 | +from models.database.user_mapping import UserMapping |
| 10 | +from app.database import get_session |
| 11 | +from utils.suid import get_suid |
| 12 | + |
| 13 | +logger = logging.getLogger("utils.user_anonymization") |
| 14 | + |
| 15 | + |
def _hash_user_id(
    user_id: str, *, salt: str = "lightspeed_user_anonymization_salt_v1"
) -> str:
    """
    Create a consistent hash of the user ID for mapping purposes.

    The digest is SHA-256 over ``"{salt}:{user_id}"`` encoded as UTF-8, so the
    same (salt, user ID) pair always yields the same 64-character hex string.

    A salt shared by all users only defeats lookup tables that were
    precomputed without it; anyone who knows the salt can still hash candidate
    user IDs. Deployments that need a secret value can pass their own salt.

    Args:
        user_id: The original user ID from authentication.
        salt: Value prefixed to the user ID before hashing. The default
            reproduces the hashes generated before this parameter existed;
            hashes computed with a different salt will not match them.

    Returns:
        Lowercase hexadecimal SHA-256 digest of the salted user ID.
    """
    hash_input = f"{salt}:{user_id}".encode("utf-8")
    return hashlib.sha256(hash_input).hexdigest()
| 27 | + |
| 28 | + |
def get_anonymous_user_id(auth_user_id: str) -> str:
    """
    Return the anonymous ID mapped to an authenticated user, creating it if needed.

    The user ID is hashed first and only the hash is used for the lookup and
    for the stored row. When no mapping exists, a new ID is obtained from
    ``get_suid()`` and persisted together with the hash.

    Args:
        auth_user_id: The original user ID from authentication

    Returns:
        The anonymous ID string associated with this user

    Raises:
        RuntimeError: If the insert fails with an IntegrityError and no
            mapping for the hash can be found afterwards.
    """
    hashed_id = _hash_user_id(auth_user_id)
    # Log messages only ever carry this shortened form of the hash.
    hash_prefix = hashed_id[:8] + "..."

    with get_session() as session:

        def lookup():
            # Fetch the stored mapping for this hash, or None when absent.
            return session.query(UserMapping).filter_by(user_id_hash=hashed_id).first()

        mapping = lookup()
        if mapping:
            logger.debug("Found existing anonymous ID for user hash %s", hash_prefix)
            return mapping.anonymous_id

        # Nothing stored yet: build a fresh mapping row.
        fresh_id = get_suid()
        candidate = UserMapping(anonymous_id=fresh_id, user_id_hash=hashed_id)

        try:
            session.add(candidate)
            session.commit()
            logger.info(
                "Created new anonymous ID %s for user hash %s",
                fresh_id,
                hash_prefix,
            )
            return fresh_id

        except IntegrityError as exc:
            # The insert was rejected; this is treated as a concurrent writer
            # having stored a mapping for the same user in the meantime.
            session.rollback()
            logger.warning("Race condition creating user mapping: %s", exc)

            mapping = lookup()
            if not mapping:
                # Neither our insert nor a competing one is visible.
                logger.error(
                    "Failed to create or retrieve user mapping for hash %s",
                    hash_prefix,
                )
                raise RuntimeError(
                    "Unable to create or retrieve anonymous user ID"
                ) from exc

            return mapping.anonymous_id
| 92 | + |
| 93 | + |
def get_user_count() -> int:
    """
    Count the stored user mappings.

    Returns:
        Number of UserMapping rows reported by the database
    """
    with get_session() as session:
        mapping_query = session.query(UserMapping)
        return mapping_query.count()
| 103 | + |
| 104 | + |
def find_anonymous_user_id(auth_user_id: str) -> Optional[str]:
    """
    Find existing anonymous ID for a user without creating a new one.

    The user ID is hashed and the mapping table is queried by that hash; no
    row is inserted when the lookup comes back empty.

    Args:
        auth_user_id: The original user ID from authentication

    Returns:
        The stored anonymous ID if a mapping exists, None otherwise
    """
    user_id_hash = _hash_user_id(auth_user_id)

    with get_session() as session:
        existing_mapping = (
            session.query(UserMapping).filter_by(user_id_hash=user_id_hash).first()
        )

        # Read the attribute while the session is still open, matching
        # get_anonymous_user_id; touching an ORM object after its session has
        # been closed can fail if the attribute needs to be (re)loaded.
        return existing_mapping.anonymous_id if existing_mapping else None
0 commit comments