Fix unit test failures after switching to anonymous user IDs

thoraxe · thoraxe · commit 26505ca78d74 · 2025-08-26T09:34:33.000-04:00
diff --git a/src/app/endpoints/conversations.py b/src/app/endpoints/conversations.py
@@ -190,20 +190,27 @@ def get_conversations_list_endpoint_handler(
             ]
 
             logger.info(
-                "Found %d conversations for user %s", len(conversations), user_id
+                "Found %d conversations for anonymous user %s",
+                len(conversations),
+                anonymous_user_id,
             )
 
             return ConversationsListResponse(conversations=conversations)
 
         except Exception as e:
             logger.exception(
-                "Error retrieving conversations for user %s: %s", user_id, e
+                "Error retrieving conversations for anonymous user %s: %s",
+                anonymous_user_id,
+                e,
             )
             raise HTTPException(
                 status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                 detail={
                     "response": "Unknown error",
-                    "cause": f"Unknown error while getting conversations for user {user_id}",
+                    "cause": (
+                        f"Unknown error while getting conversations for "
+                        f"anonymous user {anonymous_user_id}"
+                    ),
                 },
             ) from e
 
diff --git a/src/app/endpoints/feedback.py b/src/app/endpoints/feedback.py
@@ -19,6 +19,7 @@
 )
 from models.requests import FeedbackRequest
 from utils.suid import get_suid
+from utils.user_anonymization import get_anonymous_user_id
 
 logger = logging.getLogger(__name__)
 router = APIRouter(prefix="/feedback", tags=["feedback"])
@@ -131,7 +132,8 @@ def store_feedback(user_id: str, feedback: dict) -> None:
         user_id (str): Unique identifier of the user submitting feedback.
         feedback (dict): Feedback data to be stored, merged with user ID and timestamp.
     """
-    logger.debug("Storing feedback for user %s", user_id)
+    anonymous_user_id = get_anonymous_user_id(user_id)
+    logger.debug("Storing feedback for anonymous user %s", anonymous_user_id)
     # Creates storage path only if it doesn't exist. The `exist_ok=True` prevents
     # race conditions in case of multiple server instances trying to set up storage
     # at the same location.
@@ -141,7 +143,11 @@ def store_feedback(user_id: str, feedback: dict) -> None:
     storage_path.mkdir(parents=True, exist_ok=True)
 
     current_time = str(datetime.now(UTC))
-    data_to_store = {"user_id": user_id, "timestamp": current_time, **feedback}
+    data_to_store = {
+        "anonymous_user_id": anonymous_user_id,
+        "timestamp": current_time,
+        **feedback,
+    }
 
     # stores feedback in a file under unique uuid
     feedback_file_path = storage_path / f"{get_suid()}.json"
diff --git a/tests/unit/app/endpoints/test_conversations.py b/tests/unit/app/endpoints/test_conversations.py
@@ -46,6 +46,10 @@ def create_mock_conversation(
 
 def mock_database_session(mocker, query_result=None):
     """Helper function to mock get_session with proper context manager support."""
+    # Mock database initialization
+    mocker.patch("app.database.engine", mocker.Mock())
+    mocker.patch("app.database.SessionLocal", mocker.Mock())
+
     mock_session = mocker.Mock()
     if query_result is not None:
         mock_session.query.return_value.filter_by.return_value.all.return_value = (
@@ -530,6 +534,10 @@ def test_configuration_not_loaded(self, mocker):
     def test_successful_conversations_list_retrieval(self, mocker, setup_configuration):
         """Test successful retrieval of conversations list."""
         mocker.patch("app.endpoints.conversations.configuration", setup_configuration)
+        mocker.patch(
+            "app.endpoints.conversations.get_anonymous_user_id",
+            return_value="anon-test-user",
+        )
 
         # Mock database session and query results
         mock_conversations = [
@@ -570,6 +578,10 @@ def test_successful_conversations_list_retrieval(self, mocker, setup_configurati
     def test_empty_conversations_list(self, mocker, setup_configuration):
         """Test when user has no conversations."""
         mocker.patch("app.endpoints.conversations.configuration", setup_configuration)
+        mocker.patch(
+            "app.endpoints.conversations.get_anonymous_user_id",
+            return_value="anon-test-user",
+        )
 
         # Mock database session with no results
         mock_database_session(mocker, [])
@@ -583,6 +595,10 @@ def test_empty_conversations_list(self, mocker, setup_configuration):
     def test_database_exception(self, mocker, setup_configuration):
         """Test when database query raises an exception."""
         mocker.patch("app.endpoints.conversations.configuration", setup_configuration)
+        mocker.patch(
+            "app.endpoints.conversations.get_anonymous_user_id",
+            return_value="anon-test-user",
+        )
 
         # Mock database session to raise exception
         mock_session = mock_database_session(mocker)
@@ -594,6 +610,6 @@ def test_database_exception(self, mocker, setup_configuration):
         assert exc_info.value.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR
         assert "Unknown error" in exc_info.value.detail["response"]
         assert (
-            "Unknown error while getting conversations for user"
+            "Unknown error while getting conversations for anonymous user"
             in exc_info.value.detail["cause"]
         )
diff --git a/tests/unit/app/endpoints/test_feedback.py b/tests/unit/app/endpoints/test_feedback.py
@@ -137,6 +137,9 @@ def test_store_feedback(mocker, feedback_request_data):
     mocker.patch("builtins.open", mocker.mock_open())
     mocker.patch("app.endpoints.feedback.Path", return_value=mocker.MagicMock())
     mocker.patch("app.endpoints.feedback.get_suid", return_value="fake-uuid")
+    mocker.patch(
+        "app.endpoints.feedback.get_anonymous_user_id", return_value="anon-test-user"
+    )
 
     # Patch json to inspect stored data
     mock_json = mocker.patch("app.endpoints.feedback.json")
@@ -146,7 +149,7 @@ def test_store_feedback(mocker, feedback_request_data):
     store_feedback(user_id, feedback_request_data)
 
     expected_data = {
-        "user_id": user_id,
+        "anonymous_user_id": "anon-test-user",
         "timestamp": mocker.ANY,
         **feedback_request_data,
     }
@@ -182,6 +185,9 @@ def test_store_feedback_on_io_error(mocker, feedback_request_data):
     configuration.user_data_collection_configuration.feedback_storage = "fake-path"
     mocker.patch("app.endpoints.feedback.Path", return_value=mocker.MagicMock())
     mocker.patch("builtins.open", side_effect=PermissionError("EACCES"))
+    mocker.patch(
+        "app.endpoints.feedback.get_anonymous_user_id", return_value="anon-test-user"
+    )
 
     user_id = "test_user_id"
 
diff --git a/tests/unit/app/endpoints/test_feedback_anonymization.py b/tests/unit/app/endpoints/test_feedback_anonymization.py
@@ -0,0 +1,138 @@
+"""Tests for feedback endpoint anonymization functionality."""
+
+import os
+from unittest.mock import patch, MagicMock
+
+import pytest
+
+from app.endpoints.feedback import store_feedback
+
+
+# Autouse fixture: patch the pepper env var for the duration of each test
+@pytest.fixture(autouse=True)
+def setup_test_pepper():
+    """Set up test pepper environment variable for all tests."""
+    test_pepper = "test-pepper-for-feedback-tests"
+    with patch.dict(os.environ, {"USER_ANON_PEPPER": test_pepper}):
+        yield
+
+
+class TestFeedbackAnonymization:
+    """Test feedback storage with user anonymization."""
+
+    @patch("app.endpoints.feedback.get_anonymous_user_id")
+    @patch("app.endpoints.feedback.get_suid")
+    @patch("app.endpoints.feedback.json")
+    @patch("app.endpoints.feedback.Path")
+    def test_store_feedback_anonymizes_user_id(
+        self, mock_path, mock_json, mock_get_suid, mock_get_anonymous
+    ):
+        """Test that store_feedback uses anonymous user ID."""
+        # Setup mocks
+        mock_get_anonymous.return_value = "anon-feedback-123"
+        mock_get_suid.return_value = "feedback-uuid"
+        mock_path.return_value = MagicMock()
+
+        # Mock configuration
+        with (
+            patch("app.endpoints.feedback.configuration") as mock_config,
+            patch("builtins.open"),
+        ):
+            mock_config.user_data_collection_configuration.feedback_storage = (
+                "/tmp/feedback"
+            )
+
+            # Call store_feedback
+            store_feedback(
+                user_id="original_user@example.com",
+                feedback={
+                    "feedback": "This is test feedback",
+                    "sentiment": 1,
+                    "categories": ["helpful"],
+                },
+            )
+
+            # Verify anonymous user ID was used
+            mock_get_anonymous.assert_called_once_with("original_user@example.com")
+
+            # Verify stored data uses anonymous ID
+            stored_data = mock_json.dump.call_args[0][0]
+            assert stored_data["anonymous_user_id"] == "anon-feedback-123"
+            assert "user_id" not in stored_data
+            assert stored_data["feedback"] == "This is test feedback"
+            assert stored_data["sentiment"] == 1
+            assert stored_data["categories"] == ["helpful"]
+
+    @patch("app.endpoints.feedback.get_anonymous_user_id")
+    def test_store_feedback_different_users_get_different_anonymous_ids(
+        self, mock_get_anonymous
+    ):
+        """Test that different users get different anonymous IDs for feedback."""
+
+        def mock_anonymous_side_effect(user_id):
+            if user_id == "user1@example.com":
+                return "anon-feedback-user1"
+            if user_id == "user2@example.com":
+                return "anon-feedback-user2"
+            return "anon-unknown"
+
+        mock_get_anonymous.side_effect = mock_anonymous_side_effect
+
+        with (
+            patch("app.endpoints.feedback.json") as mock_json,
+            patch("app.endpoints.feedback.Path") as mock_path,
+            patch("builtins.open"),
+            patch("app.endpoints.feedback.get_suid", return_value="uuid"),
+            patch("app.endpoints.feedback.configuration") as mock_config,
+        ):
+
+            mock_config.user_data_collection_configuration.feedback_storage = (
+                "/tmp/feedback"
+            )
+            mock_path.return_value = MagicMock()
+
+            # Store feedback for user 1
+            store_feedback("user1@example.com", {"feedback": "Test 1"})
+            first_call_data = mock_json.dump.call_args[0][0]
+
+            # Reset mock for second call
+            mock_json.reset_mock()
+
+            # Store feedback for user 2
+            store_feedback("user2@example.com", {"feedback": "Test 2"})
+            second_call_data = mock_json.dump.call_args[0][0]
+
+            # Verify different anonymous IDs were used
+            assert first_call_data["anonymous_user_id"] == "anon-feedback-user1"
+            assert second_call_data["anonymous_user_id"] == "anon-feedback-user2"
+            assert (
+                first_call_data["anonymous_user_id"]
+                != second_call_data["anonymous_user_id"]
+            )
+
+    @patch("app.endpoints.feedback.get_anonymous_user_id")
+    @patch("app.endpoints.feedback.logger")
+    def test_feedback_logging_uses_anonymous_id(self, mock_logger, mock_get_anonymous):
+        """Test that feedback logging uses anonymous user ID."""
+        mock_get_anonymous.return_value = "anon-feedback-logging"
+
+        with (
+            patch("app.endpoints.feedback.json"),
+            patch("app.endpoints.feedback.Path") as mock_path,
+            patch("builtins.open"),
+            patch("app.endpoints.feedback.get_suid", return_value="uuid"),
+            patch("app.endpoints.feedback.configuration") as mock_config,
+        ):
+
+            mock_config.user_data_collection_configuration.feedback_storage = (
+                "/tmp/feedback"
+            )
+            mock_path.return_value = MagicMock()
+
+            # Store feedback
+            store_feedback("user@example.com", {"feedback": "Test feedback"})
+
+            # Verify logging uses anonymous ID
+            mock_logger.debug.assert_called_once_with(
+                "Storing feedback for anonymous user %s", "anon-feedback-logging"
+            )
diff --git a/tests/unit/app/endpoints/test_query.py b/tests/unit/app/endpoints/test_query.py
@@ -169,17 +169,17 @@ async def _test_query_endpoint_handler(mocker, store_transcript_to_file=False):
     # Assert the store_transcript function is called if transcripts are enabled
     if store_transcript_to_file:
         mock_transcript.assert_called_once_with(
-            anonymous_user_id="mock_user_id",
+            user_id="mock_user_id",
             conversation_id=conversation_id,
             model_id="fake_model_id",
             provider_id="fake_provider_id",
             query_is_valid=True,
             query=query,
             query_request=query_request,
             summary=summary,
-            attachments=[],
             rag_chunks=[],
             truncated=False,
+            attachments=[],
         )
     else:
         mock_transcript.assert_not_called()
diff --git a/tests/unit/utils/test_transcripts.py b/tests/unit/utils/test_transcripts.py
@@ -50,11 +50,18 @@ def test_construct_transcripts_path(mocker):
 def test_store_transcript(mocker):
     """Test the store_transcript function."""
 
+    # Patch the database engine and session factory with mocks (avoids a DB init error)
+    mocker.patch("app.database.engine", mocker.Mock())
+    mocker.patch("app.database.SessionLocal", mocker.Mock())
+
     mocker.patch("builtins.open", mocker.mock_open())
     mocker.patch(
         "utils.transcripts.construct_transcripts_path",
         return_value=mocker.MagicMock(),
     )
+    mocker.patch(
+        "utils.transcripts.get_anonymous_user_id", return_value="anon-user-123"
+    )
 
     # Mock the JSON to assert the data is stored correctly
     mock_json = mocker.patch("utils.transcripts.json")
@@ -104,7 +111,7 @@ def test_store_transcript(mocker):
                 "model": "fake-model",
                 "query_provider": query_request.provider,
                 "query_model": query_request.model,
-                "user_id": user_id,
+                "anonymous_user_id": "anon-user-123",
                 "conversation_id": conversation_id,
                 "timestamp": mocker.ANY,
             },
diff --git a/tests/unit/utils/test_transcripts_anonymization.py b/tests/unit/utils/test_transcripts_anonymization.py
@@ -213,8 +213,14 @@ def test_store_transcript_with_attachments(
             assert data["attachments"][0]["attachment_type"] == "text"
             assert data["attachments"][0]["content"] == "Test attachment content"
 
-    def test_path_sanitization_with_anonymous_ids(self):
+    @patch("utils.transcripts.configuration")
+    def test_path_sanitization_with_anonymous_ids(self, mock_config):
         """Test that path sanitization works correctly with anonymous UUIDs."""
+        # Setup mock configuration
+        mock_config.user_data_collection_configuration.transcripts_storage = (
+            "/tmp/transcripts"
+        )
+
         # Test with various UUID formats and potential path injection
         test_cases = [
             ("anon-uuid-123", "conv-456"),
@@ -229,7 +235,8 @@ def test_path_sanitization_with_anonymous_ids(self):
 
             # Should not contain path traversal sequences
             assert "../" not in result_str
-            assert not result_str.startswith("/")
+            # Paths should be absolute (start with /) since we use /tmp/transcripts as base
+            assert result_str.startswith("/tmp/transcripts/")
 
     @patch("utils.transcripts.get_anonymous_user_id")
     def test_logging_shows_anonymization(self, mock_get_anonymous, caplog):