feat: add sentiment_analysis functionality

dmccrystal · s0h3yl · commit 3001b0779e43 · 2023-06-08T11:23:11.000+02:00
GitOrigin-RevId: 121a1e26b0802319f3e88382e7df8b6e9fbc7947
diff --git a/README.md b/README.md
@@ -356,6 +356,41 @@ config=aai.TranscriptionConfig(
 )
 ```
 
+</details>
+<details>
+  <summary>Analyze the Sentiment of Sentences in a Transcript</summary>
+
+```python
+import assemblyai as aai
+
+transcriber = aai.Transcriber()
+transcript = transcriber.transcribe(
+  "https://example.org/audio.mp3",
+  config=aai.TranscriptionConfig(sentiment_analysis=True)
+)
+
+for sentiment_result in transcript.sentiment_analysis_results:
+  print(sentiment_result.text)
+  print(sentiment_result.sentiment)  # POSITIVE, NEUTRAL, or NEGATIVE
+  print(sentiment_result.confidence)
+  print(f"Timestamp: {sentiment_result.start} - {sentiment_result.end}")
+```
+
+If `speaker_labels` is also enabled, each sentiment analysis result will additionally include a `speaker` field.
+
+```python
+# ...
+
+config = aai.TranscriptionConfig(sentiment_analysis=True, speaker_labels=True)
+
+# ...
+
+for sentiment_result in transcript.sentiment_analysis_results:
+  print(sentiment_result.speaker)
+```
+
+[Read more about sentiment analysis here.](https://www.assemblyai.com/docs/Models/sentiment_analysis)
+
 </details>
 
 ---
diff --git a/assemblyai/transcriber.py b/assemblyai/transcriber.py
@@ -218,6 +218,10 @@ def chapters(self) -> Optional[List[types.Chapter]]:
     def content_safety_labels(self) -> Optional[types.ContentSafetyResponse]:
         return self._impl.transcript.content_safety_labels
 
+    @property
+    def sentiment_analysis_results(self) -> Optional[List[types.Sentiment]]:
+        return self._impl.transcript.sentiment_analysis_results
+
     @property
     def status(self) -> types.TranscriptStatus:
         "The current status of the transcript"
diff --git a/assemblyai/types.py b/assemblyai/types.py
@@ -354,8 +354,8 @@ class RawTranscriptionConfig(BaseModel):
     disfluencies: Optional[bool]
     "Transcribe Filler Words, like 'umm', in your media file."
 
-    # sentiment_analysis: bool = False
-    # "Enable Sentiment Analysis."
+    sentiment_analysis: Optional[bool]
+    "Enable Sentiment Analysis."
 
     auto_chapters: Optional[bool]
     "Enable Auto Chapters."
@@ -418,7 +418,7 @@ def __init__(
         # iab_categories: bool = False,
         custom_spelling: Optional[Dict[str, Union[str, Sequence[str]]]] = None,
         disfluencies: Optional[bool] = None,
-        # sentiment_analysis: bool = False,
+        sentiment_analysis: Optional[bool] = None,
         auto_chapters: Optional[bool] = None,
         # entity_detection: bool = False,
         summarization: Optional[bool] = None,
@@ -494,7 +494,7 @@ def __init__(
         # self.iab_categories = iab_categories
         self.set_custom_spelling(custom_spelling, override=True)
         self.disfluencies = disfluencies
-        # self.sentiment_analysis = sentiment_analysis
+        self.sentiment_analysis = sentiment_analysis
         self.auto_chapters = auto_chapters
         # self.entity_detection = entity_detection
         self.set_summarize(
@@ -733,17 +733,17 @@ def disfluencies(self, enable: Optional[bool]) -> None:
 
         return self
 
-    # @property
-    # def sentiment_analysis(self) -> bool:
-    #     "Returns the status of the Sentiment Analysis feature."
+    @property
+    def sentiment_analysis(self) -> Optional[bool]:
+        "Returns the status of the Sentiment Analysis feature."
 
-    #     return self._raw_transcription_config.sentiment_analysis
+        return self._raw_transcription_config.sentiment_analysis
 
-    # @sentiment_analysis.setter
-    # def sentiment_analysis(self, enable: bool) -> None:
-    #     "Enable Sentiment Analysis."
+    @sentiment_analysis.setter
+    def sentiment_analysis(self, enable: Optional[bool]) -> None:
+        "Enable Sentiment Analysis."
 
-    #     self._raw_transcription_config.sentiment_analysis = enable
+        self._raw_transcription_config.sentiment_analysis = enable
 
     @property
     def auto_chapters(self) -> bool:
@@ -752,7 +752,7 @@ def auto_chapters(self) -> bool:
         return self._raw_transcription_config.auto_chapters
 
     @auto_chapters.setter
-    def auto_chapters(self, enable: bool) -> None:
+    def auto_chapters(self, enable: Optional[bool]) -> None:
         "Enable Auto Chapters."
 
         # Validate required params are also set
@@ -1243,6 +1243,7 @@ class IABResponse(BaseModel):
 
 class Sentiment(Word):
     sentiment: SentimentType
+    speaker: Optional[str]
 
 
 class Entity(BaseModel):
@@ -1363,8 +1364,8 @@ class BaseTranscript(BaseModel):
     disfluencies: Optional[bool]
     "Transcribe Filler Words, like 'umm', in your media file."
 
-    # sentiment_analysis: bool = False
-    # "Enable Sentiment Analysis."
+    sentiment_analysis: Optional[bool]
+    "Enable Sentiment Analysis."
 
     auto_chapters: Optional[bool]
     "Enable Auto Chapters."
@@ -1451,10 +1452,10 @@ class TranscriptResponse(BaseTranscript):
     # "The list of results when Topic Detection is enabled"
 
     chapters: Optional[List[Chapter]]
-    # "When Auto Chapters is enabled, the list of Auto Chapters results"
+    "When Auto Chapters is enabled, the list of Auto Chapters results"
 
-    # sentiment_analysis_results: Optional[List[Sentiment]] = None
-    # "When Sentiment Analysis is enabled, the list of Sentiment Analysis results"
+    sentiment_analysis_results: Optional[List[Sentiment]]
+    "When Sentiment Analysis is enabled, the list of Sentiment Analysis results"
 
     # entities: Optional[List[Entity]] = None
     # "When Entity Detection is enabled, the list of detected Entities"
diff --git a/tests/unit/test_content_safety.py b/tests/unit/test_content_safety.py
@@ -77,7 +77,6 @@ def __submit_mock_request(
     Helper function to abstract mock transcriber calls with given `TranscriptionConfig`,
     and perform some common assertions.
     """
-    print(mock_response)
 
     mock_transcript_id = mock_response.get("id", "mock_id")
 
diff --git a/tests/unit/test_sentiment_analysis.py b/tests/unit/test_sentiment_analysis.py
@@ -0,0 +1,132 @@
+import json
+from typing import Any, Dict, Tuple
+
+import factory
+import httpx
+from pytest_httpx import HTTPXMock
+
+import assemblyai as aai
+from tests.unit import factories
+
+aai.settings.api_key = "test"
+
+
+class SentimentFactory(factories.WordFactory):
+    sentiment = factory.Faker("enum", enum_cls=aai.types.SentimentType)
+    speaker = factory.Faker("name")
+
+
+class SentimentAnalysisResponseFactory(factories.TranscriptCompletedResponseFactory):
+    sentiment_analysis_results = factory.List([factory.SubFactory(SentimentFactory)])
+
+
+def __submit_mock_request(
+    httpx_mock: HTTPXMock,
+    mock_response: Dict[str, Any],
+    config: aai.TranscriptionConfig,
+) -> Tuple[Dict[str, Any], aai.Transcript]:
+    """
+    Helper function to abstract mock transcriber calls with given `TranscriptionConfig`,
+    and perform some common assertions.
+    """
+
+    mock_transcript_id = mock_response.get("id", "mock_id")
+
+    # Mock initial submission response (transcript is processing)
+    mock_processing_response = factories.generate_dict_factory(
+        factories.TranscriptProcessingResponseFactory
+    )()
+
+    httpx_mock.add_response(
+        url=f"{aai.settings.base_url}/transcript",
+        status_code=httpx.codes.OK,
+        method="POST",
+        json={
+            **mock_processing_response,
+            "id": mock_transcript_id,  # inject ID from main mock response
+        },
+    )
+
+    # Mock polling-for-completion response, with completed transcript
+    httpx_mock.add_response(
+        url=f"{aai.settings.base_url}/transcript/{mock_transcript_id}",
+        status_code=httpx.codes.OK,
+        method="GET",
+        json=mock_response,
+    )
+
+    # == Make API request via SDK ==
+    transcript = aai.Transcriber().transcribe(
+        data="https://example.org/audio.wav",
+        config=config,
+    )
+
+    # Check that submission and polling requests were made
+    assert len(httpx_mock.get_requests()) == 2
+
+    # Extract body of initial submission request
+    request = httpx_mock.get_requests()[0]
+    request_body = json.loads(request.content.decode())
+
+    return request_body, transcript
+
+
+def test_sentiment_analysis_disabled_by_default(httpx_mock: HTTPXMock):
+    """
+    Tests that excluding `sentiment_analysis` from the `TranscriptionConfig` will
+    result in the default behavior of it being excluded from the request body
+    """
+    request_body, transcript = __submit_mock_request(
+        httpx_mock,
+        mock_response=factories.generate_dict_factory(
+            factories.TranscriptCompletedResponseFactory
+        )(),
+        config=aai.TranscriptionConfig(),
+    )
+    assert request_body.get("sentiment_analysis") is None
+    assert transcript.sentiment_analysis_results is None
+
+
+def test_sentiment_analysis_enabled(httpx_mock: HTTPXMock):
+    """
+    Tests that including `sentiment_analysis=True` in the `TranscriptionConfig`
+    will result in `sentiment_analysis=True` in the request body, and that the
+    response is properly parsed into a `Transcript` object
+    """
+    mock_response = factories.generate_dict_factory(SentimentAnalysisResponseFactory)()
+    request_body, transcript = __submit_mock_request(
+        httpx_mock,
+        mock_response=mock_response,
+        config=aai.TranscriptionConfig(sentiment_analysis=True),
+    )
+
+    # Check that request body was properly defined
+    assert request_body.get("sentiment_analysis") == True
+
+    # Check that transcript was properly parsed from JSON response
+    assert transcript.error is None
+
+    assert transcript.sentiment_analysis_results is not None
+    assert len(transcript.sentiment_analysis_results) > 0
+    assert len(transcript.sentiment_analysis_results) == len(
+        mock_response["sentiment_analysis_results"]
+    )
+
+    for response_sentiment_result, transcript_sentiment_result in zip(
+        mock_response["sentiment_analysis_results"],
+        transcript.sentiment_analysis_results,
+    ):
+        assert transcript_sentiment_result.text == response_sentiment_result["text"]
+        assert transcript_sentiment_result.start == response_sentiment_result["start"]
+        assert transcript_sentiment_result.end == response_sentiment_result["end"]
+        assert (
+            transcript_sentiment_result.confidence
+            == response_sentiment_result["confidence"]
+        )
+        assert (
+            transcript_sentiment_result.sentiment.value
+            == response_sentiment_result["sentiment"]
+        )
+        assert (
+            transcript_sentiment_result.speaker == response_sentiment_result["speaker"]
+        )