GetStream
diff --git a/agents-core/pyproject.toml b/agents-core/pyproject.toml
Lines changed: 1 addition & 1 deletion
diff --git a/agents-core/vision_agents/core/agents/agents.py b/agents-core/vision_agents/core/agents/agents.py
Lines changed: 63 additions & 64 deletions
diff --git a/agents-core/vision_agents/core/profiling/base.py b/agents-core/vision_agents/core/profiling/base.py
Lines changed: 2 additions & 1 deletion
@@ -21,7 +21,7 @@ classifiers = [
 
 requires-python = ">=3.10"
 dependencies = [
-    "getstream[webrtc,telemetry]>=2.5.9",
+    "getstream[webrtc,telemetry]>=2.5.11",
     "python-dotenv>=1.1.1",
     "pillow>=10.4.0",  # Compatible with moondream SDK (<11.0.0)
     "numpy>=1.24.0",
 
@@ -13,6 +13,7 @@
 from aiortc import VideoStreamTrack
 from getstream.video.rtc import Call
 
+from getstream.video.rtc.participants import ParticipantsState
 from getstream.video.rtc.pb.stream.video.sfu.models.models_pb2 import TrackType
 from .agent_options import AgentOptions, default_agent_options
 
@@ -23,7 +24,6 @@
     TrackRemovedEvent,
     CallEndedEvent,
 )
-from ..edge.sfu_events import ParticipantJoinedEvent
 from ..edge.types import Connection, Participant, PcmData, User, OutputAudioTrack
 from ..events.manager import EventManager
 from ..llm import events as llm_events
@@ -76,7 +76,7 @@ class TrackInfo:
     id: str
     type: int
     processor: str
-    priority: int # higher goes first
+    priority: int  # higher goes first
     participant: Optional[Participant]
     track: aiortc.mediastreams.VideoStreamTrack
     forwarder: VideoForwarder
@@ -90,6 +90,7 @@ class TrackInfo:
 - cleanup events more
 """
 
+
 class Agent:
     """
     Agent class makes it easy to build your own video AI.
@@ -146,6 +147,7 @@ def __init__(
         log_level: Optional[int] = logging.INFO,
         profiler: Optional[Profiler] = None,
     ):
+        self.participants: Optional[ParticipantsState] = None
         self.call = None
         self._active_processed_track_id: Optional[str] = None
         self._active_source_track_id: Optional[str] = None
@@ -211,7 +213,7 @@ def __init__(
 
         # Attach processors that need agent reference
         for processor in self.processors:
-            if hasattr(processor, '_attach_agent'):
+            if hasattr(processor, "_attach_agent"):
                 processor._attach_agent(self)
 
         self.events.subscribe(self._on_vad_audio)
@@ -267,22 +269,20 @@ async def on_video_track_added(event: TrackAddedEvent | TrackRemovedEvent):
             if event.track_id is None or event.track_type is None or event.user is None:
                 return
             if isinstance(event, TrackRemovedEvent):
-                asyncio.create_task(self._on_track_removed(event.track_id, event.track_type, event.user))
+                asyncio.create_task(
+                    self._on_track_removed(event.track_id, event.track_type, event.user)
+                )
             else:
-                asyncio.create_task(self._on_track_added(event.track_id, event.track_type, event.user))
+                asyncio.create_task(
+                    self._on_track_added(event.track_id, event.track_type, event.user)
+                )
 
         # audio event for the user talking to the AI
         @self.edge.events.subscribe
         async def on_audio_received(event: AudioReceivedEvent):
             if event.participant is not None:
                 await self._reply_to_audio(event.pcm_data, event.participant)
 
-        @self.edge.events.subscribe
-        async def on_participant_joined(event: ParticipantJoinedEvent):
-            if event.participant is not None:
-                self.logger.info(f"Participant {event.participant.user_id} joined")
-                self.participants[event.participant.session_id] = event.participant
-
         @self.events.subscribe
         async def on_stt_transcript_event_create_response(event: STTTranscriptEvent):
             if _is_audio_llm(self.llm):
@@ -342,7 +342,7 @@ async def on_stt_transcript_event_sync_conversation(event: STTTranscriptEvent):
 
         @self.events.subscribe
         async def on_realtime_user_speech_transcription(
-                event: RealtimeUserSpeechTranscriptionEvent,
+            event: RealtimeUserSpeechTranscriptionEvent,
         ):
             self.logger.info(f"🎤 [User transcript]: {event.text}")
 
@@ -362,7 +362,7 @@ async def on_realtime_user_speech_transcription(
 
         @self.events.subscribe
         async def on_realtime_agent_speech_transcription(
-                event: RealtimeAgentSpeechTranscriptionEvent,
+            event: RealtimeAgentSpeechTranscriptionEvent,
         ):
             self.logger.info(f"🎤 [Agent transcript]: {event.text}")
 
@@ -380,9 +380,6 @@ async def on_realtime_agent_speech_transcription(
                     original=event,
                 )
 
-
-
-
         @self.llm.events.subscribe
         async def on_llm_response_sync_conversation(event: LLMResponseCompletedEvent):
             self.logger.info(f"🤖 [LLM response]: {event.text} {event.item_id}")
@@ -424,7 +421,6 @@ async def _handle_output_text_delta(event: LLMResponseChunkEvent):
 
         logger.info("AUDIO: SUB TO EVENTS DONE")
 
-
     async def simple_response(
         self, text: str, participant: Optional[Participant] = None
     ) -> None:
@@ -441,7 +437,7 @@ async def simple_response(
             span.set_attribute("response.original", response.original)
 
     async def simple_audio_response(
-            self, pcm: PcmData, participant: Optional[Participant] = None
+        self, pcm: PcmData, participant: Optional[Participant] = None
     ) -> None:
         """
         Makes it easy to subclass how the agent calls the LLM for processing audio
@@ -463,7 +459,9 @@ def subscribe(self, function):
         """
         return self.events.subscribe(function)
 
-    async def join(self, call: Call, wait_for_participant=True) -> "AgentSessionContextManager":
+    async def join(
+        self, call: Call, wait_for_participant=True
+    ) -> "AgentSessionContextManager":
         # TODO: validation. join can only be called once
         self.logger.info("joining call")
         # run start on all subclasses
@@ -504,7 +502,8 @@ async def join(self, call: Call, wait_for_participant=True) -> "AgentSessionCont
 
             with self.span("edge.join"):
                 connection = await self.edge.join(self, call)
-                self.participants = connection._connection.participants_state._participant_by_prefix
+                self.participants = connection.participants
+
         except Exception:
             self.clear_call_logging_context()
             raise
@@ -548,30 +547,23 @@ async def join(self, call: Call, wait_for_participant=True) -> "AgentSessionCont
 
     async def wait_for_participant(self):
-        """wait for a participant other than the AI agent to join"""
-        # Check if a non-agent participant is already present
-        if self.call and self.participants:
-            for p in self.participants.values():
-                if p.user_id != self.agent_user.id:
-                    self.logger.info(f"Participant {p.user_id} already in call")
-                    return
+        """Wait for a participant other than the AI agent to join.
+
+        Returns immediately, without waiting, when ``self.participants`` is
+        still ``None`` (it is only assigned once ``join`` has connected).
+        Otherwise blocks until the participants callback reports at least one
+        participant whose ``user_id`` differs from ``self.agent_user.id``.
+
+        NOTE(review): this presumably relies on ``ParticipantsState.map``
+        invoking the callback with the current participants as well as on
+        later changes, so people already in the call are noticed - confirm
+        against the getstream SDK.
+        """
 
-        # If not, wait for one to join
-        participant_joined = asyncio.Event()
+        if self.participants is None:
+            return
 
-        @self.edge.events.subscribe
-        async def on_participant_joined(event: ParticipantJoinedEvent):
-            if event.participant is not None:
-                is_agent = event.participant.user_id == self.agent_user.id
+        participant_joined = asyncio.Event()
 
-                self.logger.info(f"Participant {event.participant.user_id} joined is_agent {is_agent}")
-                if not is_agent:
-                    participant_joined.set()
+        def on_participants(participants):
+            # One non-agent participant is enough; any() stops at the first
+            # match instead of scanning (and re-setting the event for) the rest.
+            if any(p.user_id != self.agent_user.id for p in participants):
+                participant_joined.set()
 
-        # Wait for the event to be set
-        await participant_joined.wait()
+        subscription = self.participants.map(on_participants)
 
-        # Clean up the subscription
-        self.edge.events.unsubscribe(on_participant_joined)
+        try:
+            await participant_joined.wait()
+        finally:
+            # Always drop the subscription, including when the wait is
+            # cancelled, so the callback does not outlive this call.
+            subscription.unsubscribe()
 
     async def finish(self):
         """Wait for the call to end gracefully.
@@ -624,7 +616,10 @@ async def _apply(self, function_name: str, *args, **kwargs):
         subclasses = [self.llm, self.stt, self.tts, self.turn_detection, self.edge]
         subclasses.extend(self.processors)
         for subclass in subclasses:
-            if subclass is not None and getattr(subclass, function_name, None) is not None:
+            if (
+                subclass is not None
+                and getattr(subclass, function_name, None) is not None
+            ):
                 func = getattr(subclass, function_name)
                 if func is not None:
                     await func(*args, **kwargs)
@@ -642,13 +637,6 @@ def _end_tracing(self):
     def __aexit__(self, exc_type, exc_val, exc_tb):
         self._end_tracing()
 
-
-
-
-
-
-
-
     async def close(self):
         """Clean up all connections and resources.
 
@@ -888,7 +876,9 @@ async def _reply_to_audio_consumer(self) -> None:
                         for processor in self.audio_processors:
                             if processor is None:
                                 continue
-                            await processor.process_audio(audio_bytes, participant.user_id)
+                            await processor.process_audio(
+                                audio_bytes, participant.user_id
+                            )
 
                         # when in Realtime mode call the Realtime directly (non-blocking)
                         if _is_audio_llm(self.llm):
@@ -937,48 +927,60 @@ async def _image_to_video_processors(self, track_id: str, track_type: int):
         for processor in self.image_processors:
             try:
                 pass
-                #TODO: run this better
-                #await processor.process_image(
+                # TODO: run this better
+                # await processor.process_image(
                 #    img, track_info.participant.user_id, track_id=track_id, track_type=track_type
-                #)
+                # )
             except Exception as e:
                 self.logger.error(
                     f"Error in image processor {type(processor).__name__}: {e}"
                 )
 
-    async def _on_track_removed(self, track_id: str, track_type: int, participant: Participant):
-        self._active_video_tracks.pop(track_id)
+    async def _on_track_removed(
+        self, track_id: str, track_type: int, participant: Participant
+    ):
+        """Forget a removed track and re-select the active video tracks.
+
+        Removal events are dispatched for every track type, whereas
+        ``_on_track_added`` returns early for anything that is not a video or
+        screen-share track. Ids that are not present in
+        ``_active_video_tracks`` are therefore ignored instead of raising
+        ``KeyError`` inside the fire-and-forget task that runs this handler.
+
+        Args:
+            track_id: Id of the track that was removed.
+            track_type: Track type reported by the event (not used here).
+            participant: Owner of the track (not used here).
+        """
+        # pop() with a default: an unknown id is a no-op, and nothing changed,
+        # so there is no reason to re-run track selection.
+        if self._active_video_tracks.pop(track_id, None) is None:
+            return
         await self._on_track_change(track_id)
 
     async def _on_track_change(self, track_id: str):
+        """Re-select the active source and processed video tracks.
+
+        Shared by the track-added and track-removed handlers. Picks the
+        highest-priority track without a processor as the source, hands it to
+        the video processors, then picks the highest-priority track overall
+        and, when the LLM accepts video, points the LLM at it.
+
+        When no unprocessed track is left (for example after the last
+        participant track was removed) both active ids are reset to ``None``
+        and nothing else happens; indexing the empty selection used to raise
+        ``IndexError``.
+
+        Args:
+            track_id: Id of the track that triggered the change (not used by
+                the selection itself).
+        """
         # shared logic between track remove and added
         # Select a track. Prioritize screenshare over regular
         # This is the track without processing
-        non_processed_tracks = [t for t in self._active_video_tracks.values() if not t.processor]
-        source_track = sorted(non_processed_tracks, key=lambda t: t.priority, reverse=True)[0]
+        non_processed_tracks = [
+            t for t in self._active_video_tracks.values() if not t.processor
+        ]
+        if not non_processed_tracks:
+            # Nothing left to forward: clear the bookkeeping rather than
+            # crashing on an empty selection.
+            self._active_source_track_id = None
+            self._active_processed_track_id = None
+            return
+
+        # max() returns the first track with the highest priority, the same
+        # element a stable descending sort would put at index 0.
+        source_track = max(non_processed_tracks, key=lambda t: t.priority)
         # assign the tracks that we last used so we can notify of changes...
         self._active_source_track_id = source_track.id
 
         await self._track_to_video_processors(source_track)
 
-        processed_track = sorted([t for t in self._active_video_tracks.values()], key=lambda t: t.priority, reverse=True)[0]
+        processed_track = max(
+            self._active_video_tracks.values(), key=lambda t: t.priority
+        )
         self._active_processed_track_id = processed_track.id
 
         # See if we have a processed track. If so forward that to LLM
         # TODO: this should run in a loop and handle multiple forwarders
-        #self._image_to_video_processors()
+        # self._image_to_video_processors()
 
         # If Realtime provider supports video, switch to this new track
         if _is_video_llm(self.llm):
             await self.llm.watch_video_track(
                 processed_track.track, shared_forwarder=processed_track.forwarder
             )
 
-    async def _on_track_added(self, track_id: str, track_type: int, participant: Participant):
+    async def _on_track_added(
+        self, track_id: str, track_type: int, participant: Participant
+    ):
         # We only process video tracks (camera video or screenshare)
         if track_type not in (
-                TrackType.TRACK_TYPE_VIDEO,
-                TrackType.TRACK_TYPE_SCREEN_SHARE,
+            TrackType.TRACK_TYPE_VIDEO,
+            TrackType.TRACK_TYPE_SCREEN_SHARE,
         ):
             return
 
@@ -1001,14 +1003,12 @@ async def _on_track_added(self, track_id: str, track_type: int, participant: Par
             processor="",
             track=track,
             participant=participant,
-            priority = 1 if track_type == TrackType.TRACK_TYPE_SCREEN_SHARE else 0,
-            forwarder = forwarder
+            priority=1 if track_type == TrackType.TRACK_TYPE_SCREEN_SHARE else 0,
+            forwarder=forwarder,
         )
 
         await self._on_track_change(track_id)
 
-
-
     async def _on_turn_event(self, event: TurnStartedEvent | TurnEndedEvent) -> None:
         """Handle turn detection events."""
         # Skip the turn event handling if the model doesn't require TTS or SST audio itself.
@@ -1066,7 +1066,6 @@ async def _on_turn_event(self, event: TurnStartedEvent | TurnEndedEvent) -> None
                 # Clear the pending transcript for this speaker
                 self._pending_user_transcripts[event.participant.user_id] = ""
 
-
     @property
     def publish_audio(self) -> bool:
         """Whether the agent should publish an outbound audio track.
@@ -1241,7 +1240,7 @@ def _prepare_rtc(self):
                 track=self._video_track,
                 participant=None,
                 priority=2,
-                forwarder=forwarder
+                forwarder=forwarder,
             )
 
             self.logger.info("🎥 Video track initialized from video publisher")
 
@@ -1,4 +1,3 @@
-import pyinstrument
 import logging
 
 from vision_agents.core.events import EventManager
@@ -29,6 +28,8 @@ def __init__(self, output_path='./profile.html'):
             output_path: Path where the HTML profile report will be saved.
                 Defaults to './profile.html'.
         """
+        import pyinstrument
+
         self.output_path = output_path
         self.events = EventManager()
         self.events.register_events_from_module(events)