From 8a765e17ddafd238b512276978b24cfb8d6df669 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Per=C5=BCy=C5=82o?= Date: Tue, 10 Feb 2026 08:19:32 +0100 Subject: [PATCH 1/4] Add agent vision docs --- api/fishjam-server | 2 +- api/protos | 2 +- api/room-manager | 2 +- docs/tutorials/agents.mdx | 71 ++++++++++++++++++++++++++++++++++++++ packages/js-server-sdk | 2 +- packages/python-server-sdk | 2 +- packages/web-client-sdk | 2 +- 7 files changed, 77 insertions(+), 6 deletions(-) diff --git a/api/fishjam-server b/api/fishjam-server index 4d7c5508..9b7cf8f1 160000 --- a/api/fishjam-server +++ b/api/fishjam-server @@ -1 +1 @@ -Subproject commit 4d7c550873599856cece48d95be58b8636016dbd +Subproject commit 9b7cf8f1058bd3a3062717c85f5d084eca577996 diff --git a/api/protos b/api/protos index 1ff0f980..9d807b55 160000 --- a/api/protos +++ b/api/protos @@ -1 +1 @@ -Subproject commit 1ff0f9806c53da1d29be98e14d31096cdabeffef +Subproject commit 9d807b55279de385136f82b12f5df75d73104514 diff --git a/api/room-manager b/api/room-manager index ac1122a5..31836f4e 160000 --- a/api/room-manager +++ b/api/room-manager @@ -1 +1 @@ -Subproject commit ac1122a55f70c7f06872e4106b6452a9f199480b +Subproject commit 31836f4ed6c8551b3892cea50d2872cdf2292e71 diff --git a/docs/tutorials/agents.mdx b/docs/tutorials/agents.mdx index 03a5ba3e..31b94169 100644 --- a/docs/tutorials/agents.mdx +++ b/docs/tutorials/agents.mdx @@ -206,6 +206,77 @@ You can interrupt the currently played audio chunk. See the example below. +### Making the Agent see + +Agents can also request video frames (images) from peers' video tracks. +Unlike audio, which streams continuously, video frames must be explicitly requested and arrive asynchronously. + +:::tip +We recommend not requesting frames more than once per second to avoid unnecessary load. +::: + + + + + ```ts + // @noErrors + import { RoomId, FishjamClient, TrackId } from '@fishjam-cloud/js-server-sdk'; + + const fishjamId = ''; + const managementToken = ''; + const fishjamClient = new FishjamClient({ fishjamId, managementToken }); + const room = await fishjamClient.createRoom(); + const { agent } = await fishjamClient.createAgent(room.id, {}); + const trackId: TrackId = '' as TrackId; + + // ---cut--- + import type { IncomingTrackImage } from '@fishjam-cloud/js-server-sdk'; + + // Listen for incoming video frames + agent.on('trackImage', (message: IncomingTrackImage) => { + const { contentType, data } = message; + // process the image data + }); + + // Request a frame periodically + setInterval(() => { + // [!code highlight:1] + agent.captureImage(trackId); + }, 1000); + + ``` + + + + + + ```python + import asyncio + + from fishjam import FishjamClient + from fishjam.agent import IncomingTrackImage + + fishjam_client = FishjamClient(fishjam_id, management_token) + + agent = fishjam_client.create_agent(room_id) + + async with agent.connect() as session: + # Request a frame + # [!code highlight:1] + await session.capture_image(track_id) + + # Captured frames arrive as IncomingTrackImage messages + async for message in session.receive(): + match message: + case IncomingTrackImage(track_id=track_id, data=data): + # process the image data + pass + ``` + + + + + ### Disconnecting After you're done using an agent, you can disconnect it from the room. diff --git a/packages/js-server-sdk b/packages/js-server-sdk index 7aa91272..5093c326 160000 --- a/packages/js-server-sdk +++ b/packages/js-server-sdk @@ -1 +1 @@ -Subproject commit 7aa91272ab7d18fdf42ce6de3cf7585b5d0dd285 +Subproject commit 5093c326f087fbff97ecb84523e27d6e67ca1662 diff --git a/packages/python-server-sdk b/packages/python-server-sdk index fb05c3cb..b968feb8 160000 --- a/packages/python-server-sdk +++ b/packages/python-server-sdk @@ -1 +1 @@ -Subproject commit fb05c3cb78450e442ecd683e679e0d3654ac7f1d +Subproject commit b968feb81d893fc15b06440ea46ca999dd40e5d0 diff --git a/packages/web-client-sdk b/packages/web-client-sdk index 52dcce8e..b73a7b1d 160000 --- a/packages/web-client-sdk +++ b/packages/web-client-sdk @@ -1 +1 @@ -Subproject commit 52dcce8e5d9028b4b4a64ceba99c1e70db0838ed +Subproject commit b73a7b1d96c4a63909b2bb641aa3285b51a01999 From 2589c849bd56054fc95740c7d97b6205d42c2d6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Per=C5=BCy=C5=82o?= Date: Tue, 10 Feb 2026 11:11:01 +0100 Subject: [PATCH 2/4] Update python server sdk --- packages/python-server-sdk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/python-server-sdk b/packages/python-server-sdk index b968feb8..0889d548 160000 --- a/packages/python-server-sdk +++ b/packages/python-server-sdk @@ -1 +1 @@ -Subproject commit b968feb81d893fc15b06440ea46ca999dd40e5d0 +Subproject commit 0889d54851167442d6a9318847a269f9c7466854 From 6a4af6e5284e9816839d555b35514619b7a92771 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Per=C5=BCy=C5=82o?= Date: Tue, 10 Feb 2026 11:54:23 +0100 Subject: [PATCH 3/4] Update docs/tutorials/agents.mdx Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- docs/tutorials/agents.mdx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/tutorials/agents.mdx b/docs/tutorials/agents.mdx index 31b94169..9d044625 100644 --- a/docs/tutorials/agents.mdx +++ b/docs/tutorials/agents.mdx @@ -268,7 +268,8 @@ We recommend not requesting frames more than once per second to avoid unnecessar # Captured frames arrive as IncomingTrackImage messages async for message in session.receive(): match message: - case IncomingTrackImage(track_id=track_id, data=data): + case IncomingTrackImage() as msg if msg.track_id == track_id: + data = msg.data # process the image data pass ``` From bf431506fb15fe477ea5af068ebf2e8ad3ed2d16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Per=C5=BCy=C5=82o?= Date: Tue, 10 Feb 2026 14:29:14 +0100 Subject: [PATCH 4/4] Adjust to CR --- docs/tutorials/agents.mdx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/tutorials/agents.mdx b/docs/tutorials/agents.mdx index 31b94169..77c95245 100644 --- a/docs/tutorials/agents.mdx +++ b/docs/tutorials/agents.mdx @@ -208,11 +208,11 @@ You can interrupt the currently played audio chunk. See the example below. ### Making the Agent see -Agents can also request video frames (images) from peers' video tracks. +Agents can also request video frames (JPEG images) from peers' video tracks. Unlike audio, which streams continuously, video frames must be explicitly requested and arrive asynchronously. -:::tip -We recommend not requesting frames more than once per second to avoid unnecessary load. +:::important +Video frame capture is rate-limited to one frame per second per track. :::