From dea52eabf331b52fb349d8b9b81004afaa03f1c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Per=C5=BCy=C5=82o?= Date: Wed, 4 Feb 2026 14:17:28 +0100 Subject: [PATCH 1/2] Add support for agent image capture --- .gitmodules | 1 + examples/multimodal/README.md | 25 ++ examples/multimodal/package.json | 24 ++ examples/multimodal/src/const.ts | 3 + examples/multimodal/src/controllers/peers.ts | 8 + examples/multimodal/src/environment.d.ts | 11 + examples/multimodal/src/index.ts | 21 ++ examples/multimodal/src/service/fishjam.ts | 32 ++ examples/multimodal/src/service/multimodal.ts | 227 ++++++++++++++ examples/multimodal/tsconfig.json | 14 + packages/fishjam-proto/protos | 2 +- .../src/fishjam/agent_notifications.ts | 201 ++++++++++++- .../src/fishjam/server_notifications.ts | 278 +++++++++++++++++- packages/js-server-sdk/src/agent.ts | 18 +- .../js-server-sdk/src/integrations/gemini.ts | 1 + yarn.lock | 16 + 16 files changed, 876 insertions(+), 6 deletions(-) create mode 100644 examples/multimodal/README.md create mode 100644 examples/multimodal/package.json create mode 100644 examples/multimodal/src/const.ts create mode 100644 examples/multimodal/src/controllers/peers.ts create mode 100644 examples/multimodal/src/environment.d.ts create mode 100644 examples/multimodal/src/index.ts create mode 100644 examples/multimodal/src/service/fishjam.ts create mode 100644 examples/multimodal/src/service/multimodal.ts create mode 100644 examples/multimodal/tsconfig.json diff --git a/.gitmodules b/.gitmodules index 2294e3fc..8f7a32f3 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,4 @@ [submodule "packages/fishjam-proto/protos"] path = packages/fishjam-proto/protos url = https://github.com/fishjam-cloud/protos.git + branch = agent-capture-image diff --git a/examples/multimodal/README.md b/examples/multimodal/README.md new file mode 100644 index 00000000..8b48f5ba --- /dev/null +++ b/examples/multimodal/README.md @@ -0,0 +1,25 @@ +# Multimodal with Fishjam and Gemini Live API + +This example shows how to integrate Fishjam with the Gemini Live API for multimodal (audio + video) interactions. +It periodically captures images from video tracks and sends them alongside audio to Gemini. + +## Development + +To start the development server you must first copy `.env.example` to `.env`. + +Then you need to set the following variables: + +- `FISHJAM_ID`: your Fishjam ID, which you can get at +- `FISHJAM_TOKEN`: your Fishjam management token, which you can get at +- `GEMINI_API_KEY`: your Gemini API key, which you can get at + +Once you've set up your environment variables, all you need to do is run the following command: + +```bash +yarn dev +``` + +When the server is running, you can obtain peer tokens by going to . + +When you connect peers with audio and video, the agent will periodically capture video frames and send them along with audio to Gemini for multimodal understanding. +You can connect peers with the [fishjam minimal-react example](https://github.com/fishjam-cloud/web-client-sdk/tree/main/examples/react-client). diff --git a/examples/multimodal/package.json b/examples/multimodal/package.json new file mode 100644 index 00000000..c07231ca --- /dev/null +++ b/examples/multimodal/package.json @@ -0,0 +1,24 @@ +{ + "name": "multimodal-demo", + "version": "0.24.0", + "scripts": { + "test": "echo \"Error: no test specified\" && exit 1", + "format": "prettier --write .", + "typecheck": "tsc", + "dev": "bun run --watch src/index.ts" + }, + "dependencies": { + "@fishjam-cloud/js-server-sdk": "workspace:*", + "@google/genai": "^1.13.0", + "@grotto/logysia": "^0.1.6", + "bun": "^1.2.20", + "elysia": "latest" + }, + "devDependencies": { + "@types/bun": "^1", + "bun-types": "latest", + "prettier": "^3.6.2", + "typescript": "^5.9.3" + }, + "module": "src/index.js" +} diff --git a/examples/multimodal/src/const.ts b/examples/multimodal/src/const.ts new file mode 100644 index 00000000..2bb950fa --- /dev/null +++ b/examples/multimodal/src/const.ts @@ -0,0 +1,3 @@ +export const MULTIMODAL_MODEL = 'gemini-2.5-flash-native-audio-preview-12-2025'; + +export const CAPTURE_INTERVAL_MS = 3000; diff --git a/examples/multimodal/src/controllers/peers.ts b/examples/multimodal/src/controllers/peers.ts new file mode 100644 index 00000000..cee9f9e0 --- /dev/null +++ b/examples/multimodal/src/controllers/peers.ts @@ -0,0 +1,8 @@ +import { Elysia } from 'elysia'; +import { FishjamService } from '../service/fishjam'; + +export const peerController = (fishjam: FishjamService) => + new Elysia().get('/peers', async () => { + const { peer: _peer, peerToken } = await fishjam.createPeer(); + return { token: peerToken }; + }); diff --git a/examples/multimodal/src/environment.d.ts b/examples/multimodal/src/environment.d.ts new file mode 100644 index 00000000..527ff29f --- /dev/null +++ b/examples/multimodal/src/environment.d.ts @@ -0,0 +1,11 @@ +declare global { + namespace NodeJS { + interface ProcessEnv { + FISHJAM_ID: string; + FISHJAM_TOKEN?: string; + GEMINI_API_KEY?: string; + } + } +} + +export {}; diff --git a/examples/multimodal/src/index.ts b/examples/multimodal/src/index.ts new file mode 100644 index 00000000..ca76b0a2 --- /dev/null +++ b/examples/multimodal/src/index.ts @@ -0,0 +1,21 @@ +import { Elysia } from 'elysia'; +import { peerController } from './controllers/peers'; +import { FishjamService } from './service/fishjam'; +import { MultimodalService } from './service/multimodal'; + +if (!process.env.FISHJAM_ID || !process.env.FISHJAM_TOKEN || !process.env.GEMINI_API_KEY) { + throw Error('Environment variables FISHJAM_ID, FISHJAM_TOKEN and GEMINI_API_KEY are required.'); +} + +const fishjamConfig = { + fishjamId: process.env.FISHJAM_ID, + managementToken: process.env.FISHJAM_TOKEN, +}; + +const fishjam = new FishjamService(fishjamConfig); + +new MultimodalService(fishjamConfig, process.env.GEMINI_API_KEY); + +const app = new Elysia().use(peerController(fishjam)).listen(3000); + +console.log(`Elysia is running at ${app.server?.hostname}:${app.server?.port}`); diff --git a/examples/multimodal/src/service/fishjam.ts b/examples/multimodal/src/service/fishjam.ts new file mode 100644 index 00000000..0790b22a --- /dev/null +++ b/examples/multimodal/src/service/fishjam.ts @@ -0,0 +1,32 @@ +import { FishjamClient, FishjamConfig, RoomId, RoomNotFoundException } from '@fishjam-cloud/js-server-sdk'; + +export class FishjamService { + roomId?: RoomId; + fishjam: FishjamClient; + + constructor(config: FishjamConfig) { + this.fishjam = new FishjamClient(config); + } + + async createPeer() { + try { + return await this.makePeer(); + } catch (e) { + if (e instanceof RoomNotFoundException) { + await this.makeRoom(); + return this.makePeer(); + } + throw e; + } + } + + private async makeRoom() { + const { id: roomId } = await this.fishjam.createRoom(); + this.roomId = roomId; + } + + private async makePeer() { + if (!this.roomId) await this.makeRoom(); + return this.fishjam.createPeer(this.roomId!); + } +} diff --git a/examples/multimodal/src/service/multimodal.ts b/examples/multimodal/src/service/multimodal.ts new file mode 100644 index 00000000..e841473c --- /dev/null +++ b/examples/multimodal/src/service/multimodal.ts @@ -0,0 +1,227 @@ +import { + FishjamAgent, + FishjamConfig, + FishjamWSNotifier, + FishjamClient, + PeerConnected, + PeerDisconnected, + PeerId, + RoomId, + TrackAdded, + TrackRemoved, + TrackId, + IncomingTrackData, + IncomingTrackImage, +} from '@fishjam-cloud/js-server-sdk'; +import GeminiIntegration from '@fishjam-cloud/js-server-sdk/gemini'; +import { GoogleGenAI, LiveServerMessage, Modality, Session } from '@google/genai'; +import { MULTIMODAL_MODEL, CAPTURE_INTERVAL_MS } from '../const'; + +type AgentState = { + agent: FishjamAgent; + outputTrackId: TrackId; +}; + +export class MultimodalService { + peerSessions: Map = new Map(); + agents: Map = new Map(); + videoTracks: Map> = new Map(); + captureIntervals: Map> = new Map(); + ai: GoogleGenAI; + fishjamConfig: FishjamConfig; + fishjamClient: FishjamClient; + + constructor(fishjamConfig: FishjamConfig, geminiKey: string) { + this.ai = GeminiIntegration.createClient({ apiKey: geminiKey }); + this.fishjamConfig = fishjamConfig; + this.fishjamClient = new FishjamClient(fishjamConfig); + this.initFishjam(); + } + + private initFishjam() { + const notifier = new FishjamWSNotifier( + this.fishjamConfig, + (error) => console.error('Fishjam websocket error: %O', error), + (code, reason) => console.log(`Fishjam websocket closed. code: ${code}, reason: ${reason}`) + ); + + notifier.on('peerConnected', (msg) => this.handlePeerConnected(msg)); + notifier.on('peerDisconnected', (msg) => this.handlePeerDisconnected(msg)); + notifier.on('trackAdded', (msg) => this.handleTrackAdded(msg)); + notifier.on('trackRemoved', (msg) => this.handleTrackRemoved(msg)); + } + + async handlePeerConnected(message: PeerConnected) { + if (message.peerType === 2) return; + + console.log('Peer connected: %O', message); + + const peerId = message.peerId; + const agentState = this.agents.get(message.roomId); + + if (agentState && peerId === (agentState as { agent: FishjamAgent }).agent.constructor.name) return; + + if (agentState == undefined) { + const { + peer: { id: newAgentId }, + agent, + } = await this.fishjamClient.createAgent( + message.roomId, + { output: GeminiIntegration.geminiInputAudioSettings }, + { + onClose: (code, reason) => console.log(`Fishjam agent websocket closed. code: ${code}, reason: ${reason}`), + onError: (error) => console.error('Fishjam agent websocket error: %O', error), + } + ); + + const outputTrack = agent.createTrack(GeminiIntegration.geminiOutputAudioSettings); + + this.agents.set(message.roomId, { agent, outputTrackId: outputTrack.id }); + this.videoTracks.set(message.roomId, new Set()); + + agent.on('trackData', (msg) => this.handleTrackData(msg)); + agent.on('trackImage', (msg) => this.handleTrackImage(message.roomId, msg)); + + this.startImageCapture(message.roomId); + + console.log(`Agent ${newAgentId} created`); + } + + const session = await this.ai.live.connect({ + model: MULTIMODAL_MODEL, + config: { + responseModalities: [Modality.AUDIO], + }, + callbacks: { + onopen: () => console.log(`Connected to Gemini Live API for peer ${peerId}.`), + onerror: (error) => console.error(`Gemini error for peer ${peerId}: %O`, error), + onclose: (e) => + console.log(`Connection to Gemini Live API for peer ${peerId} closed. code: ${e.code}, reason: ${e.reason}`), + onmessage: (msg) => this.handleGeminiMessage(message.roomId, peerId, msg), + }, + }); + this.peerSessions.set(peerId, session); + } + + async handlePeerDisconnected(message: PeerDisconnected) { + const agentState = this.agents.get(message.roomId); + if (agentState) { + // Check if the disconnecting peer is the agent itself + const room = await this.fishjamClient.getRoom(message.roomId); + const isAgent = room.peers.every((peer) => peer.id !== message.peerId); + if (isAgent) return this.handleAgentDisconnected(message); + } + + this.handleWebrtcPeerDisconnected(message); + } + + handleAgentDisconnected(message: PeerDisconnected) { + console.log(`Agent ${message.peerId} disconnected`); + + this.stopImageCapture(message.roomId); + this.agents.delete(message.roomId); + this.videoTracks.delete(message.roomId); + } + + async handleWebrtcPeerDisconnected(message: PeerDisconnected) { + console.log('Peer disconnected: %O', message); + + const peerId = message.peerId; + const session = this.peerSessions.get(peerId); + session?.close(); + this.peerSessions.delete(peerId); + + const room = await this.fishjamClient.getRoom(message.roomId); + const activePeers = room.peers.filter((peer) => peer.status === 'connected'); + if (activePeers.length === 1) { + console.log('Last peer left room, removing agent'); + this.stopImageCapture(message.roomId); + await this.fishjamClient.deletePeer(message.roomId, activePeers[0].id); + } + } + + handleTrackAdded(message: TrackAdded) { + if (!message.track || message.track.type !== 1) return; // 2 = TRACK_TYPE_VIDEO + + const trackId = message.track.id as TrackId; + const tracks = this.videoTracks.get(message.roomId); + if (tracks) { + tracks.add(trackId); + console.log(`Video track ${trackId} added in room ${message.roomId}`); + } + } + + handleTrackRemoved(message: TrackRemoved) { + if (!message.track) return; + + const trackId = message.track.id as TrackId; + const tracks = this.videoTracks.get(message.roomId); + if (tracks) { + tracks.delete(trackId); + console.log(`Video track ${trackId} removed from room ${message.roomId}`); + } + } + + handleTrackData(message: IncomingTrackData) { + const { data, peerId } = message; + const session = this.peerSessions.get(peerId); + + session?.sendRealtimeInput({ + audio: { + data: data.toBase64(), + mimeType: GeminiIntegration.inputMimeType, + }, + }); + } + + handleTrackImage(roomId: RoomId, message: IncomingTrackImage) { + const { contentType, data } = message; + + for (const [peerId, session] of this.peerSessions) { + session.sendRealtimeInput({ + media: { + data: Buffer.from(data).toString('base64'), + mimeType: contentType, + }, + }); + } + } + + handleGeminiMessage(roomId: RoomId, peerId: PeerId, msg: LiveServerMessage) { + const agentState = this.agents.get(roomId); + if (!agentState) return; + + const audioData = msg.serverContent?.modelTurn?.parts?.[0]?.inlineData; + if (audioData?.data) { + const buffer = Buffer.from(audioData.data, 'base64'); + agentState.agent.sendData(agentState.outputTrackId, new Uint8Array(buffer)); + } + + const transcription = msg.serverContent?.inputTranscription?.text; + if (transcription) console.log(`Peer ${peerId} said: "${transcription}".`); + } + + private startImageCapture(roomId: RoomId) { + const interval = setInterval(() => { + const agentState = this.agents.get(roomId); + const tracks = this.videoTracks.get(roomId); + + if (!agentState || !tracks || tracks.size === 0) return; + + for (const trackId of tracks) { + console.log('Sending image capture request for track', trackId); + agentState.agent.captureImage(trackId); + } + }, CAPTURE_INTERVAL_MS); + + this.captureIntervals.set(roomId, interval); + } + + private stopImageCapture(roomId: RoomId) { + const interval = this.captureIntervals.get(roomId); + if (interval) { + clearInterval(interval); + this.captureIntervals.delete(roomId); + } + } +} diff --git a/examples/multimodal/tsconfig.json b/examples/multimodal/tsconfig.json new file mode 100644 index 00000000..d14f04c0 --- /dev/null +++ b/examples/multimodal/tsconfig.json @@ -0,0 +1,14 @@ +{ + "compilerOptions": { + "target": "ES2021", + "module": "ES2022", + "moduleResolution": "bundler", + "types": ["bun-types"], + "esModuleInterop": true, + "forceConsistentCasingInFileNames": true, + "strict": true, + "skipLibCheck": true, + "noEmit": true, + "jsx": "preserve" + } +} diff --git a/packages/fishjam-proto/protos b/packages/fishjam-proto/protos index 40f4ab80..694aad31 160000 --- a/packages/fishjam-proto/protos +++ b/packages/fishjam-proto/protos @@ -1 +1 @@ -Subproject commit 40f4ab8013644de2be5d7d7ff2652725935a2e92 +Subproject commit 694aad31e504028cfebdeaf2bab317e4ad0b81b6 diff --git a/packages/fishjam-proto/src/fishjam/agent_notifications.ts b/packages/fishjam-proto/src/fishjam/agent_notifications.ts index 3bf048c7..e1bd7046 100644 --- a/packages/fishjam-proto/src/fishjam/agent_notifications.ts +++ b/packages/fishjam-proto/src/fishjam/agent_notifications.ts @@ -17,6 +17,7 @@ export interface AgentRequest { removeTrack?: AgentRequest_RemoveTrack | undefined; trackData?: AgentRequest_TrackData | undefined; interruptTrack?: AgentRequest_InterruptTrack | undefined; + captureImage?: AgentRequest_CaptureImage | undefined; } /** Request sent by agent, to authenticate to Fishjam server */ @@ -59,10 +60,15 @@ export interface AgentRequest_InterruptTrack { trackId: string; } +export interface AgentRequest_CaptureImage { + trackId: string; +} + /** Defines any type of message passed from Fishjam to agent peer */ export interface AgentResponse { authenticated?: AgentResponse_Authenticated | undefined; trackData?: AgentResponse_TrackData | undefined; + trackImage?: AgentResponse_TrackImage | undefined; } /** Response confirming successful authentication */ @@ -76,6 +82,12 @@ export interface AgentResponse_TrackData { data: Uint8Array; } +export interface AgentResponse_TrackImage { + trackId: string; + contentType: string; + data: Uint8Array; +} + function createBaseAgentRequest(): AgentRequest { return { authRequest: undefined, @@ -83,6 +95,7 @@ function createBaseAgentRequest(): AgentRequest { removeTrack: undefined, trackData: undefined, interruptTrack: undefined, + captureImage: undefined, }; } @@ -103,6 +116,9 @@ export const AgentRequest: MessageFns = { if (message.interruptTrack !== undefined) { AgentRequest_InterruptTrack.encode(message.interruptTrack, writer.uint32(42).fork()).join(); } + if (message.captureImage !== undefined) { + AgentRequest_CaptureImage.encode(message.captureImage, writer.uint32(50).fork()).join(); + } return writer; }, @@ -153,6 +169,14 @@ export const AgentRequest: MessageFns = { message.interruptTrack = AgentRequest_InterruptTrack.decode(reader, reader.uint32()); continue; } + case 6: { + if (tag !== 50) { + break; + } + + message.captureImage = AgentRequest_CaptureImage.decode(reader, reader.uint32()); + continue; + } } if ((tag & 7) === 4 || tag === 0) { break; @@ -171,6 +195,7 @@ export const AgentRequest: MessageFns = { interruptTrack: isSet(object.interruptTrack) ? AgentRequest_InterruptTrack.fromJSON(object.interruptTrack) : undefined, + captureImage: isSet(object.captureImage) ? AgentRequest_CaptureImage.fromJSON(object.captureImage) : undefined, }; }, @@ -191,6 +216,9 @@ export const AgentRequest: MessageFns = { if (message.interruptTrack !== undefined) { obj.interruptTrack = AgentRequest_InterruptTrack.toJSON(message.interruptTrack); } + if (message.captureImage !== undefined) { + obj.captureImage = AgentRequest_CaptureImage.toJSON(message.captureImage); + } return obj; }, @@ -214,6 +242,9 @@ export const AgentRequest: MessageFns = { message.interruptTrack = (object.interruptTrack !== undefined && object.interruptTrack !== null) ? AgentRequest_InterruptTrack.fromPartial(object.interruptTrack) : undefined; + message.captureImage = (object.captureImage !== undefined && object.captureImage !== null) + ? AgentRequest_CaptureImage.fromPartial(object.captureImage) + : undefined; return message; }, }; @@ -644,8 +675,66 @@ export const AgentRequest_InterruptTrack: MessageFns = { + encode(message: AgentRequest_CaptureImage, writer: BinaryWriter = new BinaryWriter()): BinaryWriter { + if (message.trackId !== "") { + writer.uint32(10).string(message.trackId); + } + return writer; + }, + + decode(input: BinaryReader | Uint8Array, length?: number): AgentRequest_CaptureImage { + const reader = input instanceof BinaryReader ? input : new BinaryReader(input); + let end = length === undefined ? reader.len : reader.pos + length; + const message = createBaseAgentRequest_CaptureImage(); + while (reader.pos < end) { + const tag = reader.uint32(); + switch (tag >>> 3) { + case 1: { + if (tag !== 10) { + break; + } + + message.trackId = reader.string(); + continue; + } + } + if ((tag & 7) === 4 || tag === 0) { + break; + } + reader.skip(tag & 7); + } + return message; + }, + + fromJSON(object: any): AgentRequest_CaptureImage { + return { trackId: isSet(object.trackId) ? globalThis.String(object.trackId) : "" }; + }, + + toJSON(message: AgentRequest_CaptureImage): unknown { + const obj: any = {}; + if (message.trackId !== "") { + obj.trackId = message.trackId; + } + return obj; + }, + + create, I>>(base?: I): AgentRequest_CaptureImage { + return AgentRequest_CaptureImage.fromPartial(base ?? ({} as any)); + }, + fromPartial, I>>(object: I): AgentRequest_CaptureImage { + const message = createBaseAgentRequest_CaptureImage(); + message.trackId = object.trackId ?? ""; + return message; + }, +}; + function createBaseAgentResponse(): AgentResponse { - return { authenticated: undefined, trackData: undefined }; + return { authenticated: undefined, trackData: undefined, trackImage: undefined }; } export const AgentResponse: MessageFns = { @@ -656,6 +745,9 @@ export const AgentResponse: MessageFns = { if (message.trackData !== undefined) { AgentResponse_TrackData.encode(message.trackData, writer.uint32(18).fork()).join(); } + if (message.trackImage !== undefined) { + AgentResponse_TrackImage.encode(message.trackImage, writer.uint32(26).fork()).join(); + } return writer; }, @@ -682,6 +774,14 @@ export const AgentResponse: MessageFns = { message.trackData = AgentResponse_TrackData.decode(reader, reader.uint32()); continue; } + case 3: { + if (tag !== 26) { + break; + } + + message.trackImage = AgentResponse_TrackImage.decode(reader, reader.uint32()); + continue; + } } if ((tag & 7) === 4 || tag === 0) { break; @@ -697,6 +797,7 @@ export const AgentResponse: MessageFns = { ? AgentResponse_Authenticated.fromJSON(object.authenticated) : undefined, trackData: isSet(object.trackData) ? AgentResponse_TrackData.fromJSON(object.trackData) : undefined, + trackImage: isSet(object.trackImage) ? AgentResponse_TrackImage.fromJSON(object.trackImage) : undefined, }; }, @@ -708,6 +809,9 @@ export const AgentResponse: MessageFns = { if (message.trackData !== undefined) { obj.trackData = AgentResponse_TrackData.toJSON(message.trackData); } + if (message.trackImage !== undefined) { + obj.trackImage = AgentResponse_TrackImage.toJSON(message.trackImage); + } return obj; }, @@ -722,6 +826,9 @@ export const AgentResponse: MessageFns = { message.trackData = (object.trackData !== undefined && object.trackData !== null) ? AgentResponse_TrackData.fromPartial(object.trackData) : undefined; + message.trackImage = (object.trackImage !== undefined && object.trackImage !== null) + ? AgentResponse_TrackImage.fromPartial(object.trackImage) + : undefined; return message; }, }; @@ -861,6 +968,98 @@ export const AgentResponse_TrackData: MessageFns = { }, }; +function createBaseAgentResponse_TrackImage(): AgentResponse_TrackImage { + return { trackId: "", contentType: "", data: new Uint8Array(0) }; +} + +export const AgentResponse_TrackImage: MessageFns = { + encode(message: AgentResponse_TrackImage, writer: BinaryWriter = new BinaryWriter()): BinaryWriter { + if (message.trackId !== "") { + writer.uint32(10).string(message.trackId); + } + if (message.contentType !== "") { + writer.uint32(18).string(message.contentType); + } + if (message.data.length !== 0) { + writer.uint32(26).bytes(message.data); + } + return writer; + }, + + decode(input: BinaryReader | Uint8Array, length?: number): AgentResponse_TrackImage { + const reader = input instanceof BinaryReader ? input : new BinaryReader(input); + let end = length === undefined ? reader.len : reader.pos + length; + const message = createBaseAgentResponse_TrackImage(); + while (reader.pos < end) { + const tag = reader.uint32(); + switch (tag >>> 3) { + case 1: { + if (tag !== 10) { + break; + } + + message.trackId = reader.string(); + continue; + } + case 2: { + if (tag !== 18) { + break; + } + + message.contentType = reader.string(); + continue; + } + case 3: { + if (tag !== 26) { + break; + } + + message.data = reader.bytes(); + continue; + } + } + if ((tag & 7) === 4 || tag === 0) { + break; + } + reader.skip(tag & 7); + } + return message; + }, + + fromJSON(object: any): AgentResponse_TrackImage { + return { + trackId: isSet(object.trackId) ? globalThis.String(object.trackId) : "", + contentType: isSet(object.contentType) ? globalThis.String(object.contentType) : "", + data: isSet(object.data) ? bytesFromBase64(object.data) : new Uint8Array(0), + }; + }, + + toJSON(message: AgentResponse_TrackImage): unknown { + const obj: any = {}; + if (message.trackId !== "") { + obj.trackId = message.trackId; + } + if (message.contentType !== "") { + obj.contentType = message.contentType; + } + if (message.data.length !== 0) { + obj.data = base64FromBytes(message.data); + } + return obj; + }, + + create, I>>(base?: I): AgentResponse_TrackImage { + return AgentResponse_TrackImage.fromPartial(base ?? ({} as any)); + }, + fromPartial, I>>(object: I): AgentResponse_TrackImage { + const message = createBaseAgentResponse_TrackImage(); + message.trackId = object.trackId ?? ""; + message.contentType = object.contentType ?? ""; + message.data = object.data ?? new Uint8Array(0); + return message; + }, +}; + function bytesFromBase64(b64: string): Uint8Array { if ((globalThis as any).Buffer) { return Uint8Array.from(globalThis.Buffer.from(b64, "base64")); diff --git a/packages/fishjam-proto/src/fishjam/server_notifications.ts b/packages/fishjam-proto/src/fishjam/server_notifications.ts index 1d86636b..5a604350 100644 --- a/packages/fishjam-proto/src/fishjam/server_notifications.ts +++ b/packages/fishjam-proto/src/fishjam/server_notifications.ts @@ -44,6 +44,8 @@ export interface ServerMessage { viewerDisconnected?: ServerMessage_ViewerDisconnected | undefined; streamerConnected?: ServerMessage_StreamerConnected | undefined; streamerDisconnected?: ServerMessage_StreamerDisconnected | undefined; + channelAdded?: ServerMessage_ChannelAdded | undefined; + channelRemoved?: ServerMessage_ChannelRemoved | undefined; } export enum ServerMessage_PeerType { @@ -175,7 +177,7 @@ export interface ServerMessage_AuthRequest { token: string; } -/** Request sent by peer to subsribe for certain message type */ +/** Request sent by peer to subscribe for certain message type */ export interface ServerMessage_SubscribeRequest { eventType: ServerMessage_EventType; } @@ -201,7 +203,7 @@ export interface ServerMessage_HlsPlayable { componentId: string; } -/** Notification sent when the HLS recording is successfully uploded to AWS S3 */ +/** Notification sent when the HLS recording is successfully uploaded to AWS S3 */ export interface ServerMessage_HlsUploaded { roomId: string; } @@ -243,6 +245,22 @@ export interface ServerMessage_TrackMetadataUpdated { track: Track | undefined; } +/** Notification sent when a peer creates a channel */ +export interface ServerMessage_ChannelAdded { + roomId: string; + peerId?: string | undefined; + componentId?: string | undefined; + channelId: string; +} + +/** Notification sent when a peer deletes a channel */ +export interface ServerMessage_ChannelRemoved { + roomId: string; + peerId?: string | undefined; + componentId?: string | undefined; + channelId: string; +} + /** Notification sent when streamer successfully connects */ export interface ServerMessage_StreamConnected { streamId: string; @@ -303,6 +321,8 @@ function createBaseServerMessage(): ServerMessage { viewerDisconnected: undefined, streamerConnected: undefined, streamerDisconnected: undefined, + channelAdded: undefined, + channelRemoved: undefined, }; } @@ -386,6 +406,12 @@ export const ServerMessage: MessageFns = { if (message.streamerDisconnected !== undefined) { ServerMessage_StreamerDisconnected.encode(message.streamerDisconnected, writer.uint32(218).fork()).join(); } + if (message.channelAdded !== undefined) { + ServerMessage_ChannelAdded.encode(message.channelAdded, writer.uint32(226).fork()).join(); + } + if (message.channelRemoved !== undefined) { + ServerMessage_ChannelRemoved.encode(message.channelRemoved, writer.uint32(234).fork()).join(); + } return writer; }, @@ -604,6 +630,22 @@ export const ServerMessage: MessageFns = { message.streamerDisconnected = ServerMessage_StreamerDisconnected.decode(reader, reader.uint32()); continue; } + case 28: { + if (tag !== 226) { + break; + } + + message.channelAdded = ServerMessage_ChannelAdded.decode(reader, reader.uint32()); + continue; + } + case 29: { + if (tag !== 234) { + break; + } + + message.channelRemoved = ServerMessage_ChannelRemoved.decode(reader, reader.uint32()); + continue; + } } if ((tag & 7) === 4 || tag === 0) { break; @@ -671,6 +713,10 @@ export const ServerMessage: MessageFns = { streamerDisconnected: isSet(object.streamerDisconnected) ? ServerMessage_StreamerDisconnected.fromJSON(object.streamerDisconnected) : undefined, + channelAdded: isSet(object.channelAdded) ? ServerMessage_ChannelAdded.fromJSON(object.channelAdded) : undefined, + channelRemoved: isSet(object.channelRemoved) + ? ServerMessage_ChannelRemoved.fromJSON(object.channelRemoved) + : undefined, }; }, @@ -754,6 +800,12 @@ export const ServerMessage: MessageFns = { if (message.streamerDisconnected !== undefined) { obj.streamerDisconnected = ServerMessage_StreamerDisconnected.toJSON(message.streamerDisconnected); } + if (message.channelAdded !== undefined) { + obj.channelAdded = ServerMessage_ChannelAdded.toJSON(message.channelAdded); + } + if (message.channelRemoved !== undefined) { + obj.channelRemoved = ServerMessage_ChannelRemoved.toJSON(message.channelRemoved); + } return obj; }, @@ -840,6 +892,12 @@ export const ServerMessage: MessageFns = { message.streamerDisconnected = (object.streamerDisconnected !== undefined && object.streamerDisconnected !== null) ? ServerMessage_StreamerDisconnected.fromPartial(object.streamerDisconnected) : undefined; + message.channelAdded = (object.channelAdded !== undefined && object.channelAdded !== null) + ? ServerMessage_ChannelAdded.fromPartial(object.channelAdded) + : undefined; + message.channelRemoved = (object.channelRemoved !== undefined && object.channelRemoved !== null) + ? ServerMessage_ChannelRemoved.fromPartial(object.channelRemoved) + : undefined; return message; }, }; @@ -2429,6 +2487,222 @@ export const ServerMessage_TrackMetadataUpdated: MessageFns = { + encode(message: ServerMessage_ChannelAdded, writer: BinaryWriter = new BinaryWriter()): BinaryWriter { + if (message.roomId !== "") { + writer.uint32(10).string(message.roomId); + } + if (message.peerId !== undefined) { + writer.uint32(18).string(message.peerId); + } + if (message.componentId !== undefined) { + writer.uint32(26).string(message.componentId); + } + if (message.channelId !== "") { + writer.uint32(34).string(message.channelId); + } + return writer; + }, + + decode(input: BinaryReader | Uint8Array, length?: number): ServerMessage_ChannelAdded { + const reader = input instanceof BinaryReader ? input : new BinaryReader(input); + let end = length === undefined ? reader.len : reader.pos + length; + const message = createBaseServerMessage_ChannelAdded(); + while (reader.pos < end) { + const tag = reader.uint32(); + switch (tag >>> 3) { + case 1: { + if (tag !== 10) { + break; + } + + message.roomId = reader.string(); + continue; + } + case 2: { + if (tag !== 18) { + break; + } + + message.peerId = reader.string(); + continue; + } + case 3: { + if (tag !== 26) { + break; + } + + message.componentId = reader.string(); + continue; + } + case 4: { + if (tag !== 34) { + break; + } + + message.channelId = reader.string(); + continue; + } + } + if ((tag & 7) === 4 || tag === 0) { + break; + } + reader.skip(tag & 7); + } + return message; + }, + + fromJSON(object: any): ServerMessage_ChannelAdded { + return { + roomId: isSet(object.roomId) ? globalThis.String(object.roomId) : "", + peerId: isSet(object.peerId) ? globalThis.String(object.peerId) : undefined, + componentId: isSet(object.componentId) ? globalThis.String(object.componentId) : undefined, + channelId: isSet(object.channelId) ? globalThis.String(object.channelId) : "", + }; + }, + + toJSON(message: ServerMessage_ChannelAdded): unknown { + const obj: any = {}; + if (message.roomId !== "") { + obj.roomId = message.roomId; + } + if (message.peerId !== undefined) { + obj.peerId = message.peerId; + } + if (message.componentId !== undefined) { + obj.componentId = message.componentId; + } + if (message.channelId !== "") { + obj.channelId = message.channelId; + } + return obj; + }, + + create, I>>(base?: I): ServerMessage_ChannelAdded { + return ServerMessage_ChannelAdded.fromPartial(base ?? ({} as any)); + }, + fromPartial, I>>(object: I): ServerMessage_ChannelAdded { + const message = createBaseServerMessage_ChannelAdded(); + message.roomId = object.roomId ?? ""; + message.peerId = object.peerId ?? undefined; + message.componentId = object.componentId ?? undefined; + message.channelId = object.channelId ?? ""; + return message; + }, +}; + +function createBaseServerMessage_ChannelRemoved(): ServerMessage_ChannelRemoved { + return { roomId: "", peerId: undefined, componentId: undefined, channelId: "" }; +} + +export const ServerMessage_ChannelRemoved: MessageFns = { + encode(message: ServerMessage_ChannelRemoved, writer: BinaryWriter = new BinaryWriter()): BinaryWriter { + if (message.roomId !== "") { + writer.uint32(10).string(message.roomId); + } + if (message.peerId !== undefined) { + writer.uint32(18).string(message.peerId); + } + if (message.componentId !== undefined) { + writer.uint32(26).string(message.componentId); + } + if (message.channelId !== "") { + writer.uint32(34).string(message.channelId); + } + return writer; + }, + + decode(input: BinaryReader | Uint8Array, length?: number): ServerMessage_ChannelRemoved { + const reader = input instanceof BinaryReader ? input : new BinaryReader(input); + let end = length === undefined ? reader.len : reader.pos + length; + const message = createBaseServerMessage_ChannelRemoved(); + while (reader.pos < end) { + const tag = reader.uint32(); + switch (tag >>> 3) { + case 1: { + if (tag !== 10) { + break; + } + + message.roomId = reader.string(); + continue; + } + case 2: { + if (tag !== 18) { + break; + } + + message.peerId = reader.string(); + continue; + } + case 3: { + if (tag !== 26) { + break; + } + + message.componentId = reader.string(); + continue; + } + case 4: { + if (tag !== 34) { + break; + } + + message.channelId = reader.string(); + continue; + } + } + if ((tag & 7) === 4 || tag === 0) { + break; + } + reader.skip(tag & 7); + } + return message; + }, + + fromJSON(object: any): ServerMessage_ChannelRemoved { + return { + roomId: isSet(object.roomId) ? globalThis.String(object.roomId) : "", + peerId: isSet(object.peerId) ? globalThis.String(object.peerId) : undefined, + componentId: isSet(object.componentId) ? globalThis.String(object.componentId) : undefined, + channelId: isSet(object.channelId) ? globalThis.String(object.channelId) : "", + }; + }, + + toJSON(message: ServerMessage_ChannelRemoved): unknown { + const obj: any = {}; + if (message.roomId !== "") { + obj.roomId = message.roomId; + } + if (message.peerId !== undefined) { + obj.peerId = message.peerId; + } + if (message.componentId !== undefined) { + obj.componentId = message.componentId; + } + if (message.channelId !== "") { + obj.channelId = message.channelId; + } + return obj; + }, + + create, I>>(base?: I): ServerMessage_ChannelRemoved { + return ServerMessage_ChannelRemoved.fromPartial(base ?? ({} as any)); + }, + fromPartial, I>>(object: I): ServerMessage_ChannelRemoved { + const message = createBaseServerMessage_ChannelRemoved(); + message.roomId = object.roomId ?? ""; + message.peerId = object.peerId ?? undefined; + message.componentId = object.componentId ?? undefined; + message.channelId = object.channelId ?? ""; + return message; + }, +}; + function createBaseServerMessage_StreamConnected(): ServerMessage_StreamConnected { return { streamId: "" }; } diff --git a/packages/js-server-sdk/src/agent.ts b/packages/js-server-sdk/src/agent.ts index 56503913..51531d04 100644 --- a/packages/js-server-sdk/src/agent.ts +++ b/packages/js-server-sdk/src/agent.ts @@ -7,6 +7,7 @@ import { AgentRequest_TrackData, AgentResponse, AgentResponse_TrackData, + AgentResponse_TrackImage, Track as ProtoTrack, TrackType as ProtoTrackType, TrackEncoding, @@ -14,13 +15,14 @@ import { import { AgentCallbacks, Brand, FishjamConfig, PeerId } from './types'; import { getFishjamUrl, httpToWebsocket, WithPeerId } from './utils'; -const expectedEventsList = ['trackData'] as const; +const expectedEventsList = ['trackData', 'trackImage'] as const; /** * @useDeclaredType */ export type ExpectedAgentEvents = (typeof expectedEventsList)[number]; export type IncomingTrackData = Omit, 'peerId'> & { peerId: PeerId }; +export type IncomingTrackImage = NonNullable; export type OutgoingTrackData = Omit, 'peerId'> & { peerId: PeerId }; export type AgentTrack = Omit & { id: TrackId }; @@ -30,6 +32,7 @@ export type AudioCodecParameters = { encoding: 'opus' | 'pcm16'; sampleRate: 16000 | 24000 | 48000; channels: 1; + metadata?: object; }; export type TrackId = Brand; @@ -78,10 +81,11 @@ export class FishjamAgent extends (EventEmitter as new () => TypedEmitter TypedEmitter TypedEmitter Date: Mon, 9 Feb 2026 11:43:23 +0100 Subject: [PATCH 2/2] Adjust to CR --- .gitmodules | 1 - examples/multimodal/src/service/multimodal.ts | 2 +- packages/fishjam-proto/protos | 2 +- packages/js-server-sdk/src/agent.ts | 5 +---- packages/js-server-sdk/src/integrations/gemini.ts | 1 - 5 files changed, 3 insertions(+), 8 deletions(-) diff --git a/.gitmodules b/.gitmodules index 8f7a32f3..2294e3fc 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,4 +1,3 @@ [submodule "packages/fishjam-proto/protos"] path = packages/fishjam-proto/protos url = https://github.com/fishjam-cloud/protos.git - branch = agent-capture-image diff --git a/examples/multimodal/src/service/multimodal.ts b/examples/multimodal/src/service/multimodal.ts index e841473c..33cf6763 100644 --- a/examples/multimodal/src/service/multimodal.ts +++ b/examples/multimodal/src/service/multimodal.ts @@ -141,7 +141,7 @@ export class MultimodalService { } handleTrackAdded(message: TrackAdded) { - if (!message.track || message.track.type !== 1) return; // 2 = TRACK_TYPE_VIDEO + if (!message.track || message.track.type !== 1) return; const trackId = message.track.id as TrackId; const tracks = this.videoTracks.get(message.roomId); diff --git a/packages/fishjam-proto/protos b/packages/fishjam-proto/protos index 694aad31..168e1695 160000 --- a/packages/fishjam-proto/protos +++ b/packages/fishjam-proto/protos @@ -1 +1 @@ -Subproject commit 694aad31e504028cfebdeaf2bab317e4ad0b81b6 +Subproject commit 168e169597f90360a812ee26e94f3b3316c7d7d1 diff --git a/packages/js-server-sdk/src/agent.ts b/packages/js-server-sdk/src/agent.ts index 51531d04..6584a0f7 100644 --- a/packages/js-server-sdk/src/agent.ts +++ b/packages/js-server-sdk/src/agent.ts @@ -32,7 +32,6 @@ export type AudioCodecParameters = { encoding: 'opus' | 'pcm16'; sampleRate: 16000 | 24000 | 48000; channels: 1; - metadata?: object; }; export type TrackId = Brand; @@ -81,11 +80,10 @@ export class FishjamAgent extends (EventEmitter as new () => TypedEmitter TypedEmitter