From 520b5ed9b2bcf6b843e8b177ea3c9e51838ae019 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Herv=C3=A9=20BREDIN?= Date: Wed, 14 Jan 2026 11:38:55 +0100 Subject: [PATCH 1/2] feat: add support for STT orchestration --- CHANGELOG.md | 4 ++++ README.md | 15 +++++++++++++++ src/pyannoteai/sdk/client.py | 9 +++++++++ 3 files changed, 28 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8d809a7..11eb4ea 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # CHANGELOG +## Version 0.4.0 (2026-01-14) + +- feat: add support for STT orchestration + ## Version 0.3.0 (2025-10-10) - feat: add support for passing audio as a `{"audio": str | Path}` mapping diff --git a/README.md b/README.md index e4d2ecd..b53f5e6 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,21 @@ diarization = client.retrieve(job_id) Use `help(client.diarize)` to learn about options. +## STT orchestration + +```python +# submit a diarization job with STT orchestration +job_id = client.diarize( + media_url, + transcription=True, + transcription_config={"model": "parakeet-tdt-0.6b-v3"}) + +# retrieve speaker-attributed transcription +orchestration = client.retrieve(job_id) +# orchestration['output']['turnLevelTranscription'] +# orchestration['output']['wordLevelTranscription'] +``` + ## Speaker identification ```python diff --git a/src/pyannoteai/sdk/client.py b/src/pyannoteai/sdk/client.py index 5fabb55..a3bb1a5 100644 --- a/src/pyannoteai/sdk/client.py +++ b/src/pyannoteai/sdk/client.py @@ -370,6 +370,8 @@ def diarize( turn_level_confidence: bool = False, exclusive: bool = False, model: str = "precision-2", + transcription: bool = False, + transcription_config: dict | None = None, **kwargs, ) -> str: """Initiate a diarization job on the pyannoteAI web API @@ -394,6 +396,10 @@ def diarize( Enable exclusive speaker diarization. model : str, optional Defaults to "precision-2" + transcription : bool, optional + Enable STT orchestration. + transcription_config : dict, optional + STT configuration parameters, including model selection. **kwargs : optional Extra arguments to send in the body of the request. @@ -416,7 +422,10 @@ def diarize( "confidence": confidence, "turnLevelConfidence": turn_level_confidence, "exclusive": exclusive, + "transcription": transcription, + "transcriptionConfig": transcription_config or dict(), } + # add extra arguments to the request body json.update(kwargs) From 85b8498d43fe68b7eec2a3f81eee42d3070b8c72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Herv=C3=A9=20BREDIN?= Date: Wed, 14 Jan 2026 11:51:42 +0100 Subject: [PATCH 2/2] fix: code review --- src/pyannoteai/sdk/client.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/pyannoteai/sdk/client.py b/src/pyannoteai/sdk/client.py index a3bb1a5..0853238 100644 --- a/src/pyannoteai/sdk/client.py +++ b/src/pyannoteai/sdk/client.py @@ -423,9 +423,11 @@ def diarize( "turnLevelConfidence": turn_level_confidence, "exclusive": exclusive, "transcription": transcription, - "transcriptionConfig": transcription_config or dict(), } - + + if transcription_config is not None: + json["transcriptionConfig"] = transcription_config + # add extra arguments to the request body json.update(kwargs)