From 90e576926863da12a0bc45e0f981f089127ea4bc Mon Sep 17 00:00:00 2001 From: Win Cheng Date: Sun, 21 Sep 2025 17:59:59 -0700 Subject: [PATCH 1/3] updated response type for stt --- jigsawstack/audio.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/jigsawstack/audio.py b/jigsawstack/audio.py index 575b839..c7a7acf 100644 --- a/jigsawstack/audio.py +++ b/jigsawstack/audio.py @@ -34,6 +34,8 @@ class SpeechToTextResponse(BaseResponse): text: str chunks: List[ChunkParams] speakers: Optional[List[BySpeakerParams]] + language_detected: Optional[str] + confidence: Optional[float] class SpeechToTextWebhookResponse(BaseResponse): From 1487dfce40d7afa5a7b598f9fbb225ef4a8d6dc1 Mon Sep 17 00:00:00 2001 From: Win Cheng Date: Sun, 21 Sep 2025 18:05:54 -0700 Subject: [PATCH 2/3] update stt descriptions --- jigsawstack/audio.py | 50 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/jigsawstack/audio.py b/jigsawstack/audio.py index c7a7acf..0cfc23e 100644 --- a/jigsawstack/audio.py +++ b/jigsawstack/audio.py @@ -10,13 +10,44 @@ class SpeechToTextParams(TypedDict): url: NotRequired[str] + """ + the url of the audio file to transcribe, optional if file_store_key is provided + """ + file_store_key: NotRequired[str] + """ + the file store key of the audio file to transcribe, optional if url is provided + """ + language: NotRequired[Union[str, Literal["auto"]]] + """ + The language to transcribe or translate the file into. Use “auto” for automatic language detection, or specify a language code. If not specified, defaults to automatic detection. All supported language codes can be found in the JigsawStack API documentation. + """ + translate: NotRequired[bool] + """ + When set to true, translates the content into English (or the specified language if language parameter is provided) + """ + by_speaker: NotRequired[bool] + """ + Identifies and separates different speakers in the audio file. When enabled, the response will include a speakers array with speaker-segmented transcripts. 
+ """ + webhook_url: NotRequired[str] + """ + Webhook URL to send result to. When provided, the API will process asynchronously and send results to this URL when completed. + """ + batch_size: NotRequired[int] + """ + The batch size to return. Maximum value is 40. This controls how the audio is chunked for processing. + """ + chunk_duration: NotRequired[int] + """ + the duration of each chunk in seconds, defaults to 30 + """ class ChunkParams(TypedDict): @@ -32,10 +63,29 @@ class BySpeakerParams(ChunkParams): class SpeechToTextResponse(BaseResponse): text: str + """ + the text of the transcription + """ + chunks: List[ChunkParams] + """ + the chunks of the transcription + """ + speakers: Optional[List[BySpeakerParams]] + """ + the speakers of the transcription, available if by_speaker is set to true + """ + language_detected: Optional[str] + """ + the language detected in the transcription, available if language is set to auto + """ + confidence: Optional[float] + """ + the confidence of the transcription language detection, available if language is set to auto + """ class SpeechToTextWebhookResponse(BaseResponse): From 0d71a8da0e88a41540808312f42270f50a024fe3 Mon Sep 17 00:00:00 2001 From: Win Cheng Date: Sun, 21 Sep 2025 18:08:31 -0700 Subject: [PATCH 3/3] updated description --- jigsawstack/audio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jigsawstack/audio.py b/jigsawstack/audio.py index 0cfc23e..22b1f7a 100644 --- a/jigsawstack/audio.py +++ b/jigsawstack/audio.py @@ -46,7 +46,7 @@ class SpeechToTextParams(TypedDict): chunk_duration: NotRequired[int] """ - the duration of each chunk in seconds, defaults to 30 + the duration of each chunk in seconds, maximum value is 15, defaults to 3 """