From 842fe26eb5376d4869837f74c762034a4e1650b4 Mon Sep 17 00:00:00 2001 From: lgavincrl Date: Fri, 9 Jan 2026 14:59:34 +0000 Subject: [PATCH 1/4] Removed info admon from batch input, with the redirects to Flow and RT --- docs/speech-to-text/batch/input.mdx | 6 ------ 1 file changed, 6 deletions(-) diff --git a/docs/speech-to-text/batch/input.mdx b/docs/speech-to-text/batch/input.mdx index cc77e31f..4df32ba1 100644 --- a/docs/speech-to-text/batch/input.mdx +++ b/docs/speech-to-text/batch/input.mdx @@ -10,12 +10,6 @@ import batchSchema from "!openapi-schema-loader!@site/spec/batch.yaml"; # Input -:::info -This page documents audio inputs for transcription by **REST API** (a.k.a. Batch SaaS). -* For Realtime transcription, see the [Realtime Transcription input](/speech-to-text/realtime/input). -* For Flow Voice AI, see the [Flow Voice AI supported formats and limits](/voice-agents/flow/supported-formats-and-limits). -::: - ## Supported file types The following file formats types are supported for transcription by REST API: From df7331372d399840f6f181703ebb97d0459864db Mon Sep 17 00:00:00 2001 From: lgavincrl Date: Tue, 13 Jan 2026 13:51:42 +0000 Subject: [PATCH 2/4] Realtime input page formatting updates and next steps section --- docs/speech-to-text/realtime/input.mdx | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/docs/speech-to-text/realtime/input.mdx b/docs/speech-to-text/realtime/input.mdx index 69d9e01e..01e3a3f0 100644 --- a/docs/speech-to-text/realtime/input.mdx +++ b/docs/speech-to-text/realtime/input.mdx @@ -11,19 +11,12 @@ import realtimeSchema from "!asyncapi-schema-loader!@site/spec/realtime.yaml" # Input -:::info -This page is about the **Realtime transcription API** (websocket). -* For information on Batch SaaS, see the [Batch SaaS input](/speech-to-text/batch/input). -* For information on Flow Voice AI, see the [Flow Voice AI input](/voice-agents/flow/supported-formats-and-limits). 
-::: - ## Supported input audio formats - -Sessions can be configured to use two types of audio input, `file` and `raw`. Unless you have a specific reason to use the `file` option, we recommend using the `raw` option. - +Sessions can be configured to use two types of audio input: `file` and `raw`. +We recommend using the `raw` option, unless you have a specific reason to use the `file` option. :::tip -For capturing raw audio in the browser, try our `browser-audio-input` package, [available here on NPM](https://www.npmjs.com/package/@speechmatics/browser-audio-input). +For capturing raw audio in the browser, try our [`browser-audio-input` package](https://www.npmjs.com/package/@speechmatics/browser-audio-input). ::: ### `audio_format` @@ -36,3 +29,8 @@ The format must be supplied in the `audio_format` field of the `StartRecognition` message. After receiving a `RecognitionStarted` message, you can start sending audio over the Websocket connection. Audio is sent as binary data, encoded in the format specified in the `StartRecognition` message. See [Protocol overview](/api-ref/realtime-transcription-websocket#protocol-overview) for complete details of the API protocol. +## Next steps + +View our guides: +- [using a microphone](/speech-to-text/realtime/guides/python-using-microphone) to learn how to capture audio from a microphone. +- [using FFmpeg](/speech-to-text/realtime/guides/python-using-ffmpeg) to find out how to pipe microphone audio to the API. 
\ No newline at end of file From 4d374597eb7db288075b4b7b1e029502d1b726ca Mon Sep 17 00:00:00 2001 From: lgavincrl Date: Tue, 13 Jan 2026 15:05:12 +0000 Subject: [PATCH 3/4] Update page titles --- docs/speech-to-text/batch/limits.mdx | 2 +- docs/speech-to-text/realtime/limits.mdx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/speech-to-text/batch/limits.mdx b/docs/speech-to-text/batch/limits.mdx index 7966f8ed..a035c50e 100644 --- a/docs/speech-to-text/batch/limits.mdx +++ b/docs/speech-to-text/batch/limits.mdx @@ -5,7 +5,7 @@ description: 'Learn about rate limiting and usage limits for the Speechmatics Ba import HTTPMethodBadge from '@theme/HTTPMethodBadge' -# Limits – Batch transcription +# Limits – Batch ## Rate limiting and fair usage diff --git a/docs/speech-to-text/realtime/limits.mdx b/docs/speech-to-text/realtime/limits.mdx index 41a55033..7eb9e4b5 100644 --- a/docs/speech-to-text/realtime/limits.mdx +++ b/docs/speech-to-text/realtime/limits.mdx @@ -5,7 +5,7 @@ description: 'Learn about the limits for the Speechmatics Realtime API' import HTTPMethodBadge from '@theme/HTTPMethodBadge' -# Limits – Realtime transcription +# Limits – Realtime Speechmatics limits the number of hours of audio users can process each month to help manage load on our servers. 
The current limits (in hours) by account type are listed in the table below: From 4f66be294100ede9f1ffdb56f66cca18ecc84c50 Mon Sep 17 00:00:00 2001 From: lgavincrl Date: Tue, 13 Jan 2026 15:05:31 +0000 Subject: [PATCH 4/4] Update ampersands to 'and' --- docs/speech-to-text/realtime/turn-detection.mdx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/speech-to-text/realtime/turn-detection.mdx b/docs/speech-to-text/realtime/turn-detection.mdx index 396993e9..54f93a73 100644 --- a/docs/speech-to-text/realtime/turn-detection.mdx +++ b/docs/speech-to-text/realtime/turn-detection.mdx @@ -25,11 +25,11 @@ Use the end of utterance feature to help with turn detection in real-time conver ## Use cases -**Voice AI & conversational systems**: Enable voice assistants and chatbots to detect when the user has finished speaking, allowing the system to respond promptly without awkward delays. +**Voice AI and conversational systems**: Enable voice assistants and chatbots to detect when the user has finished speaking, allowing the system to respond promptly without awkward delays. **Realtime translation**: Critical for live interpretation services where translations need to be delivered as soon as the speaker completes their thought, maintaining the flow of conversation. -**Dictation & transcription**: Helps dictation software determine when users have completed their input, improving speed of final transcription and user experience. +**Dictation and transcription**: Helps dictation software determine when users have completed their input, improving speed of final transcription and user experience. ## End of utterance