diff --git a/specs/signalwire-rest/fabric-api/ai-agent/models/ai/ai_params.tsp b/specs/signalwire-rest/fabric-api/ai-agent/models/ai/ai_params.tsp index b862a2f07..ea74252b0 100644 --- a/specs/signalwire-rest/fabric-api/ai-agent/models/ai/ai_params.tsp +++ b/specs/signalwire-rest/fabric-api/ai-agent/models/ai/ai_params.tsp @@ -11,16 +11,47 @@ enum Direction { outbound, } +@summary("Conversation message role") +enum ConversationRole { + @doc("A message from the user.") + user, + + @doc("A message from the AI assistant.") + assistant, + + @doc("A system message providing instructions or context.") + system, +} + +@summary("Conversation message object") +@doc("A message object representing a single turn in the conversation history.") +model ConversationMessage { + @doc("The role of the message sender.") + role: ConversationRole; + + @doc("The text content of the message.") + content: string; + + @doc("Optional language code for the message (e.g., 'en', 'es', 'fr').") + lang?: string; +} + @summary("params object") model AIParams { @doc("Instructs the agent to acknowledge crosstalk and confirm user input when the user speaks over the agent.") acknowledge_interruptions?: boolean; + @doc("Sets the name the AI agent responds to for wake/activation purposes. When using `enable_pause`, `start_paused`, or `speak_when_spoken_to`, the user must say this name to get the agent's attention.") + ai_name?: string = "computer"; + @doc("Adjust the volume of the AI. Allowed values from `-50` - `50`.") @minValue(-50) @maxValue(50) ai_volume?: integer; + @doc("A custom identifier for the AI application instance. This name is included in webhook payloads.") + app_name?: string = "swml app"; + @doc("Amount of time, in ms, to wait before prompting the user to respond. Allowed values from `10,000` - `600,000`. 
Set to `0` to disable.") attention_timeout?: IntegerOrZero | 0; @@ -57,6 +88,9 @@ model AIParams { @example("Place an order") conscience?: string; + @doc("Injects pre-existing conversation history into the AI session at startup. This allows you to seed the AI agent with context from a previous conversation or provide example interactions.") + convo?: ConversationMessage[]; + @doc("Used by `check_for_input` and `save_conversation` to identify an individual conversation.") @example("Conversation ID") conversation_id?: string; @@ -70,6 +104,9 @@ model AIParams { @example("https://example.com") debug_webhook_url?: url; + @doc("Enables debug mode for the AI session.") + debug?: boolean | integer; + @doc("Forces the direction of the call to the assistant. Valid values are `inbound` and `outbound`.") direction?: Direction[]; @@ -87,6 +124,15 @@ model AIParams { @maxValue(10000) end_of_speech_timeout?: integer; + @doc("Enables the inner dialog feature for background conversation analysis.") + enable_inner_dialog?: boolean = false; + + @doc("Enables the pause/resume functionality for the AI agent.") + enable_pause?: boolean = false; + + @doc("Enables intelligent turn detection that monitors partial speech transcripts.") + enable_turn_detection?: boolean = true; + @doc("The stability slider determines how stable the voice is and the randomness between each generation. 
Lowering this slider introduces a broader emotional range for the voice.") eleven_labs_stability?: float; @@ -110,6 +156,15 @@ model AIParams { @maxValue(3600000) inactivity_timeout?: integer; + @doc("Specifies the AI model to use for the inner dialog feature.") + inner_dialog_model?: string; + + @doc("The system prompt that guides the inner dialog AI's behavior.") + inner_dialog_prompt?: string = "The assistant is intelligent and straightforward, does its job well and is not excessively polite."; + + @doc("When enabled, synchronizes the inner dialog with the main conversation flow.") + inner_dialog_synced?: boolean = false; + @doc(""" Check for input function with check_for_input. Example use case: Feeding an inbound SMS to AI on a voice call, eg., for collecting an email address or other complex information. @@ -130,11 +185,22 @@ model AIParams { @example("America/Ensenada") local_tz?: string; + @doc("Sets the maximum number of tokens the AI model can generate in a single response.") + @minValue(1) + @maxValue(16384) + max_response_tokens?: integer; + @doc("Sets a time duration for the outbound call recipient to respond to the AI agent before timeout, in a range from `10000` to `600000`.") @minValue(10000) @maxValue(600000) outbound_attention_timeout?: integer; + @doc("When enabled, global_data persists across multiple AI agent invocations within the same call.") + persist_global_data?: boolean = true; + + @doc("Specifies the output format for structured prompts. Valid values are `markdown` or `xml`.") + pom_format?: "markdown" | "xml" = "markdown"; + @doc(""" Send a summary of the conversation after the call ends. This requires a `post_url` to be set in the ai parameters and the `conversation_id` defined below. 
@@ -142,6 +208,12 @@ model AIParams { """) save_conversation?: boolean; + @doc("When enabled, the AI agent remains silent until directly addressed by name.") + speak_when_spoken_to?: boolean = false; + + @doc("When enabled, the AI agent starts in a paused state.") + start_paused?: boolean = false; + @doc("Allows tweaking any of the indicated settings, such as `barge_match_string`, using the returned SWML from the SWAIG function.") swaig_allow_settings?: boolean; @@ -151,12 +223,26 @@ model AIParams { @doc("Post entire conversation to any SWAIG call.") swaig_post_conversation?: boolean; + @doc("Controls whether SWML variables are included in SWAIG function webhook payloads.") + swaig_post_swml_vars?: boolean | string[]; + @doc("Pass a summary of a conversation from one AI agent to another. For example, transfer a call summary between support agents in two departments.") transfer_summary?: boolean; + @doc("Time in milliseconds to wait after detecting a potential end-of-turn before finalizing speech recognition.") + @minValue(0) + @maxValue(10000) + turn_detection_timeout?: integer = 250; + + @doc("Configures Silero Voice Activity Detection (VAD) settings. Format: `threshold` or `threshold:frame_ms`. The threshold (0-100) sets sensitivity for detecting voice activity. The optional frame_ms (16-40) sets frame duration in milliseconds.") + vad_config?: string; + @doc("Enable verbose logging.") verbose_logs?: boolean; @doc("When false, AI agent will initialize dialogue after call is setup. 
When true, agent will wait for the user to speak first.") wait_for_user?: boolean; + + @doc("Specifies an additional prefix that must be spoken along with the agent's name to wake the agent.") + wake_prefix?: string; } diff --git a/specs/signalwire-rest/fabric-api/tsp-output/@typespec/openapi3/openapi.yaml b/specs/signalwire-rest/fabric-api/tsp-output/@typespec/openapi3/openapi.yaml index 9dfe85376..b91705025 100644 --- a/specs/signalwire-rest/fabric-api/tsp-output/@typespec/openapi3/openapi.yaml +++ b/specs/signalwire-rest/fabric-api/tsp-output/@typespec/openapi3/openapi.yaml @@ -4572,11 +4572,19 @@ components: acknowledge_interruptions: type: boolean description: Instructs the agent to acknowledge crosstalk and confirm user input when the user speaks over the agent. + ai_name: + type: string + description: Sets the name the AI agent responds to for wake/activation purposes. When using `enable_pause`, `start_paused`, or `speak_when_spoken_to`, the user must say this name to get the agent's attention. + default: computer ai_volume: type: integer minimum: -50 maximum: 50 description: Adjust the volume of the AI. Allowed values from `-50` - `50`. + app_name: + type: string + description: A custom identifier for the AI application instance. This name is included in webhook payloads. + default: swml app attention_timeout: anyOf: - $ref: '#/components/schemas/IntegerOrZero' @@ -4618,6 +4626,11 @@ components: type: string description: Sets the prompt which binds the agent to its purpose. example: Place an order + convo: + type: array + items: + $ref: '#/components/schemas/ConversationMessage' + description: Injects pre-existing conversation history into the AI session at startup. This allows you to seed the AI agent with context from a previous conversation or provide example interactions. conversation_id: type: string description: Used by `check_for_input` and `save_conversation` to identify an individual conversation. 
@@ -4632,6 +4645,11 @@ components: format: uri description: Each interaction between the AI and end user is posted in real time to the established URL. example: https://example.com + debug: + anyOf: + - type: boolean + - type: integer + description: Enables debug mode for the AI session. direction: type: array items: @@ -4651,6 +4669,18 @@ components: minimum: 250 maximum: 10000 description: Amount of silence, in ms, at the end of an utterance to detect end of speech. Allowed values from `250` - `10,000`. + enable_inner_dialog: + type: boolean + description: Enables the inner dialog feature for background conversation analysis. + default: false + enable_pause: + type: boolean + description: Enables the pause/resume functionality for the AI agent. + default: false + enable_turn_detection: + type: boolean + description: Enables intelligent turn detection that monitors partial speech transcripts. + default: true eleven_labs_stability: type: number description: The stability slider determines how stable the voice is and the randomness between each generation. Lowering this slider introduces a broader emotional range for the voice. @@ -4675,6 +4705,17 @@ components: minimum: 10000 maximum: 3600000 description: Amount of time, in ms, to wait before exiting the app due to inactivity. Allowed values from `10,000` - `3,600,000`. + inner_dialog_model: + type: string + description: Specifies the AI model to use for the inner dialog feature. + inner_dialog_prompt: + type: string + description: The system prompt that guides the inner dialog AI's behavior. + default: The assistant is intelligent and straightforward, does its job well and is not excessively polite. + inner_dialog_synced: + type: boolean + description: When enabled, synchronizes the inner dialog with the main conversation flow. + default: false input_poll_freq: type: string description: |- @@ -4694,17 +4735,41 @@ components: type: string description: The local timezone setting for the AI. 
Value should use `IANA TZ ID` example: America/Ensenada + max_response_tokens: + type: integer + minimum: 1 + maximum: 16384 + description: Sets the maximum number of tokens the AI model can generate in a single response. outbound_attention_timeout: type: integer minimum: 10000 maximum: 600000 description: Sets a time duration for the outbound call recipient to respond to the AI agent before timeout, in a range from `10000` to `600000`. + persist_global_data: + type: boolean + description: When enabled, global_data persists across multiple AI agent invocations within the same call. + default: true + pom_format: + type: string + enum: + - markdown + - xml + description: Specifies the output format for structured prompts. Valid values are `markdown` or `xml`. + default: markdown save_conversation: type: boolean description: |- Send a summary of the conversation after the call ends. This requires a `post_url` to be set in the ai parameters and the `conversation_id` defined below. This eliminates the need for a `post_prompt` in the ai parameters. + speak_when_spoken_to: + type: boolean + description: When enabled, the AI agent remains silent until directly addressed by name. + default: false + start_paused: + type: boolean + description: When enabled, the AI agent starts in a paused state. + default: false swaig_allow_settings: type: boolean description: Allows tweaking any of the indicated settings, such as `barge_match_string`, using the returned SWML from the SWAIG function. @@ -4714,15 +4779,34 @@ components: swaig_post_conversation: type: boolean description: Post entire conversation to any SWAIG call. + swaig_post_swml_vars: + anyOf: + - type: boolean + - type: array + items: + type: string + description: Controls whether SWML variables are included in SWAIG function webhook payloads. transfer_summary: type: boolean description: Pass a summary of a conversation from one AI agent to another. 
For example, transfer a call summary between support agents in two departments. + turn_detection_timeout: + type: integer + minimum: 0 + maximum: 10000 + description: Time in milliseconds to wait after detecting a potential end-of-turn before finalizing speech recognition. + default: 250 + vad_config: + type: string + description: 'Configures Silero Voice Activity Detection (VAD) settings. Format: `threshold` or `threshold:frame_ms`. The threshold (0-100) sets sensitivity for detecting voice activity. The optional frame_ms (16-40) sets frame duration in milliseconds.' verbose_logs: type: boolean description: Enable verbose logging. wait_for_user: type: boolean description: When false, AI agent will initialize dialogue after call is setup. When true, agent will wait for the user to speak first. + wake_prefix: + type: string + description: Specifies an additional prefix that must be spoken along with the agent's name to wake the agent. title: params object AIPostPrompt: type: object @@ -6593,6 +6677,31 @@ components: additionalProperties: $ref: '#/components/schemas/ContextStepsUpdate' title: contexts + ConversationMessage: + type: object + required: + - role + - content + properties: + role: + allOf: + - $ref: '#/components/schemas/ConversationRole' + description: The role of the message sender. + content: + type: string + description: The text content of the message. + lang: + type: string + description: Optional language code for the message (e.g., 'en', 'es', 'fr'). + description: A message object representing a single turn in the conversation history. 
+ title: Conversation message object + ConversationRole: + type: string + enum: + - user + - assistant + - system + title: Conversation message role CxmlApplication: type: object required: diff --git a/specs/swml/Methods/ai/ai_params.tsp b/specs/swml/Methods/ai/ai_params.tsp index 012880404..1e2f2e7ba 100644 --- a/specs/swml/Methods/ai/ai_params.tsp +++ b/specs/swml/Methods/ai/ai_params.tsp @@ -17,6 +17,31 @@ enum Direction { outbound, } +@summary("Conversation message role") +enum ConversationRole { + @doc("A message from the user.") + user, + + @doc("A message from the AI assistant.") + assistant, + + @doc("A system message providing instructions or context.") + system, +} + +@summary("Conversation message object") +@doc("A message object representing a single turn in the conversation history.") +model ConversationMessage { + @doc("The role of the message sender.") + role: ConversationRole; + + @doc("The text content of the message.") + content: string; + + @doc("Optional language code for the message (e.g., 'en', 'es', 'fr').") + lang?: string; +} + @summary("params object") model AIParams { @doc("Instructs the agent to acknowledge crosstalk and confirm user input when the user speaks over the agent.") @@ -25,11 +50,17 @@ model AIParams { @doc("The model to use for the AI. Allowed values are `gpt-4o-mini`, `gpt-4.1-mini`, and `gpt-4.1-nano`.") ai_model?: SupportedAIModels; + @doc("Sets the name the AI agent responds to for wake/activation purposes. When using `enable_pause`, `start_paused`, or `speak_when_spoken_to`, the user must say this name to get the agent's attention. The name matching is case-insensitive.") + ai_name?: string = "computer"; + @doc("Adjust the volume of the AI. Allowed values from `-50` - `50`.") @minValue(-50) @maxValue(50) ai_volume?: integer | SWMLVar; + @doc("A custom identifier for the AI application instance. 
This name is included in webhook payloads, allowing backend systems to identify which AI configuration made the request.") + app_name?: string = "swml app"; + @doc(""" If true, enables smart formatting in ASR (Automatic Speech Recognition). This improves the formatting of numbers, dates, times, and other entities in the transcript. @@ -81,6 +112,28 @@ model AIParams { """) enable_barge?: string | boolean | SWMLVar; + @doc(""" + Enables the inner dialog feature, which runs a separate AI process in the background + that analyzes the conversation and provides real-time insights to the main AI agent. + This gives the agent a form of "internal thought process" that can help it make better decisions. + """) + enable_inner_dialog?: boolean | SWMLVar = false; + + @doc(""" + Enables the pause/resume functionality for the AI agent. When enabled, a `pause_conversation` + function is automatically added that the AI can call when the user says things like "hold on", + "wait", or "pause". While paused, the agent stops responding until the user speaks the agent's + name (set via `ai_name`) to resume. Cannot be used together with `speak_when_spoken_to`. + """) + enable_pause?: boolean | SWMLVar = false; + + @doc(""" + Enables intelligent turn detection that monitors partial speech transcripts for sentence-ending + punctuation. When detected, the system can proactively finalize the speech recognition, + reducing latency before the AI responds. Works with `turn_detection_timeout`. + """) + enable_turn_detection?: boolean | SWMLVar = true; + @doc(""" Takes a string, including a regular expression, defining barge behavior. For example, this param can direct the AI to stop when the word 'hippopotomus' is input. @@ -101,6 +154,9 @@ model AIParams { @doc("Sets the prompt which binds the agent to its purpose.") conscience?: string; + @doc("Injects pre-existing conversation history into the AI session at startup. 
This allows you to seed the AI agent with context from a previous conversation or provide example interactions.") + convo?: ConversationMessage[]; + @doc("Used by `check_for_input` and `save_conversation` to identify an individual conversation.") conversation_id?: string; @@ -115,6 +171,9 @@ model AIParams { @doc("Each interaction between the AI and end user is posted in real time to the established URL.") debug_webhook_url?: url; + @doc("Enables debug mode for the AI session. When enabled, additional diagnostic information is logged including turn detection events, speech processing details, and internal state changes.") + debug?: boolean | integer | SWMLVar; + @doc("Forces the direction of the call to the assistant. Valid values are `inbound` and `outbound`.") direction?: Direction | SWMLVar; @@ -206,6 +265,15 @@ model AIParams { @maxValue(3600000) inactivity_timeout?: integer | SWMLVar; + @doc("Specifies the AI model to use for the inner dialog feature. Can be set to a different (often smaller/faster) model than the main conversation model. Only used when `enable_inner_dialog` is `true`.") + inner_dialog_model?: SupportedAIModels; + + @doc("The system prompt that guides the inner dialog AI's behavior. Only used when `enable_inner_dialog` is `true`.") + inner_dialog_prompt?: string = "The assistant is intelligent and straightforward, does its job well and is not excessively polite."; + + @doc("When enabled, synchronizes the inner dialog with the main conversation flow, waiting for user input before injection. Only used when `enable_inner_dialog` is `true`.") + inner_dialog_synced?: boolean | SWMLVar = false; + @doc("Amount of time, in ms, to wait before starting the conversation. Allowed values from `0` - `300,000`.") @minValue(0) @maxValue(300000) @@ -242,6 +310,11 @@ model AIParams { @maxValue(30) max_emotion?: integer | SWMLVar; + @doc("Sets the maximum number of tokens the AI model can generate in a single response. 
Lower values produce shorter responses and reduce latency.") + @minValue(1) + @maxValue(16384) + max_response_tokens?: integer | SWMLVar; + @doc("The OpenAI ASR (Automatic Speech Recognition) engine to use. Allowed values are `nova-2` and `nova-3`. Defaults to `nova-3`.") openai_asr_engine?: "nova-2" | "nova-3"; @@ -250,6 +323,16 @@ model AIParams { @maxValue(600000) outbound_attention_timeout?: integer | SWMLVar; + @doc(""" + When enabled, the `global_data` object is automatically saved to a channel variable + and restored when a new AI session starts on the same call. This allows data to persist + across multiple AI agent invocations within the same call. + """) + persist_global_data?: boolean | SWMLVar = true; + + @doc("Specifies the output format for structured prompts when using the `pom` array in prompt definitions. Valid values are `markdown` or `xml`.") + pom_format?: "markdown" | "xml" = "markdown"; + @doc(""" Send a summary of the conversation after the call ends. This requires a `post_url` to be set in the ai parameters and the `conversation_id` defined below. @@ -272,6 +355,19 @@ model AIParams { @maxValue(600000) speech_timeout?: integer | SWMLVar; + @doc(""" + When enabled, the AI agent remains silent until directly addressed by name (using `ai_name`). + The user must say the agent's name to activate it for each interaction. + Cannot be used together with `enable_pause`. + """) + speak_when_spoken_to?: boolean | SWMLVar = false; + + @doc(""" + When enabled, the AI agent starts in a paused state and will not respond until the user + speaks the agent's name (set via `ai_name`). Automatically enables `enable_pause`. + """) + start_paused?: boolean | SWMLVar = false; + @doc("The static greeting to play when the call is answered. 
This will always play at the beginning of the call.") static_greeting?: string; @@ -293,6 +389,13 @@ model AIParams { @doc("Allows SWAIG to set global data that persists across calls.") swaig_set_global_data?: boolean | SWMLVar; + @doc(""" + Controls whether SWML variables are included in SWAIG function webhook payloads. + When set to `true`, all SWML variables are posted. When set to an array of strings, + only the specified variable names are included. + """) + swaig_post_swml_vars?: boolean | string[] | SWMLVar; + @doc("The model to use for the AI's thinking capabilities. Allowed values are `gpt-4o-mini`, `gpt-4.1-mini`, and `gpt-4.1-nano`.") thinking_model?: SupportedAIModels; @@ -311,6 +414,15 @@ model AIParams { @doc("Pass a summary of a conversation from one AI agent to another. For example, transfer a call summary between support agents in two departments.") transfer_summary?: boolean | SWMLVar; + @doc(""" + Time in milliseconds to wait after detecting a potential end-of-turn before finalizing speech recognition. + A shorter timeout results in faster response times but may cut off the user if they pause mid-sentence. + Set to `0` to finalize immediately. Only used when `enable_turn_detection` is `true`. + """) + @minValue(0) + @maxValue(10000) + turn_detection_timeout?: integer | SWMLVar = 250; + @doc(""" The format for the AI agent to reference phone numbers. Allowed values are `international` and `national`. @@ -337,8 +449,22 @@ model AIParams { @doc("The model to use for the AI's vision capabilities. Allowed values are `gpt-4o-mini`, `gpt-4.1-mini`, and `gpt-4.1-nano`.") vision_model?: SupportedAIModels; + @doc(""" + Configures Silero Voice Activity Detection (VAD) settings. Format: `"threshold"` or `"threshold:frame_ms"`. + The threshold (0-100) sets sensitivity for detecting voice activity. + The optional frame_ms (16-40) sets frame duration in milliseconds. 
+ """) + vad_config?: string; + @doc("When false, AI agent will initialize dialogue after call is setup. When true, agent will wait for the user to speak first.") wait_for_user?: boolean | SWMLVar; + @doc(""" + Specifies an additional prefix that must be spoken along with the agent's name (`ai_name`) + to wake the agent from a paused state. For example, if `ai_name` is "computer" and + `wake_prefix` is "hey", the user would need to say "hey computer" to activate the agent. + """) + wake_prefix?: string; + ...TypeSpec.Record; } diff --git a/specs/swml/tsp-output/@typespec/json-schema/SWMLObject.json b/specs/swml/tsp-output/@typespec/json-schema/SWMLObject.json index 5877e0ce6..35c7869d0 100644 --- a/specs/swml/tsp-output/@typespec/json-schema/SWMLObject.json +++ b/specs/swml/tsp-output/@typespec/json-schema/SWMLObject.json @@ -3600,6 +3600,11 @@ ], "description": "The model to use for the AI. Allowed values are `gpt-4o-mini`, `gpt-4.1-mini`, and `gpt-4.1-nano`." }, + "ai_name": { + "type": "string", + "default": "computer", + "description": "Sets the name the AI agent responds to for wake/activation purposes. When using `enable_pause`, `start_paused`, or `speak_when_spoken_to`, the user must say this name to get the agent's attention. The name matching is case-insensitive." + }, "ai_volume": { "anyOf": [ { @@ -3613,6 +3618,11 @@ "maximum": 50, "description": "Adjust the volume of the AI. Allowed values from `-50` - `50`." }, + "app_name": { + "type": "string", + "default": "swml app", + "description": "A custom identifier for the AI application instance. This name is included in webhook payloads, allowing backend systems to identify which AI configuration made the request." + }, "asr_smart_format": { "anyOf": [ { @@ -3733,6 +3743,42 @@ ], "description": "Controls the barge behavior. 
Allowed values are `\"complete\"`, `\"partial\"`, `\"all\"`, or boolean.\n**Default:** `\"complete,partial\"`" }, + "enable_inner_dialog": { + "anyOf": [ + { + "type": "boolean" + }, + { + "$ref": "#/$defs/SWMLVar" + } + ], + "default": false, + "description": "Enables the inner dialog feature, which runs a separate AI process in the background\nthat analyzes the conversation and provides real-time insights to the main AI agent.\nThis gives the agent a form of \"internal thought process\" that can help it make better decisions." + }, + "enable_pause": { + "anyOf": [ + { + "type": "boolean" + }, + { + "$ref": "#/$defs/SWMLVar" + } + ], + "default": false, + "description": "Enables the pause/resume functionality for the AI agent. When enabled, a `pause_conversation`\nfunction is automatically added that the AI can call when the user says things like \"hold on\",\n\"wait\", or \"pause\". While paused, the agent stops responding until the user speaks the agent's\nname (set via `ai_name`) to resume. Cannot be used together with `speak_when_spoken_to`." + }, + "enable_turn_detection": { + "anyOf": [ + { + "type": "boolean" + }, + { + "$ref": "#/$defs/SWMLVar" + } + ], + "default": true, + "description": "Enables intelligent turn detection that monitors partial speech transcripts for sentence-ending\npunctuation. When detected, the system can proactively finalize the speech recognition,\nreducing latency before the AI responds. Works with `turn_detection_timeout`." + }, "barge_match_string": { "type": "string", "description": "Takes a string, including a regular expression, defining barge behavior.\nFor example, this param can direct the AI to stop when the word 'hippopotomus' is input." @@ -3776,6 +3822,13 @@ "type": "string", "description": "Sets the prompt which binds the agent to its purpose." 
}, + "convo": { + "type": "array", + "items": { + "$ref": "#/$defs/ConversationMessage" + }, + "description": "Injects pre-existing conversation history into the AI session at startup. This allows you to seed the AI agent with context from a previous conversation or provide example interactions." + }, "conversation_id": { "type": "string", "description": "Used by `check_for_input` and `save_conversation` to identify an individual conversation." @@ -3809,6 +3862,20 @@ "format": "uri", "description": "Each interaction between the AI and end user is posted in real time to the established URL." }, + "debug": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "integer" + }, + { + "$ref": "#/$defs/SWMLVar" + } + ], + "description": "Enables debug mode for the AI session. When enabled, additional diagnostic information is logged including turn detection events, speech processing details, and internal state changes." + }, "direction": { "anyOf": [ { @@ -4002,6 +4069,43 @@ "maximum": 3600000, "description": "Amount of time, in ms, to wait before exiting the app due to inactivity. Allowed values from `10,000` - `3,600,000`. **Default:** `600000` ms (10 minutes)." }, + "inner_dialog_model": { + "anyOf": [ + { + "type": "string", + "const": "gpt-4o-mini" + }, + { + "type": "string", + "const": "gpt-4.1-mini" + }, + { + "type": "string", + "const": "gpt-4.1-nano" + }, + { + "type": "string" + } + ], + "description": "Specifies the AI model to use for the inner dialog feature. Can be set to a different (often smaller/faster) model than the main conversation model. Only used when `enable_inner_dialog` is `true`." + }, + "inner_dialog_prompt": { + "type": "string", + "default": "The assistant is intelligent and straightforward, does its job well and is not excessively polite.", + "description": "The system prompt that guides the inner dialog AI's behavior. Only used when `enable_inner_dialog` is `true`." 
+ }, + "inner_dialog_synced": { + "anyOf": [ + { + "type": "boolean" + }, + { + "$ref": "#/$defs/SWMLVar" + } + ], + "default": false, + "description": "When enabled, synchronizes the inner dialog with the main conversation flow, waiting for user input before injection. Only used when `enable_inner_dialog` is `true`." + }, "initial_sleep_ms": { "anyOf": [ { @@ -4073,6 +4177,19 @@ "maximum": 30, "description": "Sets the maximum emotion intensity for the AI voice. Allowed values from `1` - `30`. Default is `30`." }, + "max_response_tokens": { + "anyOf": [ + { + "type": "integer" + }, + { + "$ref": "#/$defs/SWMLVar" + } + ], + "minimum": 1, + "maximum": 16384, + "description": "Sets the maximum number of tokens the AI model can generate in a single response. Lower values produce shorter responses and reduce latency." + }, "openai_asr_engine": { "anyOf": [ { @@ -4099,6 +4216,32 @@ "maximum": 600000, "description": "Sets a time duration for the outbound call recipient to respond to the AI agent before timeout, in a range from `10000` to `600000`. **Default:** `120000` ms (2 minutes)." }, + "persist_global_data": { + "anyOf": [ + { + "type": "boolean" + }, + { + "$ref": "#/$defs/SWMLVar" + } + ], + "default": true, + "description": "When enabled, the `global_data` object is automatically saved to a channel variable\nand restored when a new AI session starts on the same call. This allows data to persist\nacross multiple AI agent invocations within the same call." + }, + "pom_format": { + "anyOf": [ + { + "type": "string", + "const": "markdown" + }, + { + "type": "string", + "const": "xml" + } + ], + "default": "markdown", + "description": "Specifies the output format for structured prompts when using the `pom` array in prompt definitions. Valid values are `markdown` or `xml`." + }, "save_conversation": { "anyOf": [ { @@ -4149,6 +4292,30 @@ "maximum": 600000, "description": "Overall speech timeout, in ms. Allowed values from `0` - `600,000`. Default is `60000` ms." 
}, + "speak_when_spoken_to": { + "anyOf": [ + { + "type": "boolean" + }, + { + "$ref": "#/$defs/SWMLVar" + } + ], + "default": false, + "description": "When enabled, the AI agent remains silent until directly addressed by name (using `ai_name`).\nThe user must say the agent's name to activate it for each interaction.\nCannot be used together with `enable_pause`." + }, + "start_paused": { + "anyOf": [ + { + "type": "boolean" + }, + { + "$ref": "#/$defs/SWMLVar" + } + ], + "default": false, + "description": "When enabled, the AI agent starts in a paused state and will not respond until the user\nspeaks the agent's name (set via `ai_name`). Automatically enables `enable_pause`." + }, "static_greeting": { "type": "string", "description": "The static greeting to play when the call is answered. This will always play at the beginning of the call." @@ -4224,6 +4391,23 @@ ], "description": "Allows SWAIG to set global data that persists across calls." }, + "swaig_post_swml_vars": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "array", + "items": { + "type": "string" + } + }, + { + "$ref": "#/$defs/SWMLVar" + } + ], + "description": "Controls whether SWML variables are included in SWAIG function webhook payloads.\nWhen set to `true`, all SWML variables are posted. When set to an array of strings,\nonly the specified variable names are included." + }, "thinking_model": { "anyOf": [ { @@ -4279,6 +4463,20 @@ ], "description": "Pass a summary of a conversation from one AI agent to another. For example, transfer a call summary between support agents in two departments." 
}, + "turn_detection_timeout": { + "anyOf": [ + { + "type": "integer" + }, + { + "$ref": "#/$defs/SWMLVar" + } + ], + "default": 250, + "minimum": 0, + "maximum": 10000, + "description": "Time in milliseconds to wait after detecting a potential end-of-turn before finalizing speech recognition.\nA shorter timeout results in faster response times but may cut off the user if they pause mid-sentence.\nSet to `0` to finalize immediately. Only used when `enable_turn_detection` is `true`." + }, "tts_number_format": { "anyOf": [ { @@ -4338,6 +4536,10 @@ ], "description": "The model to use for the AI's vision capabilities. Allowed values are `gpt-4o-mini`, `gpt-4.1-mini`, and `gpt-4.1-nano`." }, + "vad_config": { + "type": "string", + "description": "Configures Silero Voice Activity Detection (VAD) settings. Format: `\"threshold\"` or `\"threshold:frame_ms\"`.\nThe threshold (0-100) sets sensitivity for detecting voice activity.\nThe optional frame_ms (16-40) sets frame duration in milliseconds." + }, "wait_for_user": { "anyOf": [ { @@ -4348,6 +4550,10 @@ } ], "description": "When false, AI agent will initialize dialogue after call is setup. When true, agent will wait for the user to speak first." + }, + "wake_prefix": { + "type": "string", + "description": "Specifies an additional prefix that must be spoken along with the agent's name (`ai_name`)\nto wake the agent from a paused state. For example, if `ai_name` is \"computer\" and\n`wake_prefix` is \"hey\", the user would need to say \"hey computer\" to activate the agent." } }, "unevaluatedProperties": {}, @@ -5161,6 +5367,32 @@ "minimum": 10000, "maximum": 600000 }, + "ConversationMessage": { + "type": "object", + "properties": { + "role": { + "$ref": "#/$defs/ConversationRole", + "description": "The role of the message sender." + }, + "content": { + "type": "string", + "description": "The text content of the message." 
+ }, + "lang": { + "type": "string", + "description": "Optional language code for the message (e.g., 'en', 'es', 'fr')." + } + }, + "required": [ + "role", + "content" + ], + "unevaluatedProperties": { + "not": {} + }, + "description": "A message object representing a single turn in the conversation history.", + "title": "Conversation message object" + }, "Direction": { "type": "string", "enum": [ @@ -6191,6 +6423,15 @@ "not": {} } }, + "ConversationRole": { + "type": "string", + "enum": [ + "user", + "assistant", + "system" + ], + "title": "Conversation message role" + }, "POM": { "anyOf": [ { diff --git a/website/docs/main/swml/reference/methods/ai/ai_params/index.mdx b/website/docs/main/swml/reference/methods/ai/ai_params/index.mdx index 8a0ab7385..fbb9c87ca 100644 --- a/website/docs/main/swml/reference/methods/ai/ai_params/index.mdx +++ b/website/docs/main/swml/reference/methods/ai/ai_params/index.mdx @@ -18,6 +18,7 @@ import APIField from "@site/src/components/APIField"; [interrupt-prompt]: ./interrupt_prompt.mdx [ai-languages]: ../ai_languages.mdx [ai-params]: ./index.mdx +[post-prompt-url]: /swml/methods/ai/post_prompt_url [get-visual-input]: /swml/methods/ai/swaig/internal_fillers#internal_fillers-parameters [tone-stream]: https://developer.signalwire.com/freeswitch/FreeSWITCH-Explained/Modules/mod_dptools_1970333 [iana-tz]: https://en.wikipedia.org/wiki/List_of_tz_database_time_zones @@ -46,6 +47,22 @@ These parameters control the fundamental behavior and capabilities of the AI age The AI model that the AI Agent will use during the conversation.
+ + Sets the name the AI agent responds to for wake/activation purposes. When using `enable_pause`, `start_paused`, or `speak_when_spoken_to`, the user must say this name to get the agent's attention. The name matching is case-insensitive. + + + + A custom identifier for the AI application instance. This name is included in webhook payloads (`post_prompt_url`, SWAIG function calls), allowing backend systems to identify which AI configuration made the request. + + + + Injects pre-existing conversation history into the AI session at startup. + This allows you to seed the AI agent with context from a previous conversation or provide example interactions. + + **Example:** + + ```yaml andJson + params: + convo: + - role: "user" + content: "Hi, I need help with my order" + - role: "assistant" + content: "Of course! I'd be happy to help. Could you please provide your order number?" + - role: "user" + content: "It's order number 12345" + lang: "en-US" + ``` + + + + + The role of the message sender. Valid values: + - `"user"` - A message from the human caller/user interacting with the AI agent. + - `"assistant"` - A message from the AI agent itself. + - `"system"` - A system message providing instructions or context to guide the AI's behavior. + + + + The text content of the message. + + + + The language code for the message. Uses standard ISO language codes such as `"en"`, `"en-US"`, `"es"`, `"fr"`, `"de"`, etc. + + + + + Enables the inner dialog feature, which runs a separate AI process in the background that analyzes the conversation and provides real-time insights to the main AI agent. This gives the agent a form of "internal thought process" that can help it make better decisions. + + + + Enables the pause/resume functionality for the AI agent. When enabled, a `pause_conversation` function is automatically added that the AI can call when the user says things like "hold on", "wait", or "pause". 
While paused, the agent stops responding until the user speaks the agent's name (set via `ai_name`) to resume. Cannot be used together with `speak_when_spoken_to`. + + **Important**: This may introduce a little bit of latency as the AI will use an additional turn in the conversation to think about the query. + + Enables intelligent turn detection that monitors partial speech transcripts for sentence-ending punctuation. When detected, the system can proactively finalize the speech recognition, reducing latency before the AI responds. Works with `turn_detection_timeout`. + + - Send a summary of the conversation after the call ends. This requires a `post_url` to be set in the [`ai parameters`][ai-params] and the `conversation_id` defined below. This eliminates the need for a `post_prompt` in the `ai` parameters. + Send a summary of the conversation after the call ends. This requires [`post_prompt_url`][post-prompt-url] to be set and the `conversation_id` defined. This eliminates the need for a `post_prompt` in the `ai` parameters. + + Enables debug mode for the AI session. When set to `true` or a positive integer, additional debug information is logged and may be included in webhook payloads. Higher integer values increase verbosity. + + Enable verbose logging (developer mode only). + +### Inner Dialog + +Configure the inner dialog feature, which enables a secondary AI process to analyze conversations in real-time and provide insights to the main AI agent. + + + Specifies the AI model to use for the inner dialog feature. If not set, the main `ai_model` is used. This allows you to use a different (potentially faster or cheaper) model for background analysis. + + + + The system prompt that guides the inner dialog AI's behavior. This prompt shapes how the background AI analyzes the conversation and what kind of insights it provides to the main agent. + + + + When enabled, synchronizes the inner dialog with the main conversation flow. 
This ensures the inner dialog AI waits for the main conversation turn to complete before providing its analysis, rather than running fully asynchronously. + + +### Pause & Wake + +Control the agent's listening behavior, including pause/resume functionality and activation triggers for hands-free scenarios. + + + When enabled, the AI agent remains silent until directly addressed by name (set via `ai_name`). This creates a "wake word" style interaction where the agent only responds when explicitly called upon, useful for scenarios where the agent should listen but not interrupt. Cannot be used together with `enable_pause`. + + + + When enabled, the AI agent starts in a paused state. The agent will not respond to any input until the user speaks the agent's name (set via `ai_name`) to activate it. This is useful for scenarios where you want the agent to wait for explicit activation. + + + + Specifies an additional prefix that must be spoken along with the agent's name to wake the agent. For example, if `ai_name` is "assistant" and `wake_prefix` is "hey", the user would need to say "hey assistant" to activate the agent. + + +### Advanced Configuration + +Fine-tune advanced AI behavior settings including response limits, data persistence, prompt formatting, and voice activity detection. + + + Sets the maximum number of tokens the AI model can generate in a single response. Allowed values from `1` to `16384`. This helps control response length and costs. + + + + When enabled, `global_data` persists across multiple AI agent invocations within the same call. This allows data set by SWAIG functions to be retained if the AI agent is invoked multiple times during a single call session. + + + + Specifies the output format for structured prompts sent to the AI model. Valid values are `markdown` or `xml`. This affects how system prompts and context are formatted when sent to the underlying language model. + + + + Controls whether SWML variables are included in SWAIG function webhook payloads.
When set to `true`, all SWML variables are posted. When set to an array of strings, only the specified variable names are included in the payload. + + + + Time in milliseconds to wait after detecting a potential end-of-turn before finalizing speech recognition. Works with `enable_turn_detection`. Lower values make the agent more responsive but may cut off users mid-sentence. Allowed values from `0` to `10000`. + + + + Configures Silero Voice Activity Detection (VAD) settings. Format: `threshold` or `threshold:frame_ms`. The threshold (0-100) sets sensitivity for voice detection, and the optional `frame_ms` (16-40) sets the analysis frame duration in milliseconds. +