elevenlabs · Schema
SpeechToTextRequest
Properties
| Name | Type | Description |
|---|---|---|
| file | string | The audio file to transcribe. Supports formats including MP3, WAV, FLAC, OGG, and M4A. |
| model_id | string | The identifier of the speech-to-text model to use for transcription. |
| language_code | string | Language code in ISO 639-1 format to hint the expected language of the audio content. |
| tag_audio_events | boolean | Whether to tag non-speech audio events such as music, laughter, or applause in the transcription output. |
| timestamps_granularity | string | The level of timestamp granularity to include in the response. One of `none`, `word`, or `character`. |
JSON Schema
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$comment": "Component path: #/components/schemas/SpeechToTextRequest (kept out of $id, which must not contain a non-empty fragment in draft 2020-12).",
  "title": "SpeechToTextRequest",
  "type": "object",
  "required": [
    "file"
  ],
  "properties": {
    "file": {
      "type": "string",
      "format": "binary",
      "description": "The audio file to transcribe. Supports formats including MP3, WAV, FLAC, OGG, and M4A."
    },
    "model_id": {
      "type": "string",
      "description": "The identifier of the speech-to-text model to use for transcription."
    },
    "language_code": {
      "type": "string",
      "description": "Language code in ISO 639-1 format to hint the expected language of the audio content."
    },
    "tag_audio_events": {
      "type": "boolean",
      "description": "Whether to tag non-speech audio events such as music, laughter, or applause in the transcription output.",
      "default": false
    },
    "timestamps_granularity": {
      "type": "string",
      "description": "The level of timestamp granularity to include in the response.",
      "enum": [
        "none",
        "word",
        "character"
      ]
    }
  }
}