Universal Audio API
Unified API for speech-to-text-transcription models, speech-to-text-translation models, and text-to-speech-creation models
API Overview
To simplify the integration of different speech-to-text-transcription models (stt), speech-to-text-translation models (stt), and text-to-speech-creation models (tts), OneRouter provides a unified audio API.
API Specification
text-to-speech-creation models (tts)
Generates audio from the input text.
curl https://audio.onerouter.pro/v1/audio/speech \
-H "Content-Type: application/json" \
-H "Authorization: <API_KEY>" \
-d '{
"model": "gpt-4o-mini-tts",
"input": "A cute baby sea otter",
"voice": "alloy"
}' \
--output speech.mp3

import os
import json
import requests

# OneRouter text-to-speech endpoint (OpenAI-compatible).
API_URL = "https://audio.onerouter.pro/v1/audio/speech"

# Read the API key from the environment rather than hard-coding it.
API_KEY = os.getenv("ONEROUTER_API_KEY")
if not API_KEY:
    raise RuntimeError("Please set the ONEROUTER_API_KEY")

# Request body: model to use, text to synthesize, voice, and output container.
payload = {
    "model": "gpt-4o-mini-tts",
    "input": "A cute baby sea otter.",
    "voice": "alloy",
    "response_format": "mp3"
}
headers = {
    "Authorization": API_KEY,
    "Content-Type": "application/json"
}

response = requests.post(API_URL, headers=headers, data=json.dumps(payload))
response.raise_for_status()  # Fail fast on any HTTP error status.

# The endpoint returns raw audio bytes; write them next to this script.
out_path = os.path.join(os.path.dirname(__file__), "tts-output.mp3")
with open(out_path, "wb") as f:
    f.write(response.content)
print(f"Saved to: {out_path}")

`<API_KEY>` is your API Key generated in the API page. `model` is the model name, such as `gpt-4o-mini-tts`; the available model list can be accessed in the Model page. `voice` is the voice to use when generating the audio. Supported voices are `alloy`, `ash`, `ballad`, `coral`, `echo`, `fable`, `onyx`, `nova`, `sage`, `shimmer`, and `verse`.
Example response
speech-to-text-translation models (stt)
Translates audio into English.
curl https://audio.onerouter.pro/v1/audio/translations \
-H "Content-Type: multipart/form-data" \
-H "Authorization: <API_KEY>" \
--form 'file=@/path/to/file/speech.m4a' \
--form 'model="whisper-1"'

from openai import OpenAI
import base64
import os

# Point the OpenAI SDK at the OneRouter audio endpoint.
client = OpenAI(
    api_key="<API_KEY>", # Replace with your Key "sk-***"
    base_url="https://audio.onerouter.pro/v1"
)

# Open the audio in binary mode inside a context manager so the file
# handle is closed even if the request raises.
with open("/path/to/file/speech.m4a", "rb") as audio_file:
    # Translate the spoken audio into English text.
    transcript = client.audio.translations.create(
        model="whisper-1",
        file=audio_file
    )
print(transcript)

import os
import requests

# OneRouter speech-to-text translation endpoint (translates audio to English).
API_URL = "https://audio.onerouter.pro/v1/audio/translations"

# Read the API key from the environment rather than hard-coding it.
API_KEY = os.getenv("ONEROUTER_API_KEY")
if not API_KEY:
    raise RuntimeError("Please set the ONEROUTER_API_KEY")

# Reuse the audio file produced by the text-to-speech example above.
file_path = os.path.join(os.path.dirname(__file__), "tts-output.mp3")

# The POST must happen inside the `with` block: requests streams the file
# object during the multipart upload, so the handle has to stay open.
with open(file_path, "rb") as f:
    files = {"file": ("tts-output.mp3", f, "audio/mpeg")}
    data = {
        "model": "whisper-1",
        "prompt": "loudly",
        "response_format": "json",
        "temperature": 0.8,
    }
    headers = {"Authorization": API_KEY}
    response = requests.post(API_URL, headers=headers, files=files, data=data)

response.raise_for_status()  # Fail fast on any HTTP error status.
print(response.json())

`<API_KEY>` is your API Key generated in the API page. `model` is the model name, such as `whisper-1`; the available model list can be accessed in the Model page. `file` is the audio file object (not file name) to translate, in one of these formats: flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
Example response
{
"text": "Hello, my name is Wolfgang and I come from Germany. Where are you heading today?"
}

speech-to-text-transcription models (stt)
Transcribes audio into the input language.
curl https://audio.onerouter.pro/v1/audio/transcriptions \
-H "Content-Type: multipart/form-data" \
-H "Authorization: <API_KEY>" \
--form 'file=@/path/to/file/speech.m4a' \
--form 'model="whisper-1"'

from openai import OpenAI
import base64
import os

# Point the OpenAI SDK at the OneRouter audio endpoint.
client = OpenAI(
    api_key="<API_KEY>", # Replace with your Key "sk-***"
    base_url="https://audio.onerouter.pro/v1"
)

# Open the audio in binary mode inside a context manager so the file
# handle is closed even if the request raises.
with open("/path/to/file/speech.m4a", "rb") as audio_file:
    # Transcribe the audio into text in the input language.
    transcription = client.audio.transcriptions.create(
        model="gpt-4o-transcribe",
        file=audio_file
    )
print(transcription)

import os
import requests

# OneRouter speech-to-text transcription endpoint.
API_URL = "https://audio.onerouter.pro/v1/audio/transcriptions"

# Read the API key from the environment rather than hard-coding it.
API_KEY = os.getenv("ONEROUTER_API_KEY")
if not API_KEY:
    raise RuntimeError("Please set the ONEROUTER_API_KEY")

# Reuse the audio file produced by the text-to-speech example above.
file_path = os.path.join(os.path.dirname(__file__), "tts-output.mp3")

# The POST must happen inside the `with` block: requests streams the file
# object during the multipart upload, so the handle has to stay open.
with open(file_path, "rb") as f:
    files = {"file": ("tts-output.mp3", f, "audio/mpeg")}
    data = {
        "model": "gpt-4o-transcribe",
        "prompt": "loudly",
        "response_format": "json",
        "temperature": 0.8,
    }
    headers = {"Authorization": API_KEY}
    response = requests.post(API_URL, headers=headers, files=files, data=data)

response.raise_for_status()  # Fail fast on any HTTP error status.
print(response.json())

`<API_KEY>` is your API Key generated in the API page. `model` is the model name, such as `whisper-1`; the available model list can be accessed in the Model page. `file` is the audio file object (not file name) to transcribe, in one of these formats: flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
Example response
{
"text": "Imagine the wildest idea that you've ever had, and you're curious about how it might scale to something that's a 100, a 1,000 times bigger. This is a place where you can get to do that.",
"usage": {
"type": "tokens",
"input_tokens": 14,
"input_token_details": {
"text_tokens": 0,
"audio_tokens": 14
},
"output_tokens": 45,
"total_tokens": 59
}
}

Last updated