mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-10 10:55:07 +00:00
TTS: add baseUrl support to OpenAI TTS config (#34321)
Merged via squash.
Prepared head SHA: e9a10cf81d
Co-authored-by: RealKai42 <44634134+RealKai42@users.noreply.github.com>
Co-authored-by: shakkernerd <165377636+shakkernerd@users.noreply.github.com>
Reviewed-by: @shakkernerd
This commit is contained in:
@@ -18,6 +18,7 @@ import type {
|
||||
} from "./tts.js";
|
||||
|
||||
const DEFAULT_ELEVENLABS_BASE_URL = "https://api.elevenlabs.io";
|
||||
export const DEFAULT_OPENAI_BASE_URL = "https://api.openai.com/v1";
|
||||
const TEMP_FILE_CLEANUP_DELAY_MS = 5 * 60 * 1000; // 5 minutes
|
||||
|
||||
export function isValidVoiceId(voiceId: string): boolean {
|
||||
@@ -32,6 +33,14 @@ function normalizeElevenLabsBaseUrl(baseUrl: string): string {
|
||||
return trimmed.replace(/\/+$/, "");
|
||||
}
|
||||
|
||||
function normalizeOpenAITtsBaseUrl(baseUrl?: string): string {
|
||||
const trimmed = baseUrl?.trim();
|
||||
if (!trimmed) {
|
||||
return DEFAULT_OPENAI_BASE_URL;
|
||||
}
|
||||
return trimmed.replace(/\/+$/, "");
|
||||
}
|
||||
|
||||
function requireInRange(value: number, min: number, max: number, label: string): void {
|
||||
if (!Number.isFinite(value) || value < min || value > max) {
|
||||
throw new Error(`${label} must be between ${min} and ${max}`);
|
||||
@@ -99,6 +108,7 @@ function parseNumberValue(value: string): number | undefined {
|
||||
export function parseTtsDirectives(
|
||||
text: string,
|
||||
policy: ResolvedTtsModelOverrides,
|
||||
openaiBaseUrl?: string,
|
||||
): TtsDirectiveParseResult {
|
||||
if (!policy.enabled) {
|
||||
return { cleanedText: text, overrides: {}, warnings: [], hasDirective: false };
|
||||
@@ -151,7 +161,7 @@ export function parseTtsDirectives(
|
||||
if (!policy.allowVoice) {
|
||||
break;
|
||||
}
|
||||
if (isValidOpenAIVoice(rawValue)) {
|
||||
if (isValidOpenAIVoice(rawValue, openaiBaseUrl)) {
|
||||
overrides.openai = { ...overrides.openai, voice: rawValue };
|
||||
} else {
|
||||
warnings.push(`invalid OpenAI voice "${rawValue}"`);
|
||||
@@ -180,7 +190,7 @@ export function parseTtsDirectives(
|
||||
if (!policy.allowModelId) {
|
||||
break;
|
||||
}
|
||||
if (isValidOpenAIModel(rawValue)) {
|
||||
if (isValidOpenAIModel(rawValue, openaiBaseUrl)) {
|
||||
overrides.openai = { ...overrides.openai, model: rawValue };
|
||||
} else {
|
||||
overrides.elevenlabs = { ...overrides.elevenlabs, modelId: rawValue };
|
||||
@@ -335,14 +345,14 @@ export const OPENAI_TTS_MODELS = ["gpt-4o-mini-tts", "tts-1", "tts-1-hd"] as con
|
||||
* Note: Read at runtime (not module load) to support config.env loading.
|
||||
*/
|
||||
function getOpenAITtsBaseUrl(): string {
|
||||
return (process.env.OPENAI_TTS_BASE_URL?.trim() || "https://api.openai.com/v1").replace(
|
||||
/\/+$/,
|
||||
"",
|
||||
);
|
||||
return normalizeOpenAITtsBaseUrl(process.env.OPENAI_TTS_BASE_URL);
|
||||
}
|
||||
|
||||
function isCustomOpenAIEndpoint(): boolean {
|
||||
return getOpenAITtsBaseUrl() !== "https://api.openai.com/v1";
|
||||
function isCustomOpenAIEndpoint(baseUrl?: string): boolean {
|
||||
if (baseUrl != null) {
|
||||
return normalizeOpenAITtsBaseUrl(baseUrl) !== DEFAULT_OPENAI_BASE_URL;
|
||||
}
|
||||
return getOpenAITtsBaseUrl() !== DEFAULT_OPENAI_BASE_URL;
|
||||
}
|
||||
export const OPENAI_TTS_VOICES = [
|
||||
"alloy",
|
||||
@@ -363,17 +373,17 @@ export const OPENAI_TTS_VOICES = [
|
||||
|
||||
type OpenAiTtsVoice = (typeof OPENAI_TTS_VOICES)[number];
|
||||
|
||||
export function isValidOpenAIModel(model: string): boolean {
|
||||
export function isValidOpenAIModel(model: string, baseUrl?: string): boolean {
|
||||
// Allow any model when using custom endpoint (e.g., Kokoro, LocalAI)
|
||||
if (isCustomOpenAIEndpoint()) {
|
||||
if (isCustomOpenAIEndpoint(baseUrl)) {
|
||||
return true;
|
||||
}
|
||||
return OPENAI_TTS_MODELS.includes(model as (typeof OPENAI_TTS_MODELS)[number]);
|
||||
}
|
||||
|
||||
export function isValidOpenAIVoice(voice: string): voice is OpenAiTtsVoice {
|
||||
export function isValidOpenAIVoice(voice: string, baseUrl?: string): voice is OpenAiTtsVoice {
|
||||
// Allow any voice when using custom endpoint (e.g., Kokoro Chinese voices)
|
||||
if (isCustomOpenAIEndpoint()) {
|
||||
if (isCustomOpenAIEndpoint(baseUrl)) {
|
||||
return true;
|
||||
}
|
||||
return OPENAI_TTS_VOICES.includes(voice as OpenAiTtsVoice);
|
||||
@@ -591,17 +601,18 @@ export async function elevenLabsTTS(params: {
|
||||
export async function openaiTTS(params: {
|
||||
text: string;
|
||||
apiKey: string;
|
||||
baseUrl: string;
|
||||
model: string;
|
||||
voice: string;
|
||||
responseFormat: "mp3" | "opus" | "pcm";
|
||||
timeoutMs: number;
|
||||
}): Promise<Buffer> {
|
||||
const { text, apiKey, model, voice, responseFormat, timeoutMs } = params;
|
||||
const { text, apiKey, baseUrl, model, voice, responseFormat, timeoutMs } = params;
|
||||
|
||||
if (!isValidOpenAIModel(model)) {
|
||||
if (!isValidOpenAIModel(model, baseUrl)) {
|
||||
throw new Error(`Invalid model: ${model}`);
|
||||
}
|
||||
if (!isValidOpenAIVoice(voice)) {
|
||||
if (!isValidOpenAIVoice(voice, baseUrl)) {
|
||||
throw new Error(`Invalid voice: ${voice}`);
|
||||
}
|
||||
|
||||
@@ -609,7 +620,7 @@ export async function openaiTTS(params: {
|
||||
const timeout = setTimeout(() => controller.abort(), timeoutMs);
|
||||
|
||||
try {
|
||||
const response = await fetch(`${getOpenAITtsBaseUrl()}/audio/speech`, {
|
||||
const response = await fetch(`${baseUrl}/audio/speech`, {
|
||||
method: "POST",
|
||||
headers: {
|
||||
Authorization: `Bearer ${apiKey}`,
|
||||
|
||||
Reference in New Issue
Block a user