mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-21 09:44:58 +00:00
fix(ios): auto-fallback from PCM to MP3 for ElevenLabs TTS
The default output format pcm_44100 requires an ElevenLabs Pro tier
subscription. Users on free or starter plans get a silent 403 failure
and hear no audio.
Instead of hardcoding mp3, keep pcm_44100 as the default (better
quality for Pro users) but remember the failure: when a PCM request
fails (e.g. it is rejected with a 403), set pcmFormatUnavailable and
use mp3_44100_128 as the default for all subsequent requests in the
session; an explicitly requested output format still takes precedence.
The flag resets on config reload so it re-probes after reconnection.
Also standardize the MP3 fallback format from mp3_44100 to
mp3_44100_128 for consistent bitrate.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
(cherry picked from commit fbc26ef9f3)
This commit is contained in:
@@ -72,6 +72,9 @@ final class TalkModeManager: NSObject {
|
||||
private var mainSessionKey: String = "main"
|
||||
private var fallbackVoiceId: String?
|
||||
private var lastPlaybackWasPCM: Bool = false
|
||||
/// Set when PCM playback fails without being interrupted, e.g. because the ElevenLabs API rejects the PCM format (403 subscription_required).
|
||||
/// Once set, subsequent requests in this session that do not specify an output format default to MP3 instead of retrying PCM.
|
||||
private var pcmFormatUnavailable: Bool = false
|
||||
var pcmPlayer: PCMStreamingAudioPlaying = PCMStreamingAudioPlayer.shared
|
||||
var mp3Player: StreamingAudioPlaying = StreamingAudioPlayer.shared
|
||||
|
||||
@@ -1007,7 +1010,8 @@ final class TalkModeManager: NSObject {
|
||||
let desiredOutputFormat = (directive?.outputFormat ?? self.defaultOutputFormat)?
|
||||
.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||
let requestedOutputFormat = (desiredOutputFormat?.isEmpty == false) ? desiredOutputFormat : nil
|
||||
let outputFormat = ElevenLabsTTSClient.validatedOutputFormat(requestedOutputFormat ?? "pcm_44100")
|
||||
let outputFormat = ElevenLabsTTSClient.validatedOutputFormat(
|
||||
requestedOutputFormat ?? self.effectiveDefaultOutputFormat)
|
||||
if outputFormat == nil, let requestedOutputFormat {
|
||||
self.logger.warning(
|
||||
"talk output_format unsupported for local playback: \(requestedOutputFormat, privacy: .public)")
|
||||
@@ -1054,8 +1058,9 @@ final class TalkModeManager: NSObject {
|
||||
self.lastPlaybackWasPCM = true
|
||||
var playback = await self.pcmPlayer.play(stream: stream, sampleRate: sampleRate)
|
||||
if !playback.finished, playback.interruptedAt == nil {
|
||||
let mp3Format = ElevenLabsTTSClient.validatedOutputFormat("mp3_44100")
|
||||
let mp3Format = ElevenLabsTTSClient.validatedOutputFormat("mp3_44100_128")
|
||||
self.logger.warning("pcm playback failed; retrying mp3")
|
||||
self.pcmFormatUnavailable = true
|
||||
self.lastPlaybackWasPCM = false
|
||||
let mp3Stream = client.streamSynthesize(
|
||||
voiceId: voiceId,
|
||||
@@ -1391,7 +1396,7 @@ final class TalkModeManager: NSObject {
|
||||
|
||||
/// Resolves the output format for an incremental prefetch request. When
/// `TalkTTSValidation.pcmSampleRate(from:)` yields a sample rate for the context's format,
/// an MP3 format is substituted; otherwise the context's format is returned unchanged.
private func resolveIncrementalPrefetchOutputFormat(context: IncrementalSpeechContext) -> String? {
|
||||
if TalkTTSValidation.pcmSampleRate(from: context.outputFormat) != nil {
|
||||
// NOTE(review): the two consecutive returns below look like the removed/added pair of a diff
// whose +/- markers were lost; `mp3_44100_128` appears to be the intended value. Confirm
// against the repository.
return ElevenLabsTTSClient.validatedOutputFormat("mp3_44100")
|
||||
return ElevenLabsTTSClient.validatedOutputFormat("mp3_44100_128")
|
||||
}
|
||||
return context.outputFormat
|
||||
}
|
||||
@@ -1480,7 +1485,8 @@ final class TalkModeManager: NSObject {
|
||||
let desiredOutputFormat = (directive?.outputFormat ?? self.defaultOutputFormat)?
|
||||
.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||
let requestedOutputFormat = (desiredOutputFormat?.isEmpty == false) ? desiredOutputFormat : nil
|
||||
let outputFormat = ElevenLabsTTSClient.validatedOutputFormat(requestedOutputFormat ?? "pcm_44100")
|
||||
let outputFormat = ElevenLabsTTSClient.validatedOutputFormat(
|
||||
requestedOutputFormat ?? self.effectiveDefaultOutputFormat)
|
||||
if outputFormat == nil, let requestedOutputFormat {
|
||||
self.logger.warning(
|
||||
"talk output_format unsupported for local playback: \(requestedOutputFormat, privacy: .public)")
|
||||
@@ -1531,6 +1537,11 @@ final class TalkModeManager: NSObject {
|
||||
latencyTier: TalkTTSValidation.validatedLatencyTier(context.directive?.latencyTier))
|
||||
}
|
||||
|
||||
/// Fallback output format used when no explicit format is requested: `mp3_44100_128` once `pcmFormatUnavailable` is set, otherwise `pcm_44100`.
|
||||
private var effectiveDefaultOutputFormat: String {
|
||||
self.pcmFormatUnavailable ? "mp3_44100_128" : "pcm_44100"
|
||||
}
|
||||
|
||||
private static func makeBufferedAudioStream(chunks: [Data]) -> AsyncThrowingStream<Data, Error> {
|
||||
AsyncThrowingStream { continuation in
|
||||
for chunk in chunks {
|
||||
@@ -1586,8 +1597,9 @@ final class TalkModeManager: NSObject {
|
||||
var playback = await self.pcmPlayer.play(stream: stream, sampleRate: sampleRate)
|
||||
if !playback.finished, playback.interruptedAt == nil {
|
||||
self.logger.warning("pcm playback failed; retrying mp3")
|
||||
self.pcmFormatUnavailable = true
|
||||
self.lastPlaybackWasPCM = false
|
||||
let mp3Format = ElevenLabsTTSClient.validatedOutputFormat("mp3_44100")
|
||||
let mp3Format = ElevenLabsTTSClient.validatedOutputFormat("mp3_44100_128")
|
||||
let mp3Stream = client.streamSynthesize(
|
||||
voiceId: voiceId,
|
||||
request: self.makeIncrementalTTSRequest(
|
||||
@@ -1998,6 +2010,7 @@ extension TalkModeManager {
|
||||
self.gatewayTalkDefaultModelId = nil
|
||||
self.gatewayTalkApiKeyConfigured = false
|
||||
self.gatewayTalkConfigLoaded = false
|
||||
self.pcmFormatUnavailable = false
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user