feat(talk): add provider-agnostic config with legacy compatibility

This commit is contained in:
Nimrod Gutman
2026-02-21 21:47:39 +02:00
committed by Peter Steinberger
parent d1f28c954e
commit d58f71571a
19 changed files with 1003 additions and 109 deletions

View File

@@ -11,6 +11,7 @@ actor TalkModeRuntime {
private let logger = Logger(subsystem: "ai.openclaw", category: "talk.runtime")
private let ttsLogger = Logger(subsystem: "ai.openclaw", category: "talk.tts")
private static let defaultModelIdFallback = "eleven_v3"
private static let defaultTalkProvider = "elevenlabs"
private final class RMSMeter: @unchecked Sendable {
private let lock = NSLock()
@@ -792,6 +793,48 @@ extension TalkModeRuntime {
let apiKey: String?
}
struct TalkProviderConfigSelection {
let provider: String
let config: [String: AnyCodable]
let normalizedPayload: Bool
}
private static func normalizedTalkProviderID(_ raw: String?) -> String? {
let trimmed = raw?.trimmingCharacters(in: .whitespacesAndNewlines).lowercased() ?? ""
return trimmed.isEmpty ? nil : trimmed
}
static func selectTalkProviderConfig(
_ talk: [String: AnyCodable]?) -> TalkProviderConfigSelection?
{
guard let talk else { return nil }
let rawProvider = talk["provider"]?.stringValue
let rawProviders = talk["providers"]?.dictionaryValue
let hasNormalizedPayload = rawProvider != nil || rawProviders != nil
if hasNormalizedPayload {
let normalizedProviders =
rawProviders?.reduce(into: [String: [String: AnyCodable]]()) { acc, entry in
guard
let providerID = Self.normalizedTalkProviderID(entry.key),
let providerConfig = entry.value.dictionaryValue
else { return }
acc[providerID] = providerConfig
} ?? [:]
let providerID =
Self.normalizedTalkProviderID(rawProvider) ??
normalizedProviders.keys.sorted().first ??
Self.defaultTalkProvider
return TalkProviderConfigSelection(
provider: providerID,
config: normalizedProviders[providerID] ?? [:],
normalizedPayload: true)
}
return TalkProviderConfigSelection(
provider: Self.defaultTalkProvider,
config: talk,
normalizedPayload: false)
}
private func fetchTalkConfig() async -> TalkRuntimeConfig {
let env = ProcessInfo.processInfo.environment
let envVoice = env["ELEVENLABS_VOICE_ID"]?.trimmingCharacters(in: .whitespacesAndNewlines)
@@ -804,13 +847,16 @@ extension TalkModeRuntime {
params: ["includeSecrets": AnyCodable(true)],
timeoutMs: 8000)
let talk = snap.config?["talk"]?.dictionaryValue
let selection = Self.selectTalkProviderConfig(talk)
let activeProvider = selection?.provider ?? Self.defaultTalkProvider
let activeConfig = selection?.config
let ui = snap.config?["ui"]?.dictionaryValue
let rawSeam = ui?["seamColor"]?.stringValue?.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
await MainActor.run {
AppStateStore.shared.seamColorHex = rawSeam.isEmpty ? nil : rawSeam
}
let voice = talk?["voiceId"]?.stringValue
let rawAliases = talk?["voiceAliases"]?.dictionaryValue
let voice = activeConfig?["voiceId"]?.stringValue
let rawAliases = activeConfig?["voiceAliases"]?.dictionaryValue
let resolvedAliases: [String: String] =
rawAliases?.reduce(into: [:]) { acc, entry in
let key = entry.key.trimmingCharacters(in: .whitespacesAndNewlines).lowercased()
@@ -818,18 +864,30 @@ extension TalkModeRuntime {
guard !key.isEmpty, !value.isEmpty else { return }
acc[key] = value
} ?? [:]
let model = talk?["modelId"]?.stringValue?.trimmingCharacters(in: .whitespacesAndNewlines)
let model = activeConfig?["modelId"]?.stringValue?.trimmingCharacters(in: .whitespacesAndNewlines)
let resolvedModel = (model?.isEmpty == false) ? model! : Self.defaultModelIdFallback
let outputFormat = talk?["outputFormat"]?.stringValue
let outputFormat = activeConfig?["outputFormat"]?.stringValue
let interrupt = talk?["interruptOnSpeech"]?.boolValue
let apiKey = talk?["apiKey"]?.stringValue
let resolvedVoice =
let apiKey = activeConfig?["apiKey"]?.stringValue
let resolvedVoice: String? = if activeProvider == Self.defaultTalkProvider {
(voice?.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty == false ? voice : nil) ??
(envVoice?.isEmpty == false ? envVoice : nil) ??
(sagVoice?.isEmpty == false ? sagVoice : nil)
let resolvedApiKey =
} else {
(voice?.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty == false ? voice : nil)
}
let resolvedApiKey: String? = if activeProvider == Self.defaultTalkProvider {
(envApiKey?.isEmpty == false ? envApiKey : nil) ??
(apiKey?.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty == false ? apiKey : nil)
} else {
nil
}
if activeProvider != Self.defaultTalkProvider {
self.ttsLogger
.info("talk provider \(activeProvider, privacy: .public) unsupported; using system voice")
} else if selection?.normalizedPayload == true {
self.ttsLogger.info("talk config provider elevenlabs")
}
return TalkRuntimeConfig(
voiceId: resolvedVoice,
voiceAliases: resolvedAliases,