refactor: split talk gateway config loaders

This commit is contained in:
Peter Steinberger
2026-03-08 16:15:46 +00:00
parent dc5645d459
commit 16a5f0b006
6 changed files with 359 additions and 154 deletions

View File

@@ -0,0 +1,110 @@
package ai.openclaw.app.voice
import ai.openclaw.app.normalizeMainKey
import kotlinx.serialization.json.JsonElement
import kotlinx.serialization.json.JsonObject
import kotlinx.serialization.json.JsonPrimitive
import kotlinx.serialization.json.booleanOrNull
import kotlinx.serialization.json.contentOrNull
internal data class TalkModeGatewayConfigState(
val activeProvider: String,
val normalizedPayload: Boolean,
val missingResolvedPayload: Boolean,
val mainSessionKey: String,
val defaultVoiceId: String?,
val voiceAliases: Map<String, String>,
val defaultModelId: String,
val defaultOutputFormat: String,
val apiKey: String?,
val interruptOnSpeech: Boolean?,
val silenceTimeoutMs: Long,
)
internal object TalkModeGatewayConfigParser {
fun parse(
config: JsonObject?,
defaultProvider: String,
defaultModelIdFallback: String,
defaultOutputFormatFallback: String,
envVoice: String?,
sagVoice: String?,
envKey: String?,
): TalkModeGatewayConfigState {
val talk = config?.get("talk").asObjectOrNull()
val selection = TalkModeManager.selectTalkProviderConfig(talk)
val activeProvider = selection?.provider ?: defaultProvider
val activeConfig = selection?.config
val sessionCfg = config?.get("session").asObjectOrNull()
val mainKey = normalizeMainKey(sessionCfg?.get("mainKey").asStringOrNull())
val voice = activeConfig?.get("voiceId")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() }
val aliases =
activeConfig?.get("voiceAliases").asObjectOrNull()?.entries?.mapNotNull { (key, value) ->
val id = value.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() } ?: return@mapNotNull null
normalizeTalkAliasKey(key).takeIf { it.isNotEmpty() }?.let { it to id }
}?.toMap().orEmpty()
val model = activeConfig?.get("modelId")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() }
val outputFormat =
activeConfig?.get("outputFormat")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() }
val key = activeConfig?.get("apiKey")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() }
val interrupt = talk?.get("interruptOnSpeech")?.asBooleanOrNull()
val silenceTimeoutMs = TalkModeManager.resolvedSilenceTimeoutMs(talk)
return TalkModeGatewayConfigState(
activeProvider = activeProvider,
normalizedPayload = selection?.normalizedPayload == true,
missingResolvedPayload = talk != null && selection == null,
mainSessionKey = mainKey,
defaultVoiceId =
if (activeProvider == defaultProvider) {
voice ?: envVoice?.takeIf { it.isNotEmpty() } ?: sagVoice?.takeIf { it.isNotEmpty() }
} else {
voice
},
voiceAliases = aliases,
defaultModelId = model ?: defaultModelIdFallback,
defaultOutputFormat = outputFormat ?: defaultOutputFormatFallback,
apiKey = key ?: envKey?.takeIf { it.isNotEmpty() },
interruptOnSpeech = interrupt,
silenceTimeoutMs = silenceTimeoutMs,
)
}
fun fallback(
defaultProvider: String,
defaultModelIdFallback: String,
defaultOutputFormatFallback: String,
envVoice: String?,
sagVoice: String?,
envKey: String?,
): TalkModeGatewayConfigState =
TalkModeGatewayConfigState(
activeProvider = defaultProvider,
normalizedPayload = false,
missingResolvedPayload = false,
mainSessionKey = "main",
defaultVoiceId = envVoice?.takeIf { it.isNotEmpty() } ?: sagVoice?.takeIf { it.isNotEmpty() },
voiceAliases = emptyMap(),
defaultModelId = defaultModelIdFallback,
defaultOutputFormat = defaultOutputFormatFallback,
apiKey = envKey?.takeIf { it.isNotEmpty() },
interruptOnSpeech = null,
silenceTimeoutMs = TalkDefaults.defaultSilenceTimeoutMs,
)
}
private fun normalizeTalkAliasKey(value: String): String =
value.trim().lowercase()
private fun JsonElement?.asStringOrNull(): String? =
this?.let { element ->
element as? JsonPrimitive
}?.contentOrNull
private fun JsonElement?.asBooleanOrNull(): Boolean? {
val primitive = this as? JsonPrimitive ?: return null
return primitive.booleanOrNull
}
private fun JsonElement?.asObjectOrNull(): JsonObject? =
this as? JsonObject

View File

@@ -1400,69 +1400,64 @@ class TalkModeManager(
try {
val res = session.request("talk.config", """{"includeSecrets":true}""")
val root = json.parseToJsonElement(res).asObjectOrNull()
val config = root?.get("config").asObjectOrNull()
val talk = config?.get("talk").asObjectOrNull()
val selection = selectTalkProviderConfig(talk)
if (talk != null && selection == null) {
val parsed =
TalkModeGatewayConfigParser.parse(
config = root?.get("config").asObjectOrNull(),
defaultProvider = defaultTalkProvider,
defaultModelIdFallback = defaultModelIdFallback,
defaultOutputFormatFallback = defaultOutputFormatFallback,
envVoice = envVoice,
sagVoice = sagVoice,
envKey = envKey,
)
if (parsed.missingResolvedPayload) {
Log.w(tag, "talk config ignored: normalized payload missing talk.resolved")
}
val activeProvider = selection?.provider ?: defaultTalkProvider
val activeConfig = selection?.config
val sessionCfg = config?.get("session").asObjectOrNull()
val mainKey = normalizeMainKey(sessionCfg?.get("mainKey").asStringOrNull())
val voice = activeConfig?.get("voiceId")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() }
val aliases =
activeConfig?.get("voiceAliases").asObjectOrNull()?.entries?.mapNotNull { (key, value) ->
val id = value.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() } ?: return@mapNotNull null
normalizeAliasKey(key).takeIf { it.isNotEmpty() }?.let { it to id }
}?.toMap().orEmpty()
val model = activeConfig?.get("modelId")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() }
val outputFormat =
activeConfig?.get("outputFormat")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() }
val key = activeConfig?.get("apiKey")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() }
val interrupt = talk?.get("interruptOnSpeech")?.asBooleanOrNull()
val silenceTimeoutMs = resolvedSilenceTimeoutMs(talk)
if (!isCanonicalMainSessionKey(mainSessionKey)) {
mainSessionKey = mainKey
mainSessionKey = parsed.mainSessionKey
}
defaultVoiceId =
if (activeProvider == defaultTalkProvider) {
voice ?: envVoice?.takeIf { it.isNotEmpty() } ?: sagVoice?.takeIf { it.isNotEmpty() }
} else {
voice
}
voiceAliases = aliases
defaultVoiceId = parsed.defaultVoiceId
voiceAliases = parsed.voiceAliases
if (!voiceOverrideActive) currentVoiceId = defaultVoiceId
defaultModelId = model ?: defaultModelIdFallback
defaultModelId = parsed.defaultModelId
if (!modelOverrideActive) currentModelId = defaultModelId
defaultOutputFormat = outputFormat ?: defaultOutputFormatFallback
apiKey = key ?: envKey?.takeIf { it.isNotEmpty() }
silenceWindowMs = silenceTimeoutMs
defaultOutputFormat = parsed.defaultOutputFormat
apiKey = parsed.apiKey
silenceWindowMs = parsed.silenceTimeoutMs
Log.d(
tag,
"reloadConfig apiKey=${if (apiKey != null) "set" else "null"} voiceId=$defaultVoiceId silenceTimeoutMs=$silenceTimeoutMs",
"reloadConfig apiKey=${if (apiKey != null) "set" else "null"} voiceId=$defaultVoiceId silenceTimeoutMs=${parsed.silenceTimeoutMs}",
)
if (interrupt != null) interruptOnSpeech = interrupt
activeProviderIsElevenLabs = activeProvider == defaultTalkProvider
if (parsed.interruptOnSpeech != null) interruptOnSpeech = parsed.interruptOnSpeech
activeProviderIsElevenLabs = parsed.activeProvider == defaultTalkProvider
if (!activeProviderIsElevenLabs) {
// Clear ElevenLabs credentials so playAssistant won't attempt ElevenLabs calls
apiKey = null
defaultVoiceId = null
if (!voiceOverrideActive) currentVoiceId = null
Log.w(tag, "talk provider $activeProvider unsupported; using system voice fallback")
} else if (selection?.normalizedPayload == true) {
Log.w(tag, "talk provider ${parsed.activeProvider} unsupported; using system voice fallback")
} else if (parsed.normalizedPayload) {
Log.d(tag, "talk config provider=elevenlabs")
}
configLoaded = true
} catch (_: Throwable) {
silenceWindowMs = TalkDefaults.defaultSilenceTimeoutMs
defaultVoiceId = envVoice?.takeIf { it.isNotEmpty() } ?: sagVoice?.takeIf { it.isNotEmpty() }
defaultModelId = defaultModelIdFallback
val fallback =
TalkModeGatewayConfigParser.fallback(
defaultProvider = defaultTalkProvider,
defaultModelIdFallback = defaultModelIdFallback,
defaultOutputFormatFallback = defaultOutputFormatFallback,
envVoice = envVoice,
sagVoice = sagVoice,
envKey = envKey,
)
silenceWindowMs = fallback.silenceTimeoutMs
defaultVoiceId = fallback.defaultVoiceId
defaultModelId = fallback.defaultModelId
if (!modelOverrideActive) currentModelId = defaultModelId
apiKey = envKey?.takeIf { it.isNotEmpty() }
voiceAliases = emptyMap()
defaultOutputFormat = defaultOutputFormatFallback
apiKey = fallback.apiKey
voiceAliases = fallback.voiceAliases
defaultOutputFormat = fallback.defaultOutputFormat
// Keep config load retryable after transient fetch failures.
configLoaded = false
}