fix(ios): guard talk TTS callbacks to active utterance (#33304)

Merged via squash.

Prepared head SHA: dd88886e41
Co-authored-by: mbelinky <132747814+mbelinky@users.noreply.github.com>
Reviewed-by: @mbelinky
This commit is contained in:
Mariano
2026-03-03 22:34:09 +00:00
committed by GitHub
parent a8dd9ffea1
commit 22e33ddda9
2 changed files with 34 additions and 5 deletions

View File

@@ -14,6 +14,7 @@ Docs: https://docs.openclaw.ai
### Fixes ### Fixes
- Docs/security hardening guidance: document Docker `DOCKER-USER` + UFW policy and add cross-linking from Docker install docs for VPS/public-host setups. (#27613) thanks @dorukardahan. - Docs/security hardening guidance: document Docker `DOCKER-USER` + UFW policy and add cross-linking from Docker install docs for VPS/public-host setups. (#27613) thanks @dorukardahan.
- iOS/Voice timing safety: guard system speech start/finish callbacks to the active utterance to avoid misattributed start events during rapid stop/restart cycles. (#33304) thanks @mbelinky; original implementation direction by @ngutman.
- Docs/tool-loop detection config keys: align `docs/tools/loop-detection.md` examples and field names with the current `tools.loopDetection` schema to prevent copy-paste validation failures from outdated keys. (#33182) Thanks @Mylszd. - Docs/tool-loop detection config keys: align `docs/tools/loop-detection.md` examples and field names with the current `tools.loopDetection` schema to prevent copy-paste validation failures from outdated keys. (#33182) Thanks @Mylszd.
- Gateway/session agent discovery: include disk-scanned agent IDs in `listConfiguredAgentIds` even when `agents.list` is configured, so disk-only/ACP agent sessions remain visible in gateway session aggregation and listings. (#32831) thanks @Sid-Qin. - Gateway/session agent discovery: include disk-scanned agent IDs in `listConfiguredAgentIds` even when `agents.list` is configured, so disk-only/ACP agent sessions remain visible in gateway session aggregation and listings. (#32831) thanks @Sid-Qin.
- Discord/inbound debouncer: skip bot-own MESSAGE_CREATE events before they reach the debounce queue to avoid self-triggered slowdowns in busy servers. Thanks @thewilloftheshadow. - Discord/inbound debouncer: skip bot-own MESSAGE_CREATE events before they reach the debounce queue to avoid self-triggered slowdowns in busy servers. Thanks @thewilloftheshadow.

View File

@@ -12,6 +12,7 @@ public final class TalkSystemSpeechSynthesizer: NSObject {
private let synth = AVSpeechSynthesizer() private let synth = AVSpeechSynthesizer()
private var speakContinuation: CheckedContinuation<Void, Error>? private var speakContinuation: CheckedContinuation<Void, Error>?
private var currentUtterance: AVSpeechUtterance? private var currentUtterance: AVSpeechUtterance?
private var didStartCallback: (() -> Void)?
private var currentToken = UUID() private var currentToken = UUID()
private var watchdog: Task<Void, Never>? private var watchdog: Task<Void, Never>?
@@ -26,17 +27,23 @@ public final class TalkSystemSpeechSynthesizer: NSObject {
self.currentToken = UUID() self.currentToken = UUID()
self.watchdog?.cancel() self.watchdog?.cancel()
self.watchdog = nil self.watchdog = nil
self.didStartCallback = nil
self.synth.stopSpeaking(at: .immediate) self.synth.stopSpeaking(at: .immediate)
self.finishCurrent(with: SpeakError.canceled) self.finishCurrent(with: SpeakError.canceled)
} }
public func speak(text: String, language: String? = nil) async throws { public func speak(
text: String,
language: String? = nil,
onStart: (() -> Void)? = nil
) async throws {
let trimmed = text.trimmingCharacters(in: .whitespacesAndNewlines) let trimmed = text.trimmingCharacters(in: .whitespacesAndNewlines)
guard !trimmed.isEmpty else { return } guard !trimmed.isEmpty else { return }
self.stop() self.stop()
let token = UUID() let token = UUID()
self.currentToken = token self.currentToken = token
self.didStartCallback = onStart
let utterance = AVSpeechUtterance(string: trimmed) let utterance = AVSpeechUtterance(string: trimmed)
if let language, let voice = AVSpeechSynthesisVoice(language: language) { if let language, let voice = AVSpeechSynthesisVoice(language: language) {
@@ -76,8 +83,13 @@ public final class TalkSystemSpeechSynthesizer: NSObject {
} }
} }
private func handleFinish(error: Error?) { private func matchesCurrentUtterance(_ utteranceID: ObjectIdentifier) -> Bool {
guard self.currentUtterance != nil else { return } guard let currentUtterance = self.currentUtterance else { return false }
return ObjectIdentifier(currentUtterance) == utteranceID
}
private func handleFinish(utteranceID: ObjectIdentifier, error: Error?) {
guard self.matchesCurrentUtterance(utteranceID) else { return }
self.watchdog?.cancel() self.watchdog?.cancel()
self.watchdog = nil self.watchdog = nil
self.finishCurrent(with: error) self.finishCurrent(with: error)
@@ -85,6 +97,7 @@ public final class TalkSystemSpeechSynthesizer: NSObject {
private func finishCurrent(with error: Error?) { private func finishCurrent(with error: Error?) {
self.currentUtterance = nil self.currentUtterance = nil
self.didStartCallback = nil
let cont = self.speakContinuation let cont = self.speakContinuation
self.speakContinuation = nil self.speakContinuation = nil
if let error { if let error {
@@ -96,12 +109,26 @@ public final class TalkSystemSpeechSynthesizer: NSObject {
} }
extension TalkSystemSpeechSynthesizer: AVSpeechSynthesizerDelegate { extension TalkSystemSpeechSynthesizer: AVSpeechSynthesizerDelegate {
public nonisolated func speechSynthesizer(
_ synthesizer: AVSpeechSynthesizer,
didStart utterance: AVSpeechUtterance)
{
let utteranceID = ObjectIdentifier(utterance)
Task { @MainActor in
guard self.matchesCurrentUtterance(utteranceID) else { return }
let callback = self.didStartCallback
self.didStartCallback = nil
callback?()
}
}
public nonisolated func speechSynthesizer( public nonisolated func speechSynthesizer(
_ synthesizer: AVSpeechSynthesizer, _ synthesizer: AVSpeechSynthesizer,
didFinish utterance: AVSpeechUtterance) didFinish utterance: AVSpeechUtterance)
{ {
let utteranceID = ObjectIdentifier(utterance)
Task { @MainActor in Task { @MainActor in
self.handleFinish(error: nil) self.handleFinish(utteranceID: utteranceID, error: nil)
} }
} }
@@ -109,8 +136,9 @@ extension TalkSystemSpeechSynthesizer: AVSpeechSynthesizerDelegate {
_ synthesizer: AVSpeechSynthesizer, _ synthesizer: AVSpeechSynthesizer,
didCancel utterance: AVSpeechUtterance) didCancel utterance: AVSpeechUtterance)
{ {
let utteranceID = ObjectIdentifier(utterance)
Task { @MainActor in Task { @MainActor in
self.handleFinish(error: SpeakError.canceled) self.handleFinish(utteranceID: utteranceID, error: SpeakError.canceled)
} }
} }
} }