mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-09 23:14:32 +00:00
fix(tts): use opus format and enable voice bubbles for feishu and whatsapp (#27366)
* fix(tts): use opus format and enable voice bubbles for feishu and whatsapp

  Previously only Telegram received opus output and had `shouldVoice=true`. Feishu and WhatsApp also support voice-bubble playback and require opus audio, but were falling back to mp3 with `audioAsVoice=false`.

  - Extract VOICE_BUBBLE_CHANNELS set (telegram, feishu, whatsapp)
  - resolveOutputFormat: return TELEGRAM_OUTPUT (opus) for all voice-bubble channels
  - shouldVoice: enable for all voice-bubble channels, not just telegram
  - Update test to cover feishu and whatsapp cases

* Changelog: add TTS voice-bubble channel coverage note

---------

Co-authored-by: Ning Hu <ninghu@Nings-MacBook-Pro.local>
Co-authored-by: Tak Hoffman <781889+Takhoffman@users.noreply.github.com>
This commit is contained in:
@@ -39,6 +39,7 @@ Docs: https://docs.openclaw.ai
|
|||||||
- Feishu/Docx convert fallback chunking: recursively split oversized markdown chunks (including long no-heading sections) when `document.convert` hits content limits, while keeping fenced-code-aware split boundaries whenever possible. (#14402) Thanks @lml2468.
|
- Feishu/Docx convert fallback chunking: recursively split oversized markdown chunks (including long no-heading sections) when `document.convert` hits content limits, while keeping fenced-code-aware split boundaries whenever possible. (#14402) Thanks @lml2468.
|
||||||
- Feishu/Inbound media regression coverage: add explicit tests for message resource type mapping (`image` stays `image`, non-image maps to `file`) to prevent reintroducing unsupported Feishu `type=audio` fetches. (#16311, #8746) Thanks @Yaxuan42.
|
- Feishu/Inbound media regression coverage: add explicit tests for message resource type mapping (`image` stays `image`, non-image maps to `file`) to prevent reintroducing unsupported Feishu `type=audio` fetches. (#16311, #8746) Thanks @Yaxuan42.
|
||||||
- Feishu/API quota controls: add `typingIndicator` and `resolveSenderNames` config flags (top-level and per-account) so operators can disable typing reactions and sender-name lookup requests while keeping default behavior unchanged. (#10513) Thanks @BigUncle.
|
- Feishu/API quota controls: add `typingIndicator` and `resolveSenderNames` config flags (top-level and per-account) so operators can disable typing reactions and sender-name lookup requests while keeping default behavior unchanged. (#10513) Thanks @BigUncle.
|
||||||
|
- TTS/Voice bubbles: use opus output and enable `audioAsVoice` routing for Feishu and WhatsApp (in addition to Telegram) so supported channels receive voice-bubble playback instead of file-style audio attachments. (#27366) Thanks @smthfoxy.
|
||||||
- Security/Feishu webhook ingress: bound unauthenticated webhook rate-limit state with stale-window pruning and a hard key cap to prevent unbounded pre-auth memory growth from rotating source keys. (#26050) Thanks @bmendonca3.
|
- Security/Feishu webhook ingress: bound unauthenticated webhook rate-limit state with stale-window pruning and a hard key cap to prevent unbounded pre-auth memory growth from rotating source keys. (#26050) Thanks @bmendonca3.
|
||||||
- Security/Compaction audit: remove the post-compaction audit injection message. (#28507) Thanks @fuller-stack-dev and @vincentkoc.
|
- Security/Compaction audit: remove the post-compaction audit injection message. (#28507) Thanks @fuller-stack-dev and @vincentkoc.
|
||||||
- Telegram/Reply media context: include replied media files in inbound context when replying to media, defer reply-media downloads to debounce flush, gate reply-media fetch behind DM authorization, and preserve replied media when non-vision sticker fallback runs (including cached-sticker paths). (#28488) Thanks @obviyus.
|
- Telegram/Reply media context: include replied media files in inbound context when replying to media, defer reply-media downloads to debounce flush, gate reply-media fetch behind DM authorization, and preserve replied media when non-vision sticker fallback runs (including cached-sticker paths). (#28488) Thanks @obviyus.
|
||||||
|
|||||||
@@ -154,7 +154,7 @@ describe("tts", () => {
|
|||||||
});
|
});
|
||||||
|
|
||||||
describe("resolveOutputFormat", () => {
|
describe("resolveOutputFormat", () => {
|
||||||
it("selects opus for Telegram and mp3 for other channels", () => {
|
it("selects opus for voice-bubble channels (telegram/feishu/whatsapp) and mp3 for others", () => {
|
||||||
const cases = [
|
const cases = [
|
||||||
{
|
{
|
||||||
channel: "telegram",
|
channel: "telegram",
|
||||||
@@ -165,6 +165,24 @@ describe("tts", () => {
|
|||||||
voiceCompatible: true,
|
voiceCompatible: true,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
channel: "feishu",
|
||||||
|
expected: {
|
||||||
|
openai: "opus",
|
||||||
|
elevenlabs: "opus_48000_64",
|
||||||
|
extension: ".opus",
|
||||||
|
voiceCompatible: true,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
channel: "whatsapp",
|
||||||
|
expected: {
|
||||||
|
openai: "opus",
|
||||||
|
elevenlabs: "opus_48000_64",
|
||||||
|
extension: ".opus",
|
||||||
|
voiceCompatible: true,
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
channel: "discord",
|
channel: "discord",
|
||||||
expected: {
|
expected: {
|
||||||
|
|||||||
@@ -480,8 +480,11 @@ export function setLastTtsAttempt(entry: TtsStatusEntry | undefined): void {
|
|||||||
lastTtsAttempt = entry;
|
lastTtsAttempt = entry;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Channels that require opus audio and support voice-bubble playback */
|
||||||
|
const VOICE_BUBBLE_CHANNELS = new Set(["telegram", "feishu", "whatsapp"]);
|
||||||
|
|
||||||
function resolveOutputFormat(channelId?: string | null) {
|
function resolveOutputFormat(channelId?: string | null) {
|
||||||
if (channelId === "telegram") {
|
if (channelId && VOICE_BUBBLE_CHANNELS.has(channelId)) {
|
||||||
return TELEGRAM_OUTPUT;
|
return TELEGRAM_OUTPUT;
|
||||||
}
|
}
|
||||||
return DEFAULT_OUTPUT;
|
return DEFAULT_OUTPUT;
|
||||||
@@ -911,7 +914,8 @@ export async function maybeApplyTtsToPayload(params: {
|
|||||||
};
|
};
|
||||||
|
|
||||||
const channelId = resolveChannelId(params.channel);
|
const channelId = resolveChannelId(params.channel);
|
||||||
const shouldVoice = channelId === "telegram" && result.voiceCompatible === true;
|
const shouldVoice =
|
||||||
|
channelId !== null && VOICE_BUBBLE_CHANNELS.has(channelId) && result.voiceCompatible === true;
|
||||||
const finalPayload = {
|
const finalPayload = {
|
||||||
...nextPayload,
|
...nextPayload,
|
||||||
mediaUrl: result.audioPath,
|
mediaUrl: result.audioPath,
|
||||||
|
|||||||
Reference in New Issue
Block a user