mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-07 11:21:23 +00:00
Discord VC: voice channels, transcription, and TTS (#18774)
This commit is contained in:
@@ -107,10 +107,7 @@ export function createOpenClawTools(options?: {
|
||||
sandboxBridgeUrl: options?.sandboxBrowserBridgeUrl,
|
||||
allowHostControl: options?.allowHostBrowserControl,
|
||||
}),
|
||||
createCanvasTool({
|
||||
config: options?.config,
|
||||
agentSessionKey: options?.agentSessionKey,
|
||||
}),
|
||||
createCanvasTool({ config: options?.config }),
|
||||
createNodesTool({
|
||||
agentSessionKey: options?.agentSessionKey,
|
||||
config: options?.config,
|
||||
|
||||
@@ -1,15 +1,14 @@
|
||||
import crypto from "node:crypto";
|
||||
import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
import { Type } from "@sinclair/typebox";
|
||||
import { writeBase64ToFile } from "../../cli/nodes-camera.js";
|
||||
import { canvasSnapshotTempPath, parseCanvasSnapshotPayload } from "../../cli/nodes-canvas.js";
|
||||
import type { OpenClawConfig } from "../../config/config.js";
|
||||
import { openFileWithinRoot, SafeOpenError } from "../../infra/fs-safe.js";
|
||||
import { getAgentScopedMediaLocalRoots } from "../../media/local-roots.js";
|
||||
import { logVerbose, shouldLogVerbose } from "../../globals.js";
|
||||
import { isInboundPathAllowed } from "../../media/inbound-path-policy.js";
|
||||
import { getDefaultMediaLocalRoots } from "../../media/local-roots.js";
|
||||
import { imageMimeFromFormat } from "../../media/mime.js";
|
||||
import { resolveUserPath } from "../../utils.js";
|
||||
import { resolveSessionAgentId } from "../agent-scope.js";
|
||||
import { resolveImageSanitizationLimits } from "../image-sanitization.js";
|
||||
import { optionalStringEnum, stringEnum } from "../schema/typebox.js";
|
||||
import { type AnyAgentTool, imageResult, jsonResult, readStringParam } from "./common.js";
|
||||
@@ -28,77 +27,27 @@ const CANVAS_ACTIONS = [
|
||||
|
||||
const CANVAS_SNAPSHOT_FORMATS = ["png", "jpg", "jpeg"] as const;
|
||||
|
||||
const PATH_SCHEME_RE = /^[a-z][a-z0-9+.-]*:/i;
|
||||
const WINDOWS_DRIVE_RE = /^[a-zA-Z]:[\\/]/;
|
||||
|
||||
function resolveJsonlLocalPath(rawPath: string): string {
|
||||
const trimmed = rawPath.trim();
|
||||
async function readJsonlFromPath(jsonlPath: string): Promise<string> {
|
||||
const trimmed = jsonlPath.trim();
|
||||
if (!trimmed) {
|
||||
return trimmed;
|
||||
return "";
|
||||
}
|
||||
if (trimmed.startsWith("file://")) {
|
||||
try {
|
||||
return fileURLToPath(trimmed);
|
||||
} catch (err) {
|
||||
throw new Error(`Invalid jsonlPath file URL: ${rawPath}`, { cause: err });
|
||||
const resolved = path.resolve(trimmed);
|
||||
const roots = getDefaultMediaLocalRoots();
|
||||
if (!isInboundPathAllowed({ filePath: resolved, roots })) {
|
||||
if (shouldLogVerbose()) {
|
||||
logVerbose(`Blocked canvas jsonlPath outside allowed roots: ${resolved}`);
|
||||
}
|
||||
throw new Error("jsonlPath outside allowed roots");
|
||||
}
|
||||
if (PATH_SCHEME_RE.test(trimmed) && !WINDOWS_DRIVE_RE.test(trimmed)) {
|
||||
throw new Error("jsonlPath must be a local file path.");
|
||||
}
|
||||
if (trimmed.startsWith("~")) {
|
||||
return resolveUserPath(trimmed);
|
||||
}
|
||||
return path.resolve(trimmed);
|
||||
}
|
||||
|
||||
function resolveLocalRoot(filePath: string, roots: readonly string[]): string | null {
|
||||
const resolvedPath = path.resolve(filePath);
|
||||
for (const root of roots) {
|
||||
const resolvedRoot = path.resolve(root);
|
||||
const rel = path.relative(resolvedRoot, resolvedPath);
|
||||
if (!rel || (!rel.startsWith("..") && !path.isAbsolute(rel))) {
|
||||
return resolvedRoot;
|
||||
const canonical = await fs.realpath(resolved).catch(() => resolved);
|
||||
if (!isInboundPathAllowed({ filePath: canonical, roots })) {
|
||||
if (shouldLogVerbose()) {
|
||||
logVerbose(`Blocked canvas jsonlPath outside allowed roots: ${canonical}`);
|
||||
}
|
||||
throw new Error("jsonlPath outside allowed roots");
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
async function readJsonlFromPath(params: {
|
||||
jsonlPath: string;
|
||||
localRoots: readonly string[];
|
||||
}): Promise<string> {
|
||||
const resolvedPath = resolveJsonlLocalPath(params.jsonlPath);
|
||||
const resolvedRoot = resolveLocalRoot(resolvedPath, params.localRoots);
|
||||
if (!resolvedRoot) {
|
||||
throw new Error("jsonlPath must be under an allowed directory.");
|
||||
}
|
||||
const relativePath = path.relative(resolvedRoot, resolvedPath);
|
||||
try {
|
||||
const opened = await openFileWithinRoot({
|
||||
rootDir: resolvedRoot,
|
||||
relativePath,
|
||||
});
|
||||
try {
|
||||
const buffer = await opened.handle.readFile();
|
||||
return buffer.toString("utf8");
|
||||
} finally {
|
||||
await opened.handle.close().catch(() => {});
|
||||
}
|
||||
} catch (err) {
|
||||
if (err instanceof SafeOpenError) {
|
||||
if (err.code === "not-found") {
|
||||
throw new Error("jsonlPath file not found.", { cause: err });
|
||||
}
|
||||
if (err.code === "not-file") {
|
||||
throw new Error("jsonlPath must be a regular file.", { cause: err });
|
||||
}
|
||||
throw new Error("jsonlPath must be a regular file within an allowed directory.", {
|
||||
cause: err,
|
||||
});
|
||||
}
|
||||
throw err;
|
||||
}
|
||||
return await fs.readFile(canonical, "utf8");
|
||||
}
|
||||
|
||||
// Flattened schema: runtime validates per-action requirements.
|
||||
@@ -128,15 +77,8 @@ const CanvasToolSchema = Type.Object({
|
||||
jsonlPath: Type.Optional(Type.String()),
|
||||
});
|
||||
|
||||
export function createCanvasTool(options?: {
|
||||
config?: OpenClawConfig;
|
||||
agentSessionKey?: string;
|
||||
}): AnyAgentTool {
|
||||
export function createCanvasTool(options?: { config?: OpenClawConfig }): AnyAgentTool {
|
||||
const imageSanitization = resolveImageSanitizationLimits(options?.config);
|
||||
const agentId = options?.agentSessionKey
|
||||
? resolveSessionAgentId({ sessionKey: options.agentSessionKey, config: options?.config })
|
||||
: undefined;
|
||||
const localRoots = getAgentScopedMediaLocalRoots(options?.config ?? {}, agentId);
|
||||
return {
|
||||
label: "Canvas",
|
||||
name: "canvas",
|
||||
@@ -254,10 +196,7 @@ export function createCanvasTool(options?: {
|
||||
typeof params.jsonl === "string" && params.jsonl.trim()
|
||||
? params.jsonl
|
||||
: typeof params.jsonlPath === "string" && params.jsonlPath.trim()
|
||||
? await readJsonlFromPath({
|
||||
jsonlPath: params.jsonlPath,
|
||||
localRoots,
|
||||
})
|
||||
? await readJsonlFromPath(params.jsonlPath)
|
||||
: "";
|
||||
if (!jsonl.trim()) {
|
||||
throw new Error("jsonl or jsonlPath required");
|
||||
|
||||
@@ -434,6 +434,12 @@ export const FIELD_HELP: Record<string, string> = {
|
||||
"channels.discord.maxLinesPerMessage": "Soft max line count per Discord message (default: 17).",
|
||||
"channels.discord.ui.components.accentColor":
|
||||
"Accent color for Discord component containers (hex). Set per account via channels.discord.accounts.<id>.ui.components.accentColor.",
|
||||
"channels.discord.voice.enabled":
|
||||
"Enable Discord voice channel conversations (default: true). Omit channels.discord.voice to keep voice support disabled for the account.",
|
||||
"channels.discord.voice.autoJoin":
|
||||
"Voice channels to auto-join on startup (list of guildId/channelId entries).",
|
||||
"channels.discord.voice.tts":
|
||||
"Optional TTS overrides for Discord voice playback (merged with messages.tts).",
|
||||
"channels.discord.intents.presence":
|
||||
"Enable the Guild Presences privileged intent. Must also be enabled in the Discord Developer Portal. Allows tracking user activities (e.g. Spotify). Default: false.",
|
||||
"channels.discord.intents.guildMembers":
|
||||
|
||||
@@ -291,6 +291,8 @@ export const FIELD_LABELS: Record<string, string> = {
|
||||
"channels.discord.ui.components.accentColor": "Discord Component Accent Color",
|
||||
"channels.discord.intents.presence": "Discord Presence Intent",
|
||||
"channels.discord.intents.guildMembers": "Discord Guild Members Intent",
|
||||
"channels.discord.voice.enabled": "Discord Voice Enabled",
|
||||
"channels.discord.voice.autoJoin": "Discord Voice Auto-Join",
|
||||
"channels.discord.pluralkit.enabled": "Discord PluralKit Enabled",
|
||||
"channels.discord.pluralkit.token": "Discord PluralKit Token",
|
||||
"channels.discord.activity": "Discord Presence Activity",
|
||||
|
||||
@@ -11,6 +11,7 @@ import type {
|
||||
import type { ChannelHeartbeatVisibilityConfig } from "./types.channels.js";
|
||||
import type { DmConfig, ProviderCommandsConfig } from "./types.messages.js";
|
||||
import type { GroupToolPolicyBySenderConfig, GroupToolPolicyConfig } from "./types.tools.js";
|
||||
import type { TtsConfig } from "./types.tts.js";
|
||||
|
||||
export type DiscordStreamMode = "partial" | "block" | "off";
|
||||
|
||||
@@ -94,6 +95,22 @@ export type DiscordIntentsConfig = {
|
||||
guildMembers?: boolean;
|
||||
};
|
||||
|
||||
/** A single guild/channel pair naming a voice channel to auto-join on startup. */
export type DiscordVoiceAutoJoinConfig = {
  /** ID of the guild that owns the voice channel. */
  guildId: string;
  /** ID of the voice channel to join. */
  channelId: string;
};
|
||||
|
||||
/** Voice channel conversation settings for a Discord account. */
export type DiscordVoiceConfig = {
  /**
   * Toggle for Discord voice channel conversations (default: true).
   * The monitor treats voice as enabled unless this is explicitly `false`.
   */
  enabled?: boolean;
  /** Voice channels to join automatically on startup. */
  autoJoin?: DiscordVoiceAutoJoinConfig[];
  /** Optional TTS overrides applied to Discord voice output (merged with messages.tts). */
  tts?: TtsConfig;
};
|
||||
|
||||
export type DiscordExecApprovalConfig = {
|
||||
/** Enable exec approval forwarding to Discord DMs. Default: false. */
|
||||
enabled?: boolean;
|
||||
@@ -211,6 +228,8 @@ export type DiscordAccountConfig = {
|
||||
ui?: DiscordUiConfig;
|
||||
/** Privileged Gateway Intents (must also be enabled in Discord Developer Portal). */
|
||||
intents?: DiscordIntentsConfig;
|
||||
/** Voice channel conversation settings. */
|
||||
voice?: DiscordVoiceConfig;
|
||||
/** PluralKit identity resolution for proxied messages. */
|
||||
pluralkit?: DiscordPluralKitConfig;
|
||||
/** Outbound response prefix override for this channel/account. */
|
||||
|
||||
@@ -21,6 +21,7 @@ import {
|
||||
ProviderCommandsSchema,
|
||||
ReplyToModeSchema,
|
||||
RetryConfigSchema,
|
||||
TtsConfigSchema,
|
||||
requireOpenAllowFrom,
|
||||
} from "./zod-schema.core.js";
|
||||
import { sensitive } from "./zod-schema.sensitive.js";
|
||||
@@ -271,6 +272,22 @@ const DiscordUiSchema = z
|
||||
.strict()
|
||||
.optional();
|
||||
|
||||
// One auto-join target: both IDs must be non-empty strings and no extra keys are accepted.
const DiscordVoiceAutoJoinSchema = z
  .object({
    guildId: z.string().min(1),
    channelId: z.string().min(1),
  })
  .strict();
|
||||
|
||||
// Per-account voice settings. Every field is optional, unknown keys are rejected,
// and the whole section itself may be omitted.
const DiscordVoiceSchema = z
  .object({
    enabled: z.boolean().optional(),
    autoJoin: z.array(DiscordVoiceAutoJoinSchema).optional(),
    tts: TtsConfigSchema.optional(),
  })
  .strict()
  .optional();
|
||||
|
||||
export const DiscordAccountSchema = z
|
||||
.object({
|
||||
name: z.string().optional(),
|
||||
@@ -347,6 +364,7 @@ export const DiscordAccountSchema = z
|
||||
})
|
||||
.strict()
|
||||
.optional(),
|
||||
voice: DiscordVoiceSchema,
|
||||
pluralkit: z
|
||||
.object({
|
||||
enabled: z.boolean().optional(),
|
||||
|
||||
@@ -14,7 +14,8 @@ export function resolveDiscordGatewayIntents(
|
||||
GatewayIntents.MessageContent |
|
||||
GatewayIntents.DirectMessages |
|
||||
GatewayIntents.GuildMessageReactions |
|
||||
GatewayIntents.DirectMessageReactions;
|
||||
GatewayIntents.DirectMessageReactions |
|
||||
GatewayIntents.GuildVoiceStates;
|
||||
if (intentsConfig?.presence) {
|
||||
intents |= GatewayIntents.GuildPresences;
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@ import {
|
||||
type Modal,
|
||||
} from "@buape/carbon";
|
||||
import { GatewayCloseCodes, type GatewayPlugin } from "@buape/carbon/gateway";
|
||||
import { VoicePlugin } from "@buape/carbon/voice";
|
||||
import { Routes } from "discord-api-types/v10";
|
||||
import { resolveTextChunkLimit } from "../../auto-reply/chunk.js";
|
||||
import { listNativeCommandSpecsForConfig } from "../../auto-reply/commands-registry.js";
|
||||
@@ -38,6 +39,8 @@ import { fetchDiscordApplicationId } from "../probe.js";
|
||||
import { resolveDiscordChannelAllowlist } from "../resolve-channels.js";
|
||||
import { resolveDiscordUserAllowlist } from "../resolve-users.js";
|
||||
import { normalizeDiscordToken } from "../token.js";
|
||||
import { createDiscordVoiceCommand } from "../voice/command.js";
|
||||
import { DiscordVoiceManager, DiscordVoiceReadyListener } from "../voice/manager.js";
|
||||
import {
|
||||
createAgentComponentButton,
|
||||
createAgentSelectMenu,
|
||||
@@ -241,6 +244,7 @@ export async function monitorDiscordProvider(opts: MonitorDiscordOpts = {}) {
|
||||
const useAccessGroups = cfg.commands?.useAccessGroups !== false;
|
||||
const sessionPrefix = "discord:slash";
|
||||
const ephemeralDefault = true;
|
||||
const voiceEnabled = discordCfg.voice?.enabled !== false;
|
||||
|
||||
if (token) {
|
||||
if (guildEntries && Object.keys(guildEntries).length > 0) {
|
||||
@@ -428,6 +432,7 @@ export async function monitorDiscordProvider(opts: MonitorDiscordOpts = {}) {
|
||||
),
|
||||
);
|
||||
}
|
||||
const voiceManagerRef: { current: DiscordVoiceManager | null } = { current: null };
|
||||
const commands = commandSpecs.map((spec) =>
|
||||
createDiscordNativeCommand({
|
||||
command: spec,
|
||||
@@ -438,6 +443,19 @@ export async function monitorDiscordProvider(opts: MonitorDiscordOpts = {}) {
|
||||
ephemeralDefault,
|
||||
}),
|
||||
);
|
||||
if (nativeEnabled && voiceEnabled) {
|
||||
commands.push(
|
||||
createDiscordVoiceCommand({
|
||||
cfg,
|
||||
discordConfig: discordCfg,
|
||||
accountId: account.accountId,
|
||||
groupPolicy,
|
||||
useAccessGroups,
|
||||
getManager: () => voiceManagerRef.current,
|
||||
ephemeralDefault,
|
||||
}),
|
||||
);
|
||||
}
|
||||
|
||||
// Initialize exec approvals handler if enabled
|
||||
const execApprovalsConfig = discordCfg.execApprovals ?? {};
|
||||
@@ -506,6 +524,10 @@ export async function monitorDiscordProvider(opts: MonitorDiscordOpts = {}) {
|
||||
}
|
||||
}
|
||||
|
||||
const clientPlugins = [createDiscordGatewayPlugin({ discordConfig: discordCfg, runtime })];
|
||||
if (voiceEnabled) {
|
||||
clientPlugins.push(new VoicePlugin());
|
||||
}
|
||||
const client = new Client(
|
||||
{
|
||||
baseUrl: "http://localhost",
|
||||
@@ -521,7 +543,7 @@ export async function monitorDiscordProvider(opts: MonitorDiscordOpts = {}) {
|
||||
components,
|
||||
modals,
|
||||
},
|
||||
[createDiscordGatewayPlugin({ discordConfig: discordCfg, runtime })],
|
||||
clientPlugins,
|
||||
);
|
||||
|
||||
await deployDiscordCommands({ client, runtime, enabled: nativeEnabled });
|
||||
@@ -529,6 +551,7 @@ export async function monitorDiscordProvider(opts: MonitorDiscordOpts = {}) {
|
||||
const logger = createSubsystemLogger("discord/monitor");
|
||||
const guildHistories = new Map<string, HistoryEntry[]>();
|
||||
let botUserId: string | undefined;
|
||||
let voiceManager: DiscordVoiceManager | null = null;
|
||||
|
||||
if (nativeDisabledExplicit) {
|
||||
await clearDiscordNativeCommands({
|
||||
@@ -545,6 +568,19 @@ export async function monitorDiscordProvider(opts: MonitorDiscordOpts = {}) {
|
||||
runtime.error?.(danger(`discord: failed to fetch bot identity: ${String(err)}`));
|
||||
}
|
||||
|
||||
if (voiceEnabled) {
|
||||
voiceManager = new DiscordVoiceManager({
|
||||
client,
|
||||
cfg,
|
||||
discordConfig: discordCfg,
|
||||
accountId: account.accountId,
|
||||
runtime,
|
||||
botUserId,
|
||||
});
|
||||
voiceManagerRef.current = voiceManager;
|
||||
registerDiscordListener(client.listeners, new DiscordVoiceReadyListener(voiceManager));
|
||||
}
|
||||
|
||||
const messageHandler = createDiscordMessageHandler({
|
||||
cfg,
|
||||
discordConfig: discordCfg,
|
||||
@@ -697,6 +733,10 @@ export async function monitorDiscordProvider(opts: MonitorDiscordOpts = {}) {
|
||||
}
|
||||
gatewayEmitter?.removeListener("debug", onGatewayDebug);
|
||||
abortSignal?.removeEventListener("abort", onAbort);
|
||||
if (voiceManager) {
|
||||
await voiceManager.destroy();
|
||||
voiceManagerRef.current = null;
|
||||
}
|
||||
if (execApprovalsHandler) {
|
||||
await execApprovalsHandler.stop();
|
||||
}
|
||||
|
||||
339
src/discord/voice/command.ts
Normal file
339
src/discord/voice/command.ts
Normal file
@@ -0,0 +1,339 @@
|
||||
import {
|
||||
ChannelType as CarbonChannelType,
|
||||
Command,
|
||||
CommandWithSubcommands,
|
||||
type CommandInteraction,
|
||||
} from "@buape/carbon";
|
||||
import {
|
||||
ApplicationCommandOptionType,
|
||||
ChannelType as DiscordChannelType,
|
||||
} from "discord-api-types/v10";
|
||||
import { resolveCommandAuthorizedFromAuthorizers } from "../../channels/command-gating.js";
|
||||
import type { OpenClawConfig } from "../../config/config.js";
|
||||
import type { DiscordAccountConfig } from "../../config/types.js";
|
||||
import {
|
||||
allowListMatches,
|
||||
isDiscordGroupAllowedByPolicy,
|
||||
normalizeDiscordAllowList,
|
||||
normalizeDiscordSlug,
|
||||
resolveDiscordChannelConfigWithFallback,
|
||||
resolveDiscordGuildEntry,
|
||||
resolveDiscordMemberAccessState,
|
||||
} from "../monitor/allow-list.js";
|
||||
import { resolveDiscordChannelInfo } from "../monitor/message-utils.js";
|
||||
import { resolveDiscordSenderIdentity } from "../monitor/sender-identity.js";
|
||||
import { resolveDiscordThreadParentInfo } from "../monitor/threading.js";
|
||||
import type { DiscordVoiceManager } from "./manager.js";
|
||||
|
||||
/** Channel types accepted by the `join` subcommand's channel option: voice and stage voice. */
const VOICE_CHANNEL_TYPES: DiscordChannelType[] = [
  DiscordChannelType.GuildVoice,
  DiscordChannelType.GuildStageVoice,
];
|
||||
|
||||
/** Everything the voice slash commands need from the Discord monitor that registers them. */
type VoiceCommandContext = {
  /** Full application config. */
  cfg: OpenClawConfig;
  /** Config of the Discord account the commands are registered for. */
  discordConfig: DiscordAccountConfig;
  /** Identifier of that Discord account. */
  accountId: string;
  /** Group policy forwarded to the guild/channel allowlist check. */
  groupPolicy: "open" | "disabled" | "allowlist";
  /** Whether access groups take part in command authorization. */
  useAccessGroups: boolean;
  /** Lazily resolves the voice manager; yields `null` while none is available. */
  getManager: () => DiscordVoiceManager | null;
  /** Value used for the `ephemeral` flag of each subcommand. */
  ephemeralDefault: boolean;
};
|
||||
|
||||
/**
 * Channel to authorize against in place of the channel the command was invoked in.
 * Only `id` is required; the name is looked up from Discord when not supplied.
 */
type VoiceCommandChannelOverride = {
  id: string;
  name?: string;
  parentId?: string;
};
|
||||
|
||||
/**
 * Decide whether the invoking user may run a voice command.
 *
 * Checks run in this order, and the first failure wins:
 *   1. the interaction must come from a guild and carry a user;
 *   2. the target channel must not be disabled in config;
 *   3. the guild/channel must pass the group policy and channel allowlist;
 *   4. the sender must pass the owner / member authorizers.
 *
 * @param interaction - Slash-command interaction being handled.
 * @param params - Voice command context (account config, group policy, access-group mode).
 * @param options - Optional `channelOverride`: authorize against this channel instead of
 *   the channel the command was invoked in.
 * @returns `{ ok: true, guildId }` when allowed; otherwise `{ ok: false, message }` where
 *   `message` is a user-facing explanation.
 */
async function authorizeVoiceCommand(
  interaction: CommandInteraction,
  params: VoiceCommandContext,
  options?: { channelOverride?: VoiceCommandChannelOverride },
): Promise<{ ok: boolean; message?: string; guildId?: string }> {
  const override = options?.channelOverride;
  // With an override in play, the channel the command was typed in is ignored entirely.
  const invokedChannel = override ? undefined : interaction.channel;

  const guild = interaction.guild;
  if (!guild) {
    return { ok: false, message: "Voice commands are only available in guilds." };
  }
  const user = interaction.user;
  if (!user) {
    return { ok: false, message: "Unable to resolve command user." };
  }

  // Empty string means "no channel could be determined".
  const targetChannelId = override?.id ?? invokedChannel?.id ?? "";
  const invokedChannelName =
    invokedChannel && "name" in invokedChannel ? (invokedChannel.name as string) : undefined;
  const declaredName = override?.name ?? invokedChannelName;
  const invokedParentId =
    "parentId" in (invokedChannel ?? {})
      ? ((invokedChannel as { parentId?: string }).parentId ?? undefined)
      : undefined;
  const declaredParentId = override?.parentId ?? invokedParentId;

  const channelInfo = targetChannelId
    ? await resolveDiscordChannelInfo(interaction.client, targetChannelId)
    : null;
  const channelName = declaredName ?? channelInfo?.name;
  const channelSlug = channelName ? normalizeDiscordSlug(channelName) : "";

  let isThreadChannel = false;
  switch (channelInfo?.type) {
    case CarbonChannelType.PublicThread:
    case CarbonChannelType.PrivateThread:
    case CarbonChannelType.AnnouncementThread:
      isThreadChannel = true;
      break;
    default:
      break;
  }

  // Parent details are only resolved for threads.
  let parentId: string | undefined;
  let parentName: string | undefined;
  let parentSlug: string | undefined;
  if (isThreadChannel && targetChannelId) {
    const parentInfo = await resolveDiscordThreadParentInfo({
      client: interaction.client,
      threadChannel: {
        id: targetChannelId,
        name: channelName,
        parentId: declaredParentId ?? channelInfo?.parentId,
        parent: undefined,
      },
      channelInfo,
    });
    parentId = parentInfo.id;
    parentName = parentInfo.name;
    parentSlug = parentName ? normalizeDiscordSlug(parentName) : undefined;
  }

  const guildInfo = resolveDiscordGuildEntry({
    guild: guild ?? undefined,
    guildEntries: params.discordConfig.guilds,
  });

  const channelConfig = targetChannelId
    ? resolveDiscordChannelConfigWithFallback({
        guildInfo,
        channelId: targetChannelId,
        channelName,
        channelSlug,
        parentId,
        parentName,
        parentSlug,
        scope: isThreadChannel ? "thread" : "channel",
      })
    : null;

  if (channelConfig?.enabled === false) {
    return { ok: false, message: "This channel is disabled." };
  }

  const channelAllowlistConfigured =
    Boolean(guildInfo?.channels) && Object.keys(guildInfo?.channels ?? {}).length > 0;
  const channelAllowed = channelConfig?.allowed !== false;
  const allowedByPolicy = isDiscordGroupAllowedByPolicy({
    groupPolicy: params.groupPolicy,
    guildAllowlisted: Boolean(guildInfo),
    channelAllowlistConfigured,
    channelAllowed,
  });
  // An explicit `allowed: false` on the channel rejects regardless of the policy verdict.
  if (!allowedByPolicy || channelConfig?.allowed === false) {
    const channelLabel = targetChannelId ? `<#${targetChannelId}>` : "This channel";
    return {
      ok: false,
      message: `${channelLabel} is not allowlisted for voice commands.`,
    };
  }

  const rawMember = interaction.rawData.member;
  const memberRoleIds = Array.isArray(rawMember?.roles)
    ? rawMember.roles.map((roleId: string) => String(roleId))
    : [];
  const sender = resolveDiscordSenderIdentity({ author: user, member: rawMember });

  const { hasAccessRestrictions, memberAllowed } = resolveDiscordMemberAccessState({
    channelConfig,
    guildInfo,
    memberRoleIds,
    sender,
  });

  const ownerAllowList = normalizeDiscordAllowList(
    params.discordConfig.allowFrom ?? params.discordConfig.dm?.allowFrom ?? [],
    ["discord:", "user:", "pk:"],
  );
  let ownerOk = false;
  if (ownerAllowList) {
    ownerOk = allowListMatches(ownerAllowList, {
      id: sender.id,
      name: sender.name,
      tag: sender.tag,
    });
  }

  // The owner allowlist only acts as an authorizer when access groups are in use.
  const memberAuthorizer = { configured: hasAccessRestrictions, allowed: memberAllowed };
  const authorizers = params.useAccessGroups
    ? [{ configured: ownerAllowList != null, allowed: ownerOk }, memberAuthorizer]
    : [memberAuthorizer];

  const commandAuthorized = resolveCommandAuthorizedFromAuthorizers({
    useAccessGroups: params.useAccessGroups,
    authorizers,
    modeWhenAccessGroupsOff: "configured",
  });
  if (!commandAuthorized) {
    return { ok: false, message: "You are not authorized to use this command." };
  }

  return { ok: true, guildId: guild.id };
}
|
||||
|
||||
/**
 * Build the `vc` slash command with its `join`, `leave` and `status` subcommands.
 *
 * Every subcommand authorizes the caller through `authorizeVoiceCommand` before acting,
 * and every reply is sent with `ephemeral: true`.
 *
 * @param params - Voice command context; `getManager` is consulted on each invocation, so
 *   the manager may become available after the command has been created.
 * @returns The command group to register with the Discord client.
 */
export function createDiscordVoiceCommand(params: VoiceCommandContext): CommandWithSubcommands {
  const GUILD_UNRESOLVED = "Unable to resolve guild for this command.";
  const MANAGER_UNAVAILABLE = "Voice manager is not available yet.";
  const NOT_AUTHORIZED = "Not authorized.";

  // All responses from these commands are private to the caller.
  const replyPrivately = async (interaction: CommandInteraction, content: string) => {
    await interaction.reply({ content, ephemeral: true });
  };

  // Channel of the first active session in the guild, if any.
  const findSessionChannelId = (manager: DiscordVoiceManager, guildId: string) =>
    manager.status().find((entry) => entry.guildId === guildId)?.channelId;

  class JoinCommand extends Command {
    name = "join";
    description = "Join a voice channel";
    defer = true;
    ephemeral = params.ephemeralDefault;
    options = [
      {
        name: "channel",
        description: "Voice channel to join",
        type: ApplicationCommandOptionType.Channel,
        required: true,
        channel_types: VOICE_CHANNEL_TYPES,
      },
    ];

    async run(interaction: CommandInteraction) {
      const target = await interaction.options.getChannel("channel", true);
      if (!target || !("id" in target)) {
        await replyPrivately(interaction, "Voice channel not found.");
        return;
      }

      // Authorization is evaluated against the channel being joined.
      const targetName = "name" in target ? (target.name as string) : undefined;
      const targetParentId =
        "parentId" in target
          ? ((target as { parentId?: string }).parentId ?? undefined)
          : undefined;
      const access = await authorizeVoiceCommand(interaction, params, {
        channelOverride: { id: target.id, name: targetName, parentId: targetParentId },
      });
      if (!access.ok) {
        await replyPrivately(interaction, access.message ?? NOT_AUTHORIZED);
        return;
      }
      if (!isVoiceChannelType(target.type)) {
        await replyPrivately(interaction, "That is not a voice channel.");
        return;
      }

      const guildId = access.guildId ?? ("guildId" in target ? target.guildId : undefined);
      if (!guildId) {
        await replyPrivately(interaction, "Unable to resolve guild for this voice channel.");
        return;
      }

      const manager = params.getManager();
      if (!manager) {
        await replyPrivately(interaction, MANAGER_UNAVAILABLE);
        return;
      }

      const outcome = await manager.join({ guildId, channelId: target.id });
      await replyPrivately(interaction, outcome.message);
    }
  }

  class LeaveCommand extends Command {
    name = "leave";
    description = "Leave the current voice channel";
    defer = true;
    ephemeral = params.ephemeralDefault;

    async run(interaction: CommandInteraction) {
      const guildId = interaction.guild?.id;
      if (!guildId) {
        await replyPrivately(interaction, GUILD_UNRESOLVED);
        return;
      }
      const manager = params.getManager();
      if (!manager) {
        await replyPrivately(interaction, MANAGER_UNAVAILABLE);
        return;
      }

      // Authorize against the active session's channel when there is one.
      const activeChannelId = findSessionChannelId(manager, guildId);
      const access = await authorizeVoiceCommand(interaction, params, {
        channelOverride: activeChannelId ? { id: activeChannelId } : undefined,
      });
      if (!access.ok) {
        await replyPrivately(interaction, access.message ?? NOT_AUTHORIZED);
        return;
      }

      const outcome = await manager.leave({ guildId });
      await replyPrivately(interaction, outcome.message);
    }
  }

  class StatusCommand extends Command {
    name = "status";
    description = "Show active voice sessions";
    defer = true;
    ephemeral = params.ephemeralDefault;

    async run(interaction: CommandInteraction) {
      const guildId = interaction.guild?.id;
      if (!guildId) {
        await replyPrivately(interaction, GUILD_UNRESOLVED);
        return;
      }
      const manager = params.getManager();
      if (!manager) {
        await replyPrivately(interaction, MANAGER_UNAVAILABLE);
        return;
      }

      const guildSessions = manager.status().filter((entry) => entry.guildId === guildId);
      const activeChannelId = guildSessions[0]?.channelId;
      const access = await authorizeVoiceCommand(interaction, params, {
        channelOverride: activeChannelId ? { id: activeChannelId } : undefined,
      });
      if (!access.ok) {
        await replyPrivately(interaction, access.message ?? NOT_AUTHORIZED);
        return;
      }
      if (guildSessions.length === 0) {
        await replyPrivately(interaction, "No active voice sessions.");
        return;
      }

      const report = guildSessions
        .map((entry) => `• <#${entry.channelId}> (guild ${entry.guildId})`)
        .join("\n");
      await replyPrivately(interaction, report);
    }
  }

  return new (class extends CommandWithSubcommands {
    name = "vc";
    description = "Voice channel controls";
    subcommands = [new JoinCommand(), new LeaveCommand(), new StatusCommand()];
  })();
}
|
||||
|
||||
/** True when `type` is a guild voice channel or a guild stage voice channel. */
function isVoiceChannelType(type: CarbonChannelType) {
  switch (type) {
    case CarbonChannelType.GuildVoice:
    case CarbonChannelType.GuildStageVoice:
      return true;
    default:
      return false;
  }
}
|
||||
670
src/discord/voice/manager.ts
Normal file
670
src/discord/voice/manager.ts
Normal file
@@ -0,0 +1,670 @@
|
||||
import { randomUUID } from "node:crypto";
|
||||
import fs from "node:fs/promises";
|
||||
import { createRequire } from "node:module";
|
||||
import path from "node:path";
|
||||
import type { Readable } from "node:stream";
|
||||
import { ChannelType, type Client, ReadyListener } from "@buape/carbon";
|
||||
import type { VoicePlugin } from "@buape/carbon/voice";
|
||||
import {
|
||||
AudioPlayerStatus,
|
||||
EndBehaviorType,
|
||||
VoiceConnectionStatus,
|
||||
createAudioPlayer,
|
||||
createAudioResource,
|
||||
entersState,
|
||||
joinVoiceChannel,
|
||||
type AudioPlayer,
|
||||
type VoiceConnection,
|
||||
} from "@discordjs/voice";
|
||||
import { resolveAgentDir } from "../../agents/agent-scope.js";
|
||||
import type { MsgContext } from "../../auto-reply/templating.js";
|
||||
import { agentCommand } from "../../commands/agent.js";
|
||||
import type { OpenClawConfig } from "../../config/config.js";
|
||||
import type { DiscordAccountConfig, TtsConfig } from "../../config/types.js";
|
||||
import { logVerbose, shouldLogVerbose } from "../../globals.js";
|
||||
import { formatErrorMessage } from "../../infra/errors.js";
|
||||
import { resolvePreferredOpenClawTmpDir } from "../../infra/tmp-openclaw-dir.js";
|
||||
import { createSubsystemLogger } from "../../logging/subsystem.js";
|
||||
import {
|
||||
buildProviderRegistry,
|
||||
createMediaAttachmentCache,
|
||||
normalizeMediaAttachments,
|
||||
runCapability,
|
||||
} from "../../media-understanding/runner.js";
|
||||
import { resolveAgentRoute } from "../../routing/resolve-route.js";
|
||||
import type { RuntimeEnv } from "../../runtime.js";
|
||||
import { parseTtsDirectives } from "../../tts/tts-core.js";
|
||||
import { resolveTtsConfig, textToSpeech, type ResolvedTtsConfig } from "../../tts/tts.js";
|
||||
|
||||
// `opusscript` is loaded through a `require` function built from this module's URL
// rather than through an `import` statement.
const require = createRequire(import.meta.url);
const OpusScript = require("opusscript") as typeof import("opusscript");
|
||||
|
||||
// PCM format parameters; these are written into the WAV header by buildWavBuffer.
const SAMPLE_RATE = 48_000;
const CHANNELS = 2;
const BIT_DEPTH = 16;

// Timing thresholds. NOTE(review): their consumers are outside this view; the units are
// taken from the name suffixes (seconds / milliseconds) — confirm against the call sites.
const MIN_SEGMENT_SECONDS = 0.35;
const SILENCE_DURATION_MS = 1_000;
const PLAYBACK_READY_TIMEOUT_MS = 15_000;
const SPEAKING_READY_TIMEOUT_MS = 60_000;
|
||||
|
||||
const logger = createSubsystemLogger("discord/voice");
|
||||
|
||||
/** Forward `message` to the verbose log, prefixed with "discord voice: ". */
const logVoiceVerbose = (message: string): void => {
  const tagged = `discord voice: ${message}`;
  logVerbose(tagged);
};
|
||||
|
||||
/**
 * Outcome of a voice operation.
 * NOTE(review): presumably what the manager's join/leave return — the voice commands
 * reply with the `message` of those results; confirm against the manager methods.
 */
type VoiceOperationResult = {
  /** Whether the operation succeeded. */
  ok: boolean;
  /** Human-readable description of the outcome. */
  message: string;
  channelId?: string;
  guildId?: string;
};
|
||||
|
||||
/**
 * State held for one voice session.
 * NOTE(review): the code that fills and consumes these fields is outside this view, so
 * the per-field notes below are limited to what the declared types show.
 */
type VoiceSessionEntry = {
  guildId: string;
  channelId: string;
  sessionChannelId: string;
  /** Whatever `resolveAgentRoute` returned for this session. */
  route: ReturnType<typeof resolveAgentRoute>;
  /** Voice connection (`@discordjs/voice`). */
  connection: VoiceConnection;
  /** Audio player (`@discordjs/voice`). */
  player: AudioPlayer;
  /** Promise chain — presumably serializes playback; confirm at the call sites. */
  playbackQueue: Promise<void>;
  /** Promise chain — presumably serializes processing of captured audio; confirm. */
  processingQueue: Promise<void>;
  /** String keys of active speakers — presumably user IDs; confirm. */
  activeSpeakers: Set<string>;
  stop: () => void;
};
|
||||
|
||||
/**
 * Layers `override` on top of `base`.
 *
 * Top-level keys are shallow-merged (override wins); `modelOverrides`, `elevenlabs`
 * (including its nested `voiceSettings`), `openai`, and `edge` are merged key by key.
 * When no override is supplied the base object is returned as-is.
 */
function mergeTtsConfig(base: TtsConfig, override?: TtsConfig): TtsConfig {
  if (override) {
    const modelOverrides = { ...base.modelOverrides, ...override.modelOverrides };
    const voiceSettings = {
      ...base.elevenlabs?.voiceSettings,
      ...override.elevenlabs?.voiceSettings,
    };
    const elevenlabs = { ...base.elevenlabs, ...override.elevenlabs, voiceSettings };
    const openai = { ...base.openai, ...override.openai };
    const edge = { ...base.edge, ...override.edge };
    return { ...base, ...override, modelOverrides, elevenlabs, openai, edge };
  }
  return base;
}
|
||||
|
||||
/**
 * Produces the config and resolved TTS settings to use for voice playback.
 *
 * Without an override the incoming config is used unchanged. With an override, a
 * copy of the config is returned whose `messages.tts` is the base TTS config merged
 * with the override, together with the TTS settings resolved from that copy.
 */
function resolveVoiceTtsConfig(params: { cfg: OpenClawConfig; override?: TtsConfig }): {
  cfg: OpenClawConfig;
  resolved: ResolvedTtsConfig;
} {
  const { cfg: baseCfg, override } = params;
  if (!override) {
    return { cfg: baseCfg, resolved: resolveTtsConfig(baseCfg) };
  }
  const mergedTts = mergeTtsConfig(baseCfg.messages?.tts ?? {}, override);
  const cfg = {
    ...baseCfg,
    messages: {
      ...(baseCfg.messages ?? {}),
      tts: mergedTts,
    },
  };
  return { cfg, resolved: resolveTtsConfig(cfg) };
}
|
||||
|
||||
/**
 * Wraps raw PCM bytes in a 44-byte RIFF/WAVE header (format tag 1, i.e. PCM)
 * using the module's SAMPLE_RATE, CHANNELS and BIT_DEPTH.
 *
 * @param pcm Raw PCM sample bytes.
 * @returns A new buffer holding the header followed by the PCM bytes.
 */
function buildWavBuffer(pcm: Buffer): Buffer {
  const frameBytes = (CHANNELS * BIT_DEPTH) / 8;
  const bytesPerSecond = SAMPLE_RATE * frameBytes;
  const header = Buffer.alloc(44);

  // Each write returns where the next field starts, so the cursor walks the header.
  let cursor = 0;
  cursor += header.write("RIFF", cursor);
  cursor = header.writeUInt32LE(36 + pcm.length, cursor); // RIFF chunk size
  cursor += header.write("WAVE", cursor);
  cursor += header.write("fmt ", cursor);
  cursor = header.writeUInt32LE(16, cursor); // fmt chunk size
  cursor = header.writeUInt16LE(1, cursor); // audio format
  cursor = header.writeUInt16LE(CHANNELS, cursor);
  cursor = header.writeUInt32LE(SAMPLE_RATE, cursor);
  cursor = header.writeUInt32LE(bytesPerSecond, cursor);
  cursor = header.writeUInt16LE(frameBytes, cursor);
  cursor = header.writeUInt16LE(BIT_DEPTH, cursor);
  cursor += header.write("data", cursor);
  header.writeUInt32LE(pcm.length, cursor); // data chunk size

  return Buffer.concat([header, pcm]);
}
|
||||
|
||||
/** Minimal decoder surface this module needs from either Opus implementation. */
type OpusDecoder = {
  /** Decodes one Opus packet and returns the decoded bytes. */
  decode: (buffer: Buffer) => Buffer;
};
|
||||
|
||||
/**
 * Creates an Opus decoder, trying `opusscript` first and `@discordjs/opus` second.
 *
 * Each failed attempt is logged as a warning. Returns the decoder together with the
 * name of the implementation that produced it, or `null` when neither could be created.
 */
function createOpusDecoder(): { decoder: OpusDecoder; name: string } | null {
  const candidates: Array<{
    name: string;
    failurePrefix: string;
    create: () => OpusDecoder;
  }> = [
    {
      name: "opusscript",
      failurePrefix: "discord voice: opusscript init failed: ",
      create: () => new OpusScript(SAMPLE_RATE, CHANNELS, OpusScript.Application.AUDIO),
    },
    {
      name: "@discordjs/opus",
      failurePrefix: "discord voice: opus decoder init failed: ",
      create: () => {
        // Loaded lazily so it is only required when the first candidate failed.
        const { OpusEncoder } = require("@discordjs/opus") as typeof import("@discordjs/opus");
        return new OpusEncoder(SAMPLE_RATE, CHANNELS);
      },
    },
  ];

  for (const candidate of candidates) {
    try {
      return { decoder: candidate.create(), name: candidate.name };
    } catch (err) {
      logger.warn(`${candidate.failurePrefix}${formatErrorMessage(err)}`);
    }
  }
  return null;
}
|
||||
|
||||
/**
 * Drains a stream of Opus packets and returns the concatenated decoded PCM.
 *
 * A fresh decoder is created for each call. Chunks that are empty or not Buffers are
 * skipped. If reading or decoding throws, the error is logged (verbose only) and
 * whatever was decoded so far is returned. The decoder is always released afterwards.
 *
 * @param stream Readable yielding Opus packets as Buffers.
 * @returns Decoded PCM bytes, or an empty buffer when no decoder is available or
 *   nothing was decoded.
 */
async function decodeOpusStream(stream: Readable): Promise<Buffer> {
  const selected = createOpusDecoder();
  if (!selected) {
    return Buffer.alloc(0);
  }
  logVoiceVerbose(`opus decoder: ${selected.name}`);
  const chunks: Buffer[] = [];
  try {
    for await (const chunk of stream) {
      if (!chunk || !(chunk instanceof Buffer) || chunk.length === 0) {
        continue;
      }
      const decoded = selected.decoder.decode(chunk);
      if (decoded && decoded.length > 0) {
        chunks.push(Buffer.from(decoded));
      }
    }
  } catch (err) {
    if (shouldLogVerbose()) {
      logVerbose(`discord voice: opus decode failed: ${formatErrorMessage(err)}`);
    }
  } finally {
    // opusscript instances hold Emscripten-allocated memory that is only freed by an
    // explicit delete(); without this every captured segment leaks a decoder.
    // @discordjs/opus exposes no delete(), hence the optional call.
    const disposable = selected.decoder as OpusDecoder & { delete?: () => void };
    try {
      disposable.delete?.();
    } catch (err) {
      if (shouldLogVerbose()) {
        logVerbose(`discord voice: opus decoder release failed: ${formatErrorMessage(err)}`);
      }
    }
  }
  return chunks.length > 0 ? Buffer.concat(chunks) : Buffer.alloc(0);
}
|
||||
|
||||
/**
 * Estimates the playback length of raw PCM from its byte count, using the module's
 * BIT_DEPTH, CHANNELS and SAMPLE_RATE.
 *
 * @param pcm Raw PCM sample bytes.
 * @returns Duration in seconds, or 0 when the computed frame size is not positive.
 */
function estimateDurationSeconds(pcm: Buffer): number {
  // Bytes consumed by one sample across all channels.
  const frameBytes = (BIT_DEPTH / 8) * CHANNELS;
  return frameBytes <= 0 ? 0 : pcm.length / (frameBytes * SAMPLE_RATE);
}
|
||||
|
||||
/**
 * Writes the PCM data as a WAV file inside a freshly created temp directory.
 *
 * The directory is registered for delayed cleanup as soon as it exists, so a failed
 * write does not leave it behind.
 *
 * @param pcm Raw PCM sample bytes.
 * @returns The path of the written file and the estimated duration of the audio.
 */
async function writeWavFile(pcm: Buffer): Promise<{ path: string; durationSeconds: number }> {
  const tempDir = await fs.mkdtemp(path.join(resolvePreferredOpenClawTmpDir(), "discord-voice-"));
  // Schedule cleanup before writing: if writeFile rejects, the directory is still removed later.
  scheduleTempCleanup(tempDir);
  const filePath = path.join(tempDir, `segment-${randomUUID()}.wav`);
  await fs.writeFile(filePath, buildWavBuffer(pcm));
  return { path: filePath, durationSeconds: estimateDurationSeconds(pcm) };
}
|
||||
|
||||
/**
 * Removes `tempDir` (recursively, ignoring a missing path) after `delayMs`.
 *
 * The timer is unref'ed so it does not keep the process alive. Removal failures are
 * reported through the verbose log only.
 *
 * @param tempDir Directory to delete.
 * @param delayMs Delay before deletion; defaults to 30 minutes.
 */
function scheduleTempCleanup(tempDir: string, delayMs: number = 30 * 60 * 1000): void {
  const removeDir = async (): Promise<void> => {
    try {
      await fs.rm(tempDir, { recursive: true, force: true });
    } catch (err) {
      if (shouldLogVerbose()) {
        logVerbose(`discord voice: temp cleanup failed for ${tempDir}: ${formatErrorMessage(err)}`);
      }
    }
  };
  const handle = setTimeout(() => {
    void removeDir();
  }, delayMs);
  handle.unref();
}
|
||||
|
||||
/**
 * Runs the "audio" media-understanding capability on a WAV file and returns its transcript.
 *
 * @param params.cfg Application config; `tools.media.audio` is passed as the capability config.
 * @param params.agentId Agent whose directory is passed to the capability run.
 * @param params.filePath Path of the WAV file to transcribe.
 * @returns The trimmed transcription text, or `undefined` when no attachment was
 *   produced or no non-empty "audio.transcription" output was returned.
 */
async function transcribeAudio(params: {
  cfg: OpenClawConfig;
  agentId: string;
  filePath: string;
}): Promise<string | undefined> {
  const { cfg, agentId, filePath } = params;
  const ctx: MsgContext = { MediaPath: filePath, MediaType: "audio/wav" };

  const media = normalizeMediaAttachments(ctx);
  if (media.length === 0) {
    return undefined;
  }

  const cache = createMediaAttachmentCache(media);
  const providerRegistry = buildProviderRegistry();
  try {
    const { outputs } = await runCapability({
      capability: "audio",
      cfg,
      ctx,
      attachments: cache,
      media,
      agentDir: resolveAgentDir(cfg, agentId),
      providerRegistry,
      config: cfg.tools?.media?.audio,
    });
    const transcription = outputs.find((entry) => entry.kind === "audio.transcription");
    const text = transcription?.text?.trim();
    return text ? text : undefined;
  } finally {
    // Always release the attachment cache, whether the run succeeded or threw.
    await cache.cleanup();
  }
}
|
||||
|
||||
/**
 * Manages Discord voice sessions for one account.
 *
 * Responsibilities visible in this class: joining and leaving voice channels,
 * capturing each speaker's audio, transcribing it, running the agent on the
 * transcript, and playing the reply back through TTS. Sessions are keyed by guild id,
 * so at most one session is tracked per guild.
 */
export class DiscordVoiceManager {
  private sessions = new Map<string, VoiceSessionEntry>();
  private botUserId?: string;
  /** True unless the Discord config explicitly sets `voice.enabled` to false. */
  private readonly voiceEnabled: boolean;
  /** In-flight autoJoin run, shared by concurrent callers; null when idle. */
  private autoJoinTask: Promise<void> | null = null;

  constructor(
    private params: {
      client: Client;
      cfg: OpenClawConfig;
      discordConfig: DiscordAccountConfig;
      accountId: string;
      runtime: RuntimeEnv;
      botUserId?: string;
    },
  ) {
    this.botUserId = params.botUserId;
    this.voiceEnabled = params.discordConfig.voice?.enabled !== false;
  }

  /** Updates the bot's own user id (used to ignore its own speaking events); empty values are ignored. */
  setBotUserId(id?: string) {
    if (id) {
      this.botUserId = id;
    }
  }

  /** Whether voice support is enabled for this account. */
  isEnabled() {
    return this.voiceEnabled;
  }

  /**
   * Joins every channel listed in `voice.autoJoin`, one entry per guild.
   *
   * Concurrent calls share the same in-flight run. Duplicate guild entries are skipped
   * with a warning. A failed or throwing join is logged and does not prevent the
   * remaining entries from being attempted.
   */
  async autoJoin(): Promise<void> {
    if (!this.voiceEnabled) {
      return;
    }
    if (this.autoJoinTask) {
      return this.autoJoinTask;
    }
    this.autoJoinTask = (async () => {
      const entries = this.params.discordConfig.voice?.autoJoin ?? [];
      logVoiceVerbose(`autoJoin: ${entries.length} entries`);
      const seenGuilds = new Set<string>();
      for (const entry of entries) {
        const guildId = entry.guildId.trim();
        if (!guildId) {
          continue;
        }
        if (seenGuilds.has(guildId)) {
          logger.warn(
            `discord voice: autoJoin has multiple entries for guild ${guildId}; skipping`,
          );
          continue;
        }
        seenGuilds.add(guildId);
        logVoiceVerbose(`autoJoin: joining guild ${guildId} channel ${entry.channelId}`);
        try {
          const result = await this.join({ guildId, channelId: entry.channelId });
          if (!result.ok) {
            // Surface failures that were previously dropped silently.
            logger.warn(`discord voice: autoJoin failed for guild ${guildId}: ${result.message}`);
          }
        } catch (err) {
          // Keep going so one broken entry cannot block the other guilds.
          logger.warn(
            `discord voice: autoJoin failed for guild ${guildId}: ${formatErrorMessage(err)}`,
          );
        }
      }
    })().finally(() => {
      this.autoJoinTask = null;
    });
    return this.autoJoinTask;
  }

  /** Returns one result row per currently tracked session. */
  status(): VoiceOperationResult[] {
    return Array.from(this.sessions.values()).map((session) => ({
      ok: true,
      message: `connected: guild ${session.guildId} channel ${session.channelId}`,
      guildId: session.guildId,
      channelId: session.channelId,
    }));
  }

  /**
   * Connects to a voice channel and starts listening for speakers.
   *
   * If the guild already has a session on the same channel this is a no-op; a session
   * on a different channel is left first. Validation and connection failures are
   * reported through the returned result rather than thrown.
   *
   * @param params.guildId Guild that owns the channel.
   * @param params.channelId Voice channel to join.
   */
  async join(params: { guildId: string; channelId: string }): Promise<VoiceOperationResult> {
    if (!this.voiceEnabled) {
      return {
        ok: false,
        message: "Discord voice is disabled (channels.discord.voice.enabled).",
      };
    }
    const guildId = params.guildId.trim();
    const channelId = params.channelId.trim();
    if (!guildId || !channelId) {
      return { ok: false, message: "Missing guildId or channelId." };
    }
    logVoiceVerbose(`join requested: guild ${guildId} channel ${channelId}`);

    const existing = this.sessions.get(guildId);
    if (existing && existing.channelId === channelId) {
      logVoiceVerbose(`join: already connected to guild ${guildId} channel ${channelId}`);
      return { ok: true, message: `Already connected to <#${channelId}>.`, guildId, channelId };
    }
    if (existing) {
      logVoiceVerbose(`join: replacing existing session for guild ${guildId}`);
      await this.leave({ guildId });
    }

    const channelInfo = await this.params.client.fetchChannel(channelId).catch(() => null);
    if (!channelInfo || ("type" in channelInfo && !isVoiceChannel(channelInfo.type))) {
      return { ok: false, message: `Channel ${channelId} is not a voice channel.` };
    }
    const channelGuildId = "guildId" in channelInfo ? channelInfo.guildId : undefined;
    if (channelGuildId && channelGuildId !== guildId) {
      return { ok: false, message: "Voice channel is not in this guild." };
    }

    const voicePlugin = this.params.client.getPlugin<VoicePlugin>("voice");
    if (!voicePlugin) {
      return { ok: false, message: "Discord voice plugin is not available." };
    }

    const adapterCreator = voicePlugin.getGatewayAdapterCreator(guildId);
    const connection = joinVoiceChannel({
      channelId,
      guildId,
      adapterCreator,
      selfDeaf: false,
      selfMute: false,
    });

    try {
      await entersState(connection, VoiceConnectionStatus.Ready, PLAYBACK_READY_TIMEOUT_MS);
      logVoiceVerbose(`join: connected to guild ${guildId} channel ${channelId}`);
    } catch (err) {
      this.destroyConnectionSafely(connection);
      return { ok: false, message: `Failed to join voice channel: ${formatErrorMessage(err)}` };
    }

    const sessionChannelId = channelInfo?.id ?? channelId;
    // Use the voice channel id as the session channel so text chat in the voice channel
    // shares the same session as spoken audio.
    if (sessionChannelId !== channelId) {
      logVoiceVerbose(
        `join: using session channel ${sessionChannelId} for voice channel ${channelId}`,
      );
    }
    const route = resolveAgentRoute({
      cfg: this.params.cfg,
      channel: "discord",
      accountId: this.params.accountId,
      guildId,
      peer: { kind: "channel", id: sessionChannelId },
    });

    const player = createAudioPlayer();
    connection.subscribe(player);

    const entry: VoiceSessionEntry = {
      guildId,
      channelId,
      sessionChannelId,
      route,
      connection,
      player,
      playbackQueue: Promise.resolve(),
      processingQueue: Promise.resolve(),
      activeSpeakers: new Set(),
      stop: () => {
        player.stop();
        this.destroyConnectionSafely(connection);
      },
    };

    // Only drop the map entry if it still belongs to this session; a late event from
    // an old connection must not remove a newer session for the same guild.
    const forgetSession = () => {
      if (this.sessions.get(guildId) === entry) {
        this.sessions.delete(guildId);
      }
    };

    const speakingHandler = (userId: string) => {
      void this.handleSpeakingStart(entry, userId).catch((err) => {
        logger.warn(`discord voice: capture failed: ${formatErrorMessage(err)}`);
      });
    };

    connection.receiver.speaking.on("start", speakingHandler);
    connection.on(VoiceConnectionStatus.Disconnected, async () => {
      try {
        // Give the connection a short window to begin reconnecting on its own.
        await Promise.race([
          entersState(connection, VoiceConnectionStatus.Signalling, 5_000),
          entersState(connection, VoiceConnectionStatus.Connecting, 5_000),
        ]);
      } catch {
        forgetSession();
        // The connection may already have been destroyed (e.g. by leave()) while we
        // waited; destroying it again would throw inside this async handler.
        this.destroyConnectionSafely(connection);
      }
    });
    connection.on(VoiceConnectionStatus.Destroyed, () => {
      forgetSession();
    });

    player.on("error", (err) => {
      logger.warn(`discord voice: playback error: ${formatErrorMessage(err)}`);
    });

    this.sessions.set(guildId, entry);
    return {
      ok: true,
      message: `Joined <#${channelId}>.`,
      guildId,
      channelId,
    };
  }

  /**
   * Leaves the guild's voice session.
   *
   * @param params.guildId Guild whose session should be closed.
   * @param params.channelId Optional guard: when given, the session is only closed if
   *   it is on this channel.
   */
  async leave(params: { guildId: string; channelId?: string }): Promise<VoiceOperationResult> {
    const guildId = params.guildId.trim();
    // Trimmed for consistency with join(), which stores the trimmed channel id.
    const requestedChannelId = params.channelId?.trim();
    logVoiceVerbose(`leave requested: guild ${guildId} channel ${params.channelId ?? "current"}`);
    const entry = this.sessions.get(guildId);
    if (!entry) {
      return { ok: false, message: "Not connected to a voice channel." };
    }
    if (requestedChannelId && requestedChannelId !== entry.channelId) {
      return { ok: false, message: "Not connected to that voice channel." };
    }
    entry.stop();
    this.sessions.delete(guildId);
    logVoiceVerbose(`leave: disconnected from guild ${guildId} channel ${entry.channelId}`);
    return {
      ok: true,
      message: `Left <#${entry.channelId}>.`,
      guildId,
      channelId: entry.channelId,
    };
  }

  /** Stops every tracked session and clears the session map. */
  async destroy(): Promise<void> {
    // Iterate over a snapshot: stopping a session removes it from the map via the
    // Destroyed handler.
    for (const entry of Array.from(this.sessions.values())) {
      entry.stop();
    }
    this.sessions.clear();
  }

  /** Appends a task to the session's processing chain; failures are logged, not propagated. */
  private enqueueProcessing(entry: VoiceSessionEntry, task: () => Promise<void>) {
    entry.processingQueue = entry.processingQueue
      .then(task)
      .catch((err) => logger.warn(`discord voice: processing failed: ${formatErrorMessage(err)}`));
  }

  /** Appends a task to the session's playback chain; failures are logged, not propagated. */
  private enqueuePlayback(entry: VoiceSessionEntry, task: () => Promise<void>) {
    entry.playbackQueue = entry.playbackQueue
      .then(task)
      .catch((err) => logger.warn(`discord voice: playback failed: ${formatErrorMessage(err)}`));
  }

  /** Destroys a voice connection unless it is already destroyed; never throws. */
  private destroyConnectionSafely(connection: VoiceConnection): void {
    if (connection.state.status === VoiceConnectionStatus.Destroyed) {
      return;
    }
    try {
      connection.destroy();
    } catch (err) {
      logVoiceVerbose(`connection destroy failed: ${formatErrorMessage(err)}`);
    }
  }

  /**
   * Captures one utterance from `userId` and queues it for processing.
   *
   * Ignores the bot itself and users already being captured. Any current playback is
   * stopped when a user starts speaking. Empty or too-short captures are dropped
   * before anything is written to disk.
   */
  private async handleSpeakingStart(entry: VoiceSessionEntry, userId: string) {
    if (!userId || entry.activeSpeakers.has(userId)) {
      return;
    }
    if (this.botUserId && userId === this.botUserId) {
      return;
    }

    entry.activeSpeakers.add(userId);
    logVoiceVerbose(
      `capture start: guild ${entry.guildId} channel ${entry.channelId} user ${userId}`,
    );
    if (entry.player.state.status === AudioPlayerStatus.Playing) {
      entry.player.stop(true);
    }

    const stream = entry.connection.receiver.subscribe(userId, {
      end: {
        behavior: EndBehaviorType.AfterSilence,
        duration: SILENCE_DURATION_MS,
      },
    });
    stream.on("error", (err) => {
      logger.warn(`discord voice: receive error: ${formatErrorMessage(err)}`);
    });

    try {
      const pcm = await decodeOpusStream(stream);
      if (pcm.length === 0) {
        logVoiceVerbose(
          `capture empty: guild ${entry.guildId} channel ${entry.channelId} user ${userId}`,
        );
        return;
      }
      // Check the length first so short blips never create a temp directory and file.
      const capturedSeconds = estimateDurationSeconds(pcm);
      if (capturedSeconds < MIN_SEGMENT_SECONDS) {
        logVoiceVerbose(
          `capture too short (${capturedSeconds.toFixed(2)}s): guild ${entry.guildId} channel ${entry.channelId} user ${userId}`,
        );
        return;
      }
      const { path: wavPath, durationSeconds } = await writeWavFile(pcm);
      logVoiceVerbose(
        `capture ready (${durationSeconds.toFixed(2)}s): guild ${entry.guildId} channel ${entry.channelId} user ${userId}`,
      );
      this.enqueueProcessing(entry, async () => {
        await this.processSegment({ entry, wavPath, userId, durationSeconds });
      });
    } finally {
      entry.activeSpeakers.delete(userId);
    }
  }

  /**
   * Turns one captured segment into a spoken reply: transcribe, run the agent with the
   * speaker-labelled transcript, synthesize the reply text, and queue it for playback.
   * Each stage returns early (with a verbose log or warning) when it yields nothing.
   */
  private async processSegment(params: {
    entry: VoiceSessionEntry;
    wavPath: string;
    userId: string;
    durationSeconds: number;
  }) {
    const { entry, wavPath, userId, durationSeconds } = params;
    logVoiceVerbose(
      `segment processing (${durationSeconds.toFixed(2)}s): guild ${entry.guildId} channel ${entry.channelId}`,
    );
    const transcript = await transcribeAudio({
      cfg: this.params.cfg,
      agentId: entry.route.agentId,
      filePath: wavPath,
    });
    if (!transcript) {
      logVoiceVerbose(
        `transcription empty: guild ${entry.guildId} channel ${entry.channelId} user ${userId}`,
      );
      return;
    }
    logVoiceVerbose(
      `transcription ok (${transcript.length} chars): guild ${entry.guildId} channel ${entry.channelId}`,
    );

    const speakerLabel = await this.resolveSpeakerLabel(entry.guildId, userId);
    const prompt = speakerLabel ? `${speakerLabel}: ${transcript}` : transcript;

    const result = await agentCommand(
      {
        message: prompt,
        sessionKey: entry.route.sessionKey,
        agentId: entry.route.agentId,
        messageChannel: "discord",
        deliver: false,
      },
      this.params.runtime,
    );

    const replyText = (result.payloads ?? [])
      .map((payload) => payload.text)
      .filter((text) => typeof text === "string" && text.trim())
      .join("\n")
      .trim();

    if (!replyText) {
      logVoiceVerbose(
        `reply empty: guild ${entry.guildId} channel ${entry.channelId} user ${userId}`,
      );
      return;
    }
    logVoiceVerbose(
      `reply ok (${replyText.length} chars): guild ${entry.guildId} channel ${entry.channelId}`,
    );

    const { cfg: ttsCfg, resolved: ttsConfig } = resolveVoiceTtsConfig({
      cfg: this.params.cfg,
      override: this.params.discordConfig.voice?.tts,
    });
    const directive = parseTtsDirectives(replyText, ttsConfig.modelOverrides);
    const speakText = directive.overrides.ttsText ?? directive.cleanedText.trim();
    if (!speakText) {
      logVoiceVerbose(
        `tts skipped (empty): guild ${entry.guildId} channel ${entry.channelId} user ${userId}`,
      );
      return;
    }

    const ttsResult = await textToSpeech({
      text: speakText,
      cfg: ttsCfg,
      channel: "discord",
      overrides: directive.overrides,
    });
    if (!ttsResult.success || !ttsResult.audioPath) {
      logger.warn(`discord voice: TTS failed: ${ttsResult.error ?? "unknown error"}`);
      return;
    }
    // Captured after the guard so the playback closure works with a definite string.
    const audioPath = ttsResult.audioPath;
    logVoiceVerbose(
      `tts ok (${speakText.length} chars): guild ${entry.guildId} channel ${entry.channelId}`,
    );

    this.enqueuePlayback(entry, async () => {
      logVoiceVerbose(
        `playback start: guild ${entry.guildId} channel ${entry.channelId} file ${path.basename(audioPath)}`,
      );
      const resource = createAudioResource(audioPath);
      entry.player.play(resource);
      // Both waits are best-effort: a timeout simply lets the queue move on.
      await entersState(entry.player, AudioPlayerStatus.Playing, PLAYBACK_READY_TIMEOUT_MS).catch(
        () => undefined,
      );
      await entersState(entry.player, AudioPlayerStatus.Idle, SPEAKING_READY_TIMEOUT_MS).catch(
        () => undefined,
      );
      logVoiceVerbose(`playback done: guild ${entry.guildId} channel ${entry.channelId}`);
    });
  }

  /**
   * Resolves a display label for a speaker: guild nickname, then global name, then
   * username. Falls back to a plain user lookup and finally to the raw user id.
   */
  private async resolveSpeakerLabel(guildId: string, userId: string): Promise<string | undefined> {
    try {
      const member = await this.params.client.fetchMember(guildId, userId);
      return member.nickname ?? member.user?.globalName ?? member.user?.username ?? userId;
    } catch {
      try {
        const user = await this.params.client.fetchUser(userId);
        return user.globalName ?? user.username ?? userId;
      } catch {
        return userId;
      }
    }
  }
}
|
||||
|
||||
/** ReadyListener subclass whose handler runs the voice manager's auto-join. */
export class DiscordVoiceReadyListener extends ReadyListener {
  private manager: DiscordVoiceManager;

  constructor(manager: DiscordVoiceManager) {
    super();
    this.manager = manager;
  }

  /** Waits for the manager's configured auto-join run to finish. */
  async handle() {
    await this.manager.autoJoin();
  }
}
|
||||
|
||||
/** True for guild voice channels and guild stage channels. */
function isVoiceChannel(type: ChannelType) {
  switch (type) {
    case ChannelType.GuildVoice:
    case ChannelType.GuildStageVoice:
      return true;
    default:
      return false;
  }
}
|
||||
Reference in New Issue
Block a user