mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-08 10:51:23 +00:00
Add runtime.stt.transcribeAudioFile for plugin STT access
Expose audio transcription through the PluginRuntime so external
plugins (e.g. marmot) can use openclaw's media-understanding provider
framework without importing unexported internal modules.
The new transcribeAudioFile() wraps runCapability({capability: "audio"})
and reads provider/model/apiKey from tools.media.audio in the config,
matching the pattern used by the Discord VC implementation.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
committed by
Peter Steinberger
parent
f7b0378ccb
commit
faa4ffec03
51
src/media-understanding/transcribe-audio.ts
Normal file
51
src/media-understanding/transcribe-audio.ts
Normal file
@@ -0,0 +1,51 @@
|
||||
import type { OpenClawConfig } from "../config/config.js";
|
||||
import {
|
||||
buildProviderRegistry,
|
||||
createMediaAttachmentCache,
|
||||
normalizeMediaAttachments,
|
||||
runCapability,
|
||||
} from "./runner.js";
|
||||
|
||||
/**
 * Transcribe an audio file using the configured media-understanding provider.
 *
 * Runs the `"audio"` capability through `runCapability`, handing it
 * `cfg.tools?.media?.audio` as the capability config. Provider/model/apiKey
 * resolution (and any fallback between configured models) is delegated to
 * `runCapability`; this wrapper only prepares the inputs and extracts the
 * transcription text from the result.
 *
 * This is the runtime-exposed entry point for external plugins (e.g. marmot)
 * that need STT without importing internal media-understanding modules directly.
 *
 * @param params - Transcription request.
 *   - `filePath`: path of the audio file to transcribe.
 *   - `cfg`: openclaw config; `tools.media.audio` is forwarded as the
 *     capability config and may be absent.
 *   - `agentDir`: optional agent directory, forwarded unchanged.
 *   - `mime`: MIME type of the file; defaults to `"audio/wav"` when omitted.
 * @returns `{ text }` holding the trimmed transcription, or `{ text: undefined }`
 *   when no attachment could be derived from the input, when no
 *   `"audio.transcription"` output was produced, or when the transcription is
 *   empty after trimming.
 * @throws Whatever `buildProviderRegistry`, `runCapability`, or the cache
 *   cleanup throws; nothing is caught here.
 */
export async function transcribeAudioFile(params: {
  filePath: string;
  cfg: OpenClawConfig;
  agentDir?: string;
  mime?: string;
}): Promise<{ text: string | undefined }> {
  const ctx = {
    MediaPath: params.filePath,
    MediaType: params.mime ?? "audio/wav",
  };

  const attachments = normalizeMediaAttachments(ctx);
  if (attachments.length === 0) {
    return { text: undefined };
  }

  // Build the registry BEFORE creating the cache: anything that can throw
  // between cache creation and the `try` below would skip `cache.cleanup()`.
  const providerRegistry = buildProviderRegistry();
  const cache = createMediaAttachmentCache(attachments);

  try {
    const result = await runCapability({
      capability: "audio",
      cfg: params.cfg,
      ctx,
      attachments: cache,
      media: attachments,
      agentDir: params.agentDir,
      providerRegistry,
      config: params.cfg.tools?.media?.audio,
    });

    const transcription = result.outputs.find((entry) => entry.kind === "audio.transcription");
    const text = transcription?.text?.trim();

    // `||` is deliberate: an empty/whitespace-only transcript is reported as undefined.
    return { text: text || undefined };
  } finally {
    // Always release the attachment cache, whether the run succeeded or threw.
    await cache.cleanup();
  }
}
|
||||
Reference in New Issue
Block a user