mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-07 02:51:37 +00:00
refactor: unify media understanding pipeline
This commit is contained in:
154
src/media-understanding/resolve.ts
Normal file
154
src/media-understanding/resolve.ts
Normal file
@@ -0,0 +1,154 @@
|
||||
import type { ClawdbotConfig } from "../config/config.js";
|
||||
import type { MsgContext } from "../auto-reply/templating.js";
|
||||
import type {
|
||||
MediaUnderstandingConfig,
|
||||
MediaUnderstandingModelConfig,
|
||||
MediaUnderstandingScopeConfig,
|
||||
} from "../config/types.tools.js";
|
||||
import { logVerbose, shouldLogVerbose } from "../globals.js";
|
||||
import {
|
||||
DEFAULT_MAX_BYTES,
|
||||
DEFAULT_MAX_CHARS_BY_CAPABILITY,
|
||||
DEFAULT_MEDIA_CONCURRENCY,
|
||||
DEFAULT_PROMPT,
|
||||
} from "./defaults.js";
|
||||
import { normalizeMediaProviderId } from "./providers/index.js";
|
||||
import { normalizeMediaUnderstandingChatType, resolveMediaUnderstandingScope } from "./scope.js";
|
||||
import type { MediaUnderstandingCapability } from "./types.js";
|
||||
|
||||
/**
 * Converts a timeout given in seconds into whole milliseconds.
 *
 * @param seconds - Requested timeout in seconds; used only when it is a finite number.
 * @param fallbackSeconds - Timeout in seconds used when `seconds` is missing or not finite.
 * @returns The chosen timeout in milliseconds, rounded down and raised to a 1000 ms floor.
 */
export function resolveTimeoutMs(seconds: number | undefined, fallbackSeconds: number): number {
  const MIN_TIMEOUT_MS = 1000;

  let effectiveSeconds = fallbackSeconds;
  if (typeof seconds === "number" && Number.isFinite(seconds)) {
    effectiveSeconds = seconds;
  }

  const wholeMs = Math.floor(effectiveSeconds * 1000);
  return Math.max(MIN_TIMEOUT_MS, wholeMs);
}
|
||||
|
||||
/**
 * Builds the prompt text for a media-understanding request.
 *
 * @param capability - Capability the prompt is for; also selects the default prompt.
 * @param prompt - Optional custom prompt; when blank after trimming, `DEFAULT_PROMPT[capability]` is used.
 * @param maxChars - Optional length limit; a falsy value (undefined, 0, NaN) adds no hint.
 * @returns The base prompt, followed by a length hint unless the capability is `"audio"`
 *   or no usable `maxChars` was given.
 */
export function resolvePrompt(
  capability: MediaUnderstandingCapability,
  prompt?: string,
  maxChars?: number,
): string {
  const trimmed = prompt?.trim();
  const basePrompt = trimmed ? trimmed : DEFAULT_PROMPT[capability];

  // The length hint is never appended to audio prompts.
  const wantsLengthHint = Boolean(maxChars) && capability !== "audio";
  if (wantsLengthHint) {
    return `${basePrompt} Respond in at most ${maxChars} characters.`;
  }
  return basePrompt;
}
|
||||
|
||||
/**
 * Picks the character limit for a model entry.
 *
 * Precedence (first non-nullish wins): `entry.maxChars`, then `config.maxChars`,
 * then `cfg.tools.media[capability].maxChars`. If the winning value is not a
 * number, `DEFAULT_MAX_CHARS_BY_CAPABILITY[capability]` is returned instead.
 *
 * @param params - The capability, the model entry, the root config, and an
 *   optional capability-level config.
 * @returns The resolved limit, or whatever the defaults table holds for the capability.
 */
export function resolveMaxChars(params: {
  capability: MediaUnderstandingCapability;
  entry: MediaUnderstandingModelConfig;
  cfg: ClawdbotConfig;
  config?: MediaUnderstandingConfig;
}): number | undefined {
  const override =
    params.entry.maxChars ??
    params.config?.maxChars ??
    params.cfg.tools?.media?.[params.capability]?.maxChars;

  return typeof override === "number"
    ? override
    : DEFAULT_MAX_CHARS_BY_CAPABILITY[params.capability];
}
|
||||
|
||||
/**
 * Picks the byte-size limit for a model entry.
 *
 * Precedence (first non-nullish wins): `entry.maxBytes`, then `config.maxBytes`,
 * then `cfg.tools.media[capability].maxBytes`. If the winning value is not a
 * number, `DEFAULT_MAX_BYTES[capability]` is returned instead.
 *
 * @param params - The capability, the model entry, the root config, and an
 *   optional capability-level config.
 * @returns The resolved byte limit.
 */
export function resolveMaxBytes(params: {
  capability: MediaUnderstandingCapability;
  entry: MediaUnderstandingModelConfig;
  cfg: ClawdbotConfig;
  config?: MediaUnderstandingConfig;
}): number {
  const { capability, entry, cfg, config } = params;

  const override = entry.maxBytes ?? config?.maxBytes ?? cfg.tools?.media?.[capability]?.maxBytes;
  if (typeof override !== "number") {
    return DEFAULT_MAX_BYTES[capability];
  }
  return override;
}
|
||||
|
||||
/**
 * Looks up the capability-specific media configuration.
 *
 * @param cfg - Root configuration object.
 * @param capability - Capability whose section under `cfg.tools.media` is wanted.
 * @returns `cfg.tools.media[capability]`, or `undefined` when any level is absent.
 */
export function resolveCapabilityConfig(
  cfg: ClawdbotConfig,
  capability: MediaUnderstandingCapability,
): MediaUnderstandingConfig | undefined {
  const mediaTools = cfg.tools?.media;
  return mediaTools?.[capability];
}
|
||||
|
||||
/**
 * Evaluates the media-understanding scope rules for a message context.
 *
 * Delegates to `resolveMediaUnderstandingScope`, passing the session key, a
 * channel (`ctx.Surface`, falling back to `ctx.Provider`), and the chat type
 * after it has gone through `normalizeMediaUnderstandingChatType`.
 *
 * @param params - Optional scope configuration plus the message context.
 * @returns The `"allow"` / `"deny"` decision produced by the scope resolver.
 */
export function resolveScopeDecision(params: {
  scope?: MediaUnderstandingScopeConfig;
  ctx: MsgContext;
}): "allow" | "deny" {
  const { scope, ctx } = params;

  const sessionKey = ctx.SessionKey;
  // Surface takes precedence over Provider when both are present.
  const channel = ctx.Surface ?? ctx.Provider;
  const chatType = normalizeMediaUnderstandingChatType(ctx.ChatType);

  return resolveMediaUnderstandingScope({ scope, sessionKey, channel, chatType });
}
|
||||
|
||||
/**
 * Guesses which capabilities a model entry supports when none are declared.
 *
 * An entry counts as CLI when `entry.type` is `"cli"`, or when `type` is unset
 * and `entry.command` is truthy; CLI entries are assumed to handle every
 * capability. Otherwise the normalized provider id is matched against a fixed
 * list of known providers.
 *
 * @param entry - Model configuration entry to inspect.
 * @returns A fresh capability list, or `undefined` when the provider is
 *   missing or not recognized.
 */
function inferCapabilities(
  entry: MediaUnderstandingModelConfig,
): MediaUnderstandingCapability[] | undefined {
  const kind = entry.type ?? (entry.command ? "cli" : "provider");
  if (kind === "cli") {
    return ["image", "audio", "video"];
  }

  switch (normalizeMediaProviderId(entry.provider ?? "")) {
    case "openai":
    case "anthropic":
    case "minimax":
      return ["image"];
    case "google":
      return ["image", "audio", "video"];
    case "groq":
      return ["audio"];
    default:
      // Covers both an empty provider id and providers not listed above.
      return undefined;
  }
}
|
||||
|
||||
/**
 * Collects the model entries applicable to a capability.
 *
 * Capability-level models (`config.models`) come first, followed by shared
 * models (`cfg.tools.media.models`). Selection rules:
 * - An entry with a non-empty `capabilities` list is kept only if the list
 *   includes `capability`.
 * - A capability-level entry without declared capabilities is always kept.
 * - A shared entry without declared capabilities falls back to
 *   `inferCapabilities`; if nothing can be inferred it is skipped, with a
 *   verbose log line when verbose logging is on.
 *
 * @param params - Root config, the capability, and the optional capability-level config.
 * @returns A new array of matching entries in the order described above.
 */
export function resolveModelEntries(params: {
  cfg: ClawdbotConfig;
  capability: MediaUnderstandingCapability;
  config?: MediaUnderstandingConfig;
}): MediaUnderstandingModelConfig[] {
  const { cfg, capability, config } = params;
  const sharedModels = cfg.tools?.media?.models ?? [];
  const capabilityModels = config?.models ?? [];

  // Decides whether a single entry should be used for `capability`.
  const supports = (entry: MediaUnderstandingModelConfig, isShared: boolean): boolean => {
    const declared = entry.capabilities;
    const caps =
      declared && declared.length > 0
        ? declared
        : isShared
          ? inferCapabilities(entry)
          : undefined;

    if (caps && caps.length > 0) {
      return caps.includes(capability);
    }
    if (!isShared) {
      // Capability-level entries need no explicit capability list.
      return true;
    }
    if (shouldLogVerbose()) {
      logVerbose(
        `Skipping shared media model without capabilities: ${entry.provider ?? entry.command ?? "unknown"}`,
      );
    }
    return false;
  };

  const selected: MediaUnderstandingModelConfig[] = [];
  for (const entry of capabilityModels) {
    if (supports(entry, false)) selected.push(entry);
  }
  for (const entry of sharedModels) {
    if (supports(entry, true)) selected.push(entry);
  }
  return selected;
}
|
||||
|
||||
/**
 * Resolves how many media-understanding tasks may run at once.
 *
 * Reads `cfg.tools.media.concurrency`. A finite, positive number is rounded
 * down to a whole number and clamped to at least 1; anything else falls back
 * to `DEFAULT_MEDIA_CONCURRENCY`.
 *
 * @param cfg - Root configuration object.
 * @returns A positive integer concurrency for valid settings, otherwise the default.
 */
export function resolveConcurrency(cfg: ClawdbotConfig): number {
  const configured = cfg.tools?.media?.concurrency;
  if (typeof configured !== "number" || !Number.isFinite(configured) || configured <= 0) {
    return DEFAULT_MEDIA_CONCURRENCY;
  }
  // Values in (0, 1) floor to 0, which is not a usable concurrency limit
  // (presumably no task could start — consumers are not visible here), so
  // clamp the floored value to at least 1.
  return Math.max(1, Math.floor(configured));
}
|
||||
|
||||
/**
 * Decides whether a media-understanding capability should run.
 *
 * The capability is off when `config.enabled` is explicitly `false`, or when
 * neither the capability-level config nor `cfg.tools.media.models` lists any
 * model. Otherwise it is on.
 *
 * @param params - Root config plus the optional capability-level config.
 * @returns `true` when the capability is not disabled and at least one model is configured.
 */
export function resolveCapabilityEnabled(params: {
  cfg: ClawdbotConfig;
  config?: MediaUnderstandingConfig;
}): boolean {
  const { cfg, config } = params;
  if (config?.enabled === false) {
    return false;
  }

  const ownModelCount = config?.models?.length ?? 0;
  const sharedModelCount = (cfg.tools?.media?.models ?? []).length;
  return ownModelCount > 0 || sharedModelCount > 0;
}
|
||||
Reference in New Issue
Block a user