mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-09 19:04:31 +00:00
refactor: unify media understanding pipeline
This commit is contained in:
66
src/media-understanding/providers/image.ts
Normal file
66
src/media-understanding/providers/image.ts
Normal file
@@ -0,0 +1,66 @@
|
||||
import type { Api, AssistantMessage, Context, Model } from "@mariozechner/pi-ai";
|
||||
import { complete } from "@mariozechner/pi-ai";
|
||||
import { discoverAuthStorage, discoverModels } from "@mariozechner/pi-coding-agent";
|
||||
|
||||
import { getApiKeyForModel } from "../../agents/model-auth.js";
|
||||
import { ensureClawdbotModelsJson } from "../../agents/models-config.js";
|
||||
import { minimaxUnderstandImage } from "../../agents/minimax-vlm.js";
|
||||
import { coerceImageAssistantText } from "../../agents/tools/image-tool.helpers.js";
|
||||
import type { ImageDescriptionRequest, ImageDescriptionResult } from "../types.js";
|
||||
|
||||
/**
 * Produces a text description of an image using the requested provider/model.
 *
 * The model is looked up in the registry discovered for `params.agentDir`, its
 * API key is resolved and registered on the auth storage, and the image bytes
 * are sent base64-encoded. Models whose provider is `"minimax"` are routed
 * through `minimaxUnderstandImage`; every other provider goes through
 * `complete` with a single user message carrying the prompt and the image.
 *
 * Defaults applied when the request omits them: prompt `"Describe the image."`,
 * MIME type `"image/jpeg"`, and (generic path only) `maxTokens` of 512.
 *
 * @param params - Image description request (config, agent dir, provider/model,
 *   image buffer, and optional prompt / mime / maxTokens / profile hints).
 * @returns The description text together with the id of the model that was used.
 * @throws Error when the provider/model pair is not in the registry, or when
 *   the resolved model does not list `"image"` among its inputs.
 */
export async function describeImageWithModel(
  params: ImageDescriptionRequest,
): Promise<ImageDescriptionResult> {
  const { cfg, agentDir } = params;

  // The models config must exist before the registry can be discovered.
  await ensureClawdbotModelsJson(cfg, agentDir);
  const auth = discoverAuthStorage(agentDir);
  const registry = discoverModels(auth, agentDir);

  const resolved = registry.find(params.provider, params.model) as Model<Api> | null;
  if (!resolved) {
    throw new Error(`Unknown model: ${params.provider}/${params.model}`);
  }
  const acceptsImages = resolved.input?.includes("image") ?? false;
  if (!acceptsImages) {
    throw new Error(`Model does not support images: ${params.provider}/${params.model}`);
  }

  const keyInfo = await getApiKeyForModel({
    model: resolved,
    cfg,
    agentDir,
    profileId: params.profile,
    preferredProfile: params.preferredProfile,
  });
  auth.setRuntimeApiKey(resolved.provider, keyInfo.apiKey);

  const encodedImage = params.buffer.toString("base64");
  const prompt = params.prompt ?? "Describe the image.";
  const mimeType = params.mime ?? "image/jpeg";

  // MiniMax is called through its dedicated helper, which takes a data URL.
  if (resolved.provider === "minimax") {
    const described = await minimaxUnderstandImage({
      apiKey: keyInfo.apiKey,
      prompt,
      imageDataUrl: `data:${mimeType};base64,${encodedImage}`,
      modelBaseUrl: resolved.baseUrl,
    });
    return { text: described, model: resolved.id };
  }

  // Generic path: one user turn containing the prompt followed by the image.
  const request: Context = {
    messages: [
      {
        role: "user",
        content: [
          { type: "text", text: prompt },
          { type: "image", data: encodedImage, mimeType },
        ],
        timestamp: Date.now(),
      },
    ],
  };
  const reply = (await complete(resolved, request, {
    apiKey: keyInfo.apiKey,
    maxTokens: params.maxTokens ?? 512,
  })) as AssistantMessage;

  return {
    text: coerceImageAssistantText({
      message: reply,
      provider: resolved.provider,
      model: resolved.id,
    }),
    model: resolved.id,
  };
}
|
||||
Reference in New Issue
Block a user