mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-09 02:08:27 +00:00
fix(telegram): send sticker pixels to vision models
This commit is contained in:
@@ -4,11 +4,13 @@ import type { ClawdbotConfig } from "../config/config.js";
|
||||
import { STATE_DIR_CLAWDBOT } from "../config/paths.js";
|
||||
import { loadJsonFile, saveJsonFile } from "../infra/json-file.js";
|
||||
import { logVerbose } from "../globals.js";
|
||||
import type { ModelCatalogEntry } from "../agents/model-catalog.js";
|
||||
import {
|
||||
findModelInCatalog,
|
||||
loadModelCatalog,
|
||||
modelSupportsVision,
|
||||
} from "../agents/model-catalog.js";
|
||||
import { resolveApiKeyForProvider } from "../agents/model-auth.js";
|
||||
import { resolveDefaultModelForAgent } from "../agents/model-selection.js";
|
||||
import { resolveAutoImageModel } from "../media-understanding/runner.js";
|
||||
|
||||
@@ -140,6 +142,7 @@ export function getCacheStats(): { count: number; oldestAt?: string; newestAt?:
|
||||
|
||||
const STICKER_DESCRIPTION_PROMPT =
|
||||
"Describe this sticker image in 1-2 sentences. Focus on what the sticker depicts (character, object, action, emotion). Be concise and objective.";
|
||||
const VISION_PROVIDERS = ["openai", "anthropic", "google", "minimax"] as const;
|
||||
|
||||
export interface DescribeStickerParams {
|
||||
imagePath: string;
|
||||
@@ -158,31 +161,80 @@ export async function describeStickerImage(params: DescribeStickerParams): Promi
|
||||
|
||||
const defaultModel = resolveDefaultModelForAgent({ cfg, agentId });
|
||||
let activeModel = undefined as { provider: string; model: string } | undefined;
|
||||
let catalog: ModelCatalogEntry[] = [];
|
||||
try {
|
||||
const catalog = await loadModelCatalog({ config: cfg });
|
||||
catalog = await loadModelCatalog({ config: cfg });
|
||||
const entry = findModelInCatalog(catalog, defaultModel.provider, defaultModel.model);
|
||||
if (modelSupportsVision(entry)) {
|
||||
const supportsVision = entry?.input ? modelSupportsVision(entry) : Boolean(entry);
|
||||
if (supportsVision) {
|
||||
activeModel = { provider: defaultModel.provider, model: defaultModel.model };
|
||||
}
|
||||
} catch {
|
||||
// Ignore catalog failures; fall back to auto selection.
|
||||
}
|
||||
|
||||
const resolved = await resolveAutoImageModel({
|
||||
cfg,
|
||||
agentDir,
|
||||
activeModel,
|
||||
});
|
||||
const hasProviderKey = async (provider: string) => {
|
||||
try {
|
||||
await resolveApiKeyForProvider({ provider, cfg, agentDir });
|
||||
return true;
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
const selectCatalogModel = (provider: string) => {
|
||||
const entries = catalog.filter(
|
||||
(entry) =>
|
||||
entry.provider.toLowerCase() === provider.toLowerCase() &&
|
||||
(entry.input ? modelSupportsVision(entry) : true),
|
||||
);
|
||||
if (entries.length === 0) return undefined;
|
||||
const defaultId =
|
||||
provider === "openai"
|
||||
? "gpt-5-mini"
|
||||
: provider === "anthropic"
|
||||
? "claude-opus-4-5"
|
||||
: provider === "google"
|
||||
? "gemini-3-flash-preview"
|
||||
: "MiniMax-VL-01";
|
||||
const preferred = entries.find((entry) => entry.id === defaultId);
|
||||
return preferred ?? entries[0];
|
||||
};
|
||||
|
||||
let resolved = null as { provider: string; model?: string } | null;
|
||||
if (
|
||||
activeModel &&
|
||||
VISION_PROVIDERS.includes(activeModel.provider as (typeof VISION_PROVIDERS)[number]) &&
|
||||
(await hasProviderKey(activeModel.provider))
|
||||
) {
|
||||
resolved = activeModel;
|
||||
}
|
||||
|
||||
if (!resolved) {
|
||||
for (const provider of VISION_PROVIDERS) {
|
||||
if (!(await hasProviderKey(provider))) continue;
|
||||
const entry = selectCatalogModel(provider);
|
||||
if (entry) {
|
||||
resolved = { provider, model: entry.id };
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!resolved) {
|
||||
resolved = await resolveAutoImageModel({
|
||||
cfg,
|
||||
agentDir,
|
||||
activeModel,
|
||||
});
|
||||
}
|
||||
|
||||
if (!resolved?.model) {
|
||||
logVerbose("telegram: no vision provider available for sticker description");
|
||||
return null;
|
||||
}
|
||||
|
||||
const { provider, model } = resolved;
|
||||
if (!model) {
|
||||
logVerbose(`telegram: no vision model available for ${provider}`);
|
||||
return null;
|
||||
}
|
||||
logVerbose(`telegram: describing sticker with ${provider}/${model}`);
|
||||
|
||||
try {
|
||||
|
||||
Reference in New Issue
Block a user