mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-08 18:28:26 +00:00
fix(telegram): improve sticker vision + cache (#2548) (thanks @longjos)
This commit is contained in:
@@ -139,6 +139,7 @@ export const dispatchTelegramMessage = async ({
|
||||
imagePath: ctxPayload.MediaPath,
|
||||
cfg,
|
||||
agentDir,
|
||||
agentId: route.agentId,
|
||||
});
|
||||
if (description) {
|
||||
// Format the description with sticker context
|
||||
|
||||
@@ -7,6 +7,9 @@ const middlewareUseSpy = vi.fn();
|
||||
const onSpy = vi.fn();
|
||||
const stopSpy = vi.fn();
|
||||
const sendChatActionSpy = vi.fn();
|
||||
const cacheStickerSpy = vi.fn();
|
||||
const getCachedStickerSpy = vi.fn();
|
||||
const describeStickerImageSpy = vi.fn();
|
||||
|
||||
type ApiStub = {
|
||||
config: { use: (arg: unknown) => void };
|
||||
@@ -79,6 +82,12 @@ vi.mock("../config/sessions.js", async (importOriginal) => {
|
||||
};
|
||||
});
|
||||
|
||||
// Replace the sticker-cache module with thin wrappers that forward every call
// to the spies declared in this file, so tests can stub return values and
// inspect the arguments each function received.
vi.mock("./sticker-cache.js", () => ({
|
||||
cacheSticker: (...args: unknown[]) => cacheStickerSpy(...args),
|
||||
getCachedSticker: (...args: unknown[]) => getCachedStickerSpy(...args),
|
||||
describeStickerImage: (...args: unknown[]) => describeStickerImageSpy(...args),
|
||||
}));
|
||||
|
||||
vi.mock("./pairing-store.js", () => ({
|
||||
readTelegramAllowFromStore: vi.fn(async () => [] as string[]),
|
||||
upsertTelegramPairingRequest: vi.fn(async () => ({
|
||||
@@ -408,6 +417,12 @@ describe("telegram media groups", () => {
|
||||
describe("telegram stickers", () => {
|
||||
const STICKER_TEST_TIMEOUT_MS = process.platform === "win32" ? 30_000 : 20_000;
|
||||
|
||||
// Reset the three sticker-cache spies before every test in this suite so
// stubs and recorded calls from one test do not carry over into the next.
beforeEach(() => {
|
||||
cacheStickerSpy.mockReset();
|
||||
getCachedStickerSpy.mockReset();
|
||||
describeStickerImageSpy.mockReset();
|
||||
});
|
||||
|
||||
it(
|
||||
"downloads static sticker (WEBP) and includes sticker metadata",
|
||||
async () => {
|
||||
@@ -481,6 +496,88 @@ describe("telegram stickers", () => {
|
||||
STICKER_TEST_TIMEOUT_MS,
|
||||
);
|
||||
|
||||
// Cache-hit scenario: the incoming sticker shares its file_unique_id with a
// cached entry but carries a different file_id, emoji and set name. The test
// expects the cache to be rewritten with the new metadata while the reply
// payload still carries the description stored in the cached entry.
it(
|
||||
"refreshes cached sticker metadata on cache hit",
|
||||
async () => {
|
||||
const { createTelegramBot } = await import("./bot.js");
|
||||
const replyModule = await import("../auto-reply/reply.js");
|
||||
const replySpy = replyModule.__replySpy as unknown as ReturnType<typeof vi.fn>;
|
||||
|
||||
onSpy.mockReset();
|
||||
replySpy.mockReset();
|
||||
sendChatActionSpy.mockReset();
|
||||
|
||||
// Simulate a cached entry whose fileId, emoji and setName differ from the
// incoming sticker below; only fileUniqueId matches.
getCachedStickerSpy.mockReturnValue({
|
||||
fileId: "old_file_id",
|
||||
fileUniqueId: "sticker_unique_456",
|
||||
emoji: "😴",
|
||||
setName: "OldSet",
|
||||
description: "Cached description",
|
||||
cachedAt: "2026-01-20T10:00:00.000Z",
|
||||
});
|
||||
|
||||
const runtimeError = vi.fn();
|
||||
createTelegramBot({
|
||||
token: "tok",
|
||||
runtime: {
|
||||
log: vi.fn(),
|
||||
error: runtimeError,
|
||||
exit: () => {
|
||||
throw new Error("exit");
|
||||
},
|
||||
},
|
||||
});
|
||||
// Pick the callback that was registered through onSpy for the "message" event.
const handler = onSpy.mock.calls.find((call) => call[0] === "message")?.[1] as (
|
||||
ctx: Record<string, unknown>,
|
||||
) => Promise<void>;
|
||||
expect(handler).toBeDefined();
|
||||
|
||||
// Stub exactly one fetch call with a successful response reporting
// image/webp; the four payload bytes are the ASCII codes for "RIFF".
const fetchSpy = vi.spyOn(globalThis, "fetch" as never).mockResolvedValueOnce({
|
||||
ok: true,
|
||||
status: 200,
|
||||
statusText: "OK",
|
||||
headers: { get: () => "image/webp" },
|
||||
arrayBuffer: async () => new Uint8Array([0x52, 0x49, 0x46, 0x46]).buffer,
|
||||
} as Response);
|
||||
|
||||
await handler({
|
||||
message: {
|
||||
message_id: 103,
|
||||
chat: { id: 1234, type: "private" },
|
||||
sticker: {
|
||||
file_id: "new_file_id",
|
||||
file_unique_id: "sticker_unique_456",
|
||||
type: "regular",
|
||||
width: 512,
|
||||
height: 512,
|
||||
is_animated: false,
|
||||
is_video: false,
|
||||
emoji: "🔥",
|
||||
set_name: "NewSet",
|
||||
},
|
||||
date: 1736380800,
|
||||
},
|
||||
me: { username: "clawdbot_bot" },
|
||||
getFile: async () => ({ file_path: "stickers/sticker.webp" }),
|
||||
});
|
||||
|
||||
expect(runtimeError).not.toHaveBeenCalled();
|
||||
// The cache write must carry the metadata from the incoming message,
// not the stale values returned by the cache lookup.
expect(cacheStickerSpy).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
fileId: "new_file_id",
|
||||
emoji: "🔥",
|
||||
setName: "NewSet",
|
||||
}),
|
||||
);
|
||||
// The reply payload uses the fresh file id but keeps the cached description.
const payload = replySpy.mock.calls[0][0];
|
||||
expect(payload.Sticker?.fileId).toBe("new_file_id");
|
||||
expect(payload.Sticker?.cachedDescription).toBe("Cached description");
|
||||
|
||||
fetchSpy.mockRestore();
|
||||
},
|
||||
STICKER_TEST_TIMEOUT_MS,
|
||||
);
|
||||
|
||||
it(
|
||||
"skips animated stickers (TGS format)",
|
||||
async () => {
|
||||
|
||||
@@ -22,7 +22,7 @@ import { buildInlineKeyboard } from "../send.js";
|
||||
import { resolveTelegramVoiceSend } from "../voice.js";
|
||||
import { buildTelegramThreadParams, resolveTelegramReplyId } from "./helpers.js";
|
||||
import type { StickerMetadata, TelegramContext } from "./types.js";
|
||||
import { getCachedSticker } from "../sticker-cache.js";
|
||||
import { cacheSticker, getCachedSticker } from "../sticker-cache.js";
|
||||
|
||||
const PARSE_ERR_RE = /can't parse entities|parse entities|find end of the entity/i;
|
||||
const VOICE_FORBIDDEN_RE = /VOICE_MESSAGES_FORBIDDEN/;
|
||||
@@ -303,14 +303,26 @@ export async function resolveMedia(
|
||||
const cached = sticker.file_unique_id ? getCachedSticker(sticker.file_unique_id) : null;
|
||||
if (cached) {
|
||||
logVerbose(`telegram: sticker cache hit for ${sticker.file_unique_id}`);
|
||||
const fileId = sticker.file_id ?? cached.fileId;
|
||||
const emoji = sticker.emoji ?? cached.emoji;
|
||||
const setName = sticker.set_name ?? cached.setName;
|
||||
if (fileId !== cached.fileId || emoji !== cached.emoji || setName !== cached.setName) {
|
||||
// Refresh cached sticker metadata on hits so sends/searches use latest file_id.
|
||||
cacheSticker({
|
||||
...cached,
|
||||
fileId,
|
||||
emoji,
|
||||
setName,
|
||||
});
|
||||
}
|
||||
return {
|
||||
path: saved.path,
|
||||
contentType: saved.contentType,
|
||||
placeholder: "<media:sticker>",
|
||||
stickerMetadata: {
|
||||
emoji: cached.emoji,
|
||||
setName: cached.setName,
|
||||
fileId: cached.fileId,
|
||||
emoji,
|
||||
setName,
|
||||
fileId,
|
||||
fileUniqueId: sticker.file_unique_id,
|
||||
cachedDescription: cached.description,
|
||||
},
|
||||
@@ -330,7 +342,7 @@ export async function resolveMedia(
|
||||
},
|
||||
};
|
||||
} catch (err) {
|
||||
logVerbose(`telegram: failed to process sticker: ${err}`);
|
||||
logVerbose(`telegram: failed to process sticker: ${String(err)}`);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,7 +4,13 @@ import type { ClawdbotConfig } from "../config/config.js";
|
||||
import { STATE_DIR_CLAWDBOT } from "../config/paths.js";
|
||||
import { loadJsonFile, saveJsonFile } from "../infra/json-file.js";
|
||||
import { logVerbose } from "../globals.js";
|
||||
import { resolveApiKeyForProvider } from "../agents/model-auth.js";
|
||||
import {
|
||||
findModelInCatalog,
|
||||
loadModelCatalog,
|
||||
modelSupportsVision,
|
||||
} from "../agents/model-catalog.js";
|
||||
import { resolveDefaultModelForAgent } from "../agents/model-selection.js";
|
||||
import { resolveAutoImageModel } from "../media-understanding/runner.js";
|
||||
|
||||
const CACHE_FILE = path.join(STATE_DIR_CLAWDBOT, "telegram", "sticker-cache.json");
|
||||
const CACHE_VERSION = 1;
|
||||
@@ -135,18 +141,11 @@ export function getCacheStats(): { count: number; oldestAt?: string; newestAt?:
|
||||
const STICKER_DESCRIPTION_PROMPT =
|
||||
"Describe this sticker image in 1-2 sentences. Focus on what the sticker depicts (character, object, action, emotion). Be concise and objective.";
|
||||
|
||||
const VISION_PROVIDERS = ["anthropic", "openai", "google", "minimax"] as const;
|
||||
const DEFAULT_VISION_MODELS: Record<string, string> = {
|
||||
anthropic: "claude-sonnet-4-20250514",
|
||||
openai: "gpt-4o-mini",
|
||||
google: "gemini-2.0-flash",
|
||||
minimax: "MiniMax-VL-01",
|
||||
};
|
||||
|
||||
/** Parameter object accepted by `describeStickerImage`. */
export interface DescribeStickerParams {
|
||||
// Path of the sticker image to describe.
// NOTE(review): how the path is consumed is outside the visible hunks — confirm.
imagePath: string;
|
||||
// Configuration passed to default-model resolution, model catalog loading
// and image-model resolution.
cfg: ClawdbotConfig;
|
||||
// Optional agent directory forwarded to image-model resolution.
agentDir?: string;
|
||||
// Optional agent id used when resolving that agent's default model.
agentId?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -155,26 +154,35 @@ export interface DescribeStickerParams {
|
||||
* Returns null if no vision provider is available.
|
||||
*/
|
||||
export async function describeStickerImage(params: DescribeStickerParams): Promise<string | null> {
|
||||
const { imagePath, cfg, agentDir } = params;
|
||||
const { imagePath, cfg, agentDir, agentId } = params;
|
||||
|
||||
// Find a vision provider with available API key
|
||||
let provider: string | null = null;
|
||||
for (const p of VISION_PROVIDERS) {
|
||||
try {
|
||||
await resolveApiKeyForProvider({ provider: p, cfg, agentDir });
|
||||
provider = p;
|
||||
break;
|
||||
} catch {
|
||||
// No key for this provider, try next
|
||||
const defaultModel = resolveDefaultModelForAgent({ cfg, agentId });
|
||||
let activeModel = undefined as { provider: string; model: string } | undefined;
|
||||
try {
|
||||
const catalog = await loadModelCatalog({ config: cfg });
|
||||
const entry = findModelInCatalog(catalog, defaultModel.provider, defaultModel.model);
|
||||
if (modelSupportsVision(entry)) {
|
||||
activeModel = { provider: defaultModel.provider, model: defaultModel.model };
|
||||
}
|
||||
} catch {
|
||||
// Ignore catalog failures; fall back to auto selection.
|
||||
}
|
||||
|
||||
if (!provider) {
|
||||
const resolved = await resolveAutoImageModel({
|
||||
cfg,
|
||||
agentDir,
|
||||
activeModel,
|
||||
});
|
||||
if (!resolved) {
|
||||
logVerbose("telegram: no vision provider available for sticker description");
|
||||
return null;
|
||||
}
|
||||
|
||||
const model = DEFAULT_VISION_MODELS[provider];
|
||||
const { provider, model } = resolved;
|
||||
if (!model) {
|
||||
logVerbose(`telegram: no vision model available for ${provider}`);
|
||||
return null;
|
||||
}
|
||||
logVerbose(`telegram: describing sticker with ${provider}/${model}`);
|
||||
|
||||
try {
|
||||
@@ -195,7 +203,7 @@ export async function describeStickerImage(params: DescribeStickerParams): Promi
|
||||
});
|
||||
return result.text;
|
||||
} catch (err) {
|
||||
logVerbose(`telegram: failed to describe sticker: ${err}`);
|
||||
logVerbose(`telegram: failed to describe sticker: ${String(err)}`);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user