diff --git a/CHANGELOG.md b/CHANGELOG.md index d6917e8db61..9a695f7619e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,6 +40,7 @@ Docs: https://docs.openclaw.ai - Security/Link understanding: block loopback/internal host patterns and private/mapped IPv6 addresses in extracted URL handling to close SSRF bypasses in link CLI flows. (#15604) Thanks @AI-Reviewer-QS. - Android/Nodes: harden `app.update` by requiring HTTPS and gateway-host URL matching plus SHA-256 verification, stream URL camera downloads to disk with size guards to avoid memory spikes, and stop signing release builds with debug keys. (#13541) Thanks @smartprogrammer93. - Auto-reply/Threading: auto-inject implicit reply threading so `replyToMode` works without requiring model-emitted `[[reply_to_current]]`, while preserving `replyToMode: "off"` behavior for implicit Slack replies and keeping block-streaming chunk coalescing stable under `replyToMode: "first"`. (#14976) Thanks @Diaspar4u. +- Auto-reply/Media: allow image-only inbound messages (no caption) to reach the agent instead of short-circuiting as empty text, and preserve thread context in queued/followup prompt bodies for media-only runs. (#11916) Thanks @arosstale. - Sandbox: pass configured `sandbox.docker.env` variables to sandbox containers at `docker create` time. (#15138) Thanks @stevebot-alive. - Gateway/Restart: clear stale command-queue and heartbeat wake runtime state after SIGUSR1 in-process restarts to prevent zombie gateway behavior where queued work stops draining. (#15195) Thanks @joeykrug. - Onboarding/CLI: restore terminal state without resuming paused `stdin`, so onboarding exits cleanly after choosing Web UI and the installer returns instead of appearing stuck. diff --git a/src/auto-reply/reply/get-reply-run.media-only.test.ts b/src/auto-reply/reply/get-reply-run.media-only.test.ts new file mode 100644 index 00000000000..942146189fa --- /dev/null +++ b/src/auto-reply/reply/get-reply-run.media-only.test.ts @@ -0,0 +1,192 @@ +import { beforeEach, describe, expect, it, vi } from "vitest"; +import { runPreparedReply } from "./get-reply-run.js"; + +vi.mock("../../agents/auth-profiles/session-override.js", () => ({ + resolveSessionAuthProfileOverride: vi.fn().mockResolvedValue(undefined), +})); + +vi.mock("../../agents/pi-embedded.js", () => ({ + abortEmbeddedPiRun: vi.fn().mockReturnValue(false), + isEmbeddedPiRunActive: vi.fn().mockReturnValue(false), + isEmbeddedPiRunStreaming: vi.fn().mockReturnValue(false), + resolveEmbeddedSessionLane: vi.fn().mockReturnValue("session:session-key"), +})); + +vi.mock("../../config/sessions.js", () => ({ + resolveGroupSessionKey: vi.fn().mockReturnValue(undefined), + resolveSessionFilePath: vi.fn().mockReturnValue("/tmp/session.jsonl"), + resolveSessionFilePathOptions: vi.fn().mockReturnValue({}), + updateSessionStore: vi.fn(), +})); + +vi.mock("../../globals.js", () => ({ + logVerbose: vi.fn(), +})); + +vi.mock("../../process/command-queue.js", () => ({ + clearCommandLane: vi.fn().mockReturnValue(0), + getQueueSize: vi.fn().mockReturnValue(0), +})); + +vi.mock("../../routing/session-key.js", () => ({ + normalizeMainKey: vi.fn().mockReturnValue("main"), +})); + +vi.mock("../../utils/provider-utils.js", () => ({ + isReasoningTagProvider: vi.fn().mockReturnValue(false), +})); + +vi.mock("../command-detection.js", () => ({ + hasControlCommand: vi.fn().mockReturnValue(false), +})); + +vi.mock("./agent-runner.js", () => ({ + runReplyAgent: vi.fn().mockResolvedValue({ text: "ok" }), +})); + +vi.mock("./body.js", () => ({ + applySessionHints: vi.fn().mockImplementation(async ({ baseBody }) => baseBody), +})); + +vi.mock("./groups.js", () => ({ + buildGroupIntro: vi.fn().mockReturnValue(""), +})); + +vi.mock("./inbound-meta.js", () => ({ + buildInboundMetaSystemPrompt: vi.fn().mockReturnValue(""), + buildInboundUserContextPrefix: vi.fn().mockReturnValue(""), +})); + +vi.mock("./queue.js", () => ({ + resolveQueueSettings: vi.fn().mockReturnValue({ mode: "followup" }), +})); + +vi.mock("./route-reply.js", () => ({ + routeReply: vi.fn(), +})); + +vi.mock("./session-updates.js", () => ({ + ensureSkillSnapshot: vi.fn().mockImplementation(async ({ sessionEntry, systemSent }) => ({ + sessionEntry, + systemSent, + skillsSnapshot: undefined, + })), + prependSystemEvents: vi.fn().mockImplementation(async ({ prefixedBodyBase }) => prefixedBodyBase), +})); + +vi.mock("./typing-mode.js", () => ({ + resolveTypingMode: vi.fn().mockReturnValue("off"), +})); + +import { runReplyAgent } from "./agent-runner.js"; + +function baseParams( + overrides: Partial[0]> = {}, +): Parameters[0] { + return { + ctx: { + Body: "", + RawBody: "", + CommandBody: "", + ThreadHistoryBody: "Earlier message in this thread", + OriginatingChannel: "slack", + OriginatingTo: "C123", + ChatType: "group", + }, + sessionCtx: { + Body: "", + BodyStripped: "", + ThreadHistoryBody: "Earlier message in this thread", + MediaPath: "/tmp/input.png", + Provider: "slack", + ChatType: "group", + OriginatingChannel: "slack", + OriginatingTo: "C123", + }, + cfg: { session: {}, channels: {}, agents: { defaults: {} } }, + agentId: "default", + agentDir: "/tmp/agent", + agentCfg: {}, + sessionCfg: {}, + commandAuthorized: true, + command: { + isAuthorizedSender: true, + abortKey: "session-key", + ownerList: [], + senderIsOwner: false, + } as never, + commandSource: "", + allowTextCommands: true, + directives: { + hasThinkDirective: false, + thinkLevel: undefined, + } as never, + defaultActivation: "always", + resolvedThinkLevel: "high", + resolvedVerboseLevel: "off", + resolvedReasoningLevel: "off", + resolvedElevatedLevel: "off", + elevatedEnabled: false, + elevatedAllowed: false, + blockStreamingEnabled: false, + resolvedBlockStreamingBreak: "message_end", + modelState: { + resolveDefaultThinkingLevel: async () => "medium", + } as never, + provider: "anthropic", + model: "claude-opus-4-1", + typing: { + onReplyStart: vi.fn().mockResolvedValue(undefined), + cleanup: vi.fn(), + } as never, + defaultProvider: "anthropic", + defaultModel: "claude-opus-4-1", + timeoutMs: 30_000, + isNewSession: true, + resetTriggered: false, + systemSent: true, + sessionKey: "session-key", + workspaceDir: "/tmp/workspace", + abortedLastRun: false, + ...overrides, + }; +} + +describe("runPreparedReply media-only handling", () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it("allows media-only prompts and preserves thread context in queued followups", async () => { + const result = await runPreparedReply(baseParams()); + expect(result).toEqual({ text: "ok" }); + + const call = vi.mocked(runReplyAgent).mock.calls[0]?.[0]; + expect(call).toBeTruthy(); + expect(call?.followupRun.prompt).toContain("[Thread history - for context]"); + expect(call?.followupRun.prompt).toContain("Earlier message in this thread"); + expect(call?.followupRun.prompt).toContain("[User sent media without caption]"); + }); + + it("returns the empty-body reply when there is no text and no media", async () => { + const result = await runPreparedReply( + baseParams({ + ctx: { + Body: "", + RawBody: "", + CommandBody: "", + }, + sessionCtx: { + Body: "", + BodyStripped: "", + Provider: "slack", + }, + }), + ); + + expect(result).toEqual({ + text: "I didn't receive any text in your message. Please resend or add a caption.", + }); + expect(vi.mocked(runReplyAgent)).not.toHaveBeenCalled(); + }); +}); diff --git a/src/auto-reply/reply/get-reply-run.ts b/src/auto-reply/reply/get-reply-run.ts index 5fc6acd45ff..f85446ecec9 100644 --- a/src/auto-reply/reply/get-reply-run.ts +++ b/src/auto-reply/reply/get-reply-run.ts @@ -221,7 +221,10 @@ export async function runPreparedReply( ? baseBodyFinal : [inboundUserContext, baseBodyFinal].filter(Boolean).join("\n\n"); const baseBodyTrimmed = baseBodyForPrompt.trim(); - if (!baseBodyTrimmed) { + const hasMediaAttachment = Boolean( + sessionCtx.MediaPath || (sessionCtx.MediaPaths && sessionCtx.MediaPaths.length > 0), + ); + if (!baseBodyTrimmed && !hasMediaAttachment) { await typing.onReplyStart(); logVerbose("Inbound body empty after normalization; skipping agent run"); typing.cleanup(); @@ -229,8 +232,13 @@ export async function runPreparedReply( text: "I didn't receive any text in your message. Please resend or add a caption.", }; } + // When the user sends media without text, provide a minimal body so the agent + // run proceeds and the image/document is injected by the embedded runner. + const effectiveBaseBody = baseBodyTrimmed + ? baseBodyForPrompt + : "[User sent media without caption]"; let prefixedBodyBase = await applySessionHints({ - baseBody: baseBodyForPrompt, + baseBody: effectiveBaseBody, abortedLastRun, sessionEntry, sessionStore, @@ -337,7 +345,7 @@ export async function runPreparedReply( sessionEntry, resolveSessionFilePathOptions({ agentId, storePath }), ); - const queueBodyBase = [threadContextNote, baseBodyForPrompt].filter(Boolean).join("\n\n"); + const queueBodyBase = [threadContextNote, effectiveBaseBody].filter(Boolean).join("\n\n"); const queuedBody = mediaNote ? [mediaNote, mediaReplyHint, queueBodyBase].filter(Boolean).join("\n").trim() : queueBodyBase;