Agent: guard reminder promises behind cron scheduling

2026-06-02 23:07:15 +00:00 · 2026-02-16 13:34:09 -08:00
parent 0cff8bc4e6
commit 5a26d1c622
10 changed files with 222 additions and 7 deletions
--- a/src/agents/pi-embedded-runner/run.ts
+++ b/src/agents/pi-embedded-runner/run.ts
@@ -948,6 +948,7 @@ export async function runEmbeddedPiAgent(
              didSendViaMessagingTool: attempt.didSendViaMessagingTool,
              messagingToolSentTexts: attempt.messagingToolSentTexts,
              messagingToolSentTargets: attempt.messagingToolSentTargets,
+              successfulCronAdds: attempt.successfulCronAdds,
            };
          }

@@ -989,6 +990,7 @@ export async function runEmbeddedPiAgent(
            didSendViaMessagingTool: attempt.didSendViaMessagingTool,
            messagingToolSentTexts: attempt.messagingToolSentTexts,
            messagingToolSentTargets: attempt.messagingToolSentTargets,
+            successfulCronAdds: attempt.successfulCronAdds,
          };
        }
      } finally {
--- a/src/agents/pi-embedded-runner/run/attempt.ts
+++ b/src/agents/pi-embedded-runner/run/attempt.ts
@@ -759,6 +759,7 @@ export async function runEmbeddedAttempt(
        waitForCompactionRetry,
        getMessagingToolSentTexts,
        getMessagingToolSentTargets,
+        getSuccessfulCronAdds,
        didSendViaMessagingTool,
        getLastToolError,
        getUsageTotals,
@@ -1172,6 +1173,7 @@ export async function runEmbeddedAttempt(
        didSendViaMessagingTool: didSendViaMessagingTool(),
        messagingToolSentTexts: getMessagingToolSentTexts(),
        messagingToolSentTargets: getMessagingToolSentTargets(),
+        successfulCronAdds: getSuccessfulCronAdds(),
        cloudCodeAssistFormatError: Boolean(
          lastAssistant?.errorMessage && isCloudCodeAssistFormatError(lastAssistant.errorMessage),
        ),
--- a/src/agents/pi-embedded-runner/run/types.ts
+++ b/src/agents/pi-embedded-runner/run/types.ts
@@ -43,6 +43,7 @@ export type EmbeddedRunAttemptResult = {
  didSendViaMessagingTool: boolean;
  messagingToolSentTexts: string[];
  messagingToolSentTargets: MessagingToolSend[];
+  successfulCronAdds?: number;
  cloudCodeAssistFormatError: boolean;
  attemptUsage?: NormalizedUsage;
  compactionCount?: number;
--- a/src/agents/pi-embedded-runner/types.ts
+++ b/src/agents/pi-embedded-runner/types.ts
@@ -65,6 +65,8 @@ export type EmbeddedPiRunResult = {
  messagingToolSentTexts?: string[];
  // Messaging tool targets that successfully sent a message during the run.
  messagingToolSentTargets?: MessagingToolSend[];
+  // Count of successful cron.add tool calls in this run.
+  successfulCronAdds?: number;
 };

 export type EmbeddedPiCompactResult = {
--- a/src/agents/pi-embedded-subscribe.handlers.tools.test.ts
+++ b/src/agents/pi-embedded-subscribe.handlers.tools.test.ts
@@ -1,5 +1,8 @@
 import { describe, expect, it, vi } from "vitest";
-import { handleToolExecutionStart } from "./pi-embedded-subscribe.handlers.tools.js";
+import {
+  handleToolExecutionEnd,
+  handleToolExecutionStart,
+} from "./pi-embedded-subscribe.handlers.tools.js";

 function createTestContext() {
  const onBlockReplyFlush = vi.fn();
@@ -25,6 +28,8 @@ function createTestContext() {
      messagingToolSentTexts: [],
      messagingToolSentTextsNormalized: [],
      messagingToolSentTargets: [],
+      successfulCronAdds: 0,
+      toolMetas: [],
    },
    shouldEmitToolResult: () => false,
    emitToolSummary: vi.fn(),
@@ -69,3 +74,57 @@ describe("handleToolExecutionStart read path checks", () => {
    expect(String(warn.mock.calls[0]?.[0] ?? "")).toContain("read tool called without path");
  });
 });
+
+describe("handleToolExecutionEnd cron.add commitment tracking", () => {
+  it("increments successfulCronAdds when cron add succeeds", async () => {
+    const { ctx } = createTestContext();
+    await handleToolExecutionStart(
+      ctx as never,
+      {
+        type: "tool_execution_start",
+        toolName: "cron",
+        toolCallId: "tool-cron-1",
+        args: { action: "add", job: { name: "reminder" } },
+      } as never,
+    );
+
+    await handleToolExecutionEnd(
+      ctx as never,
+      {
+        type: "tool_execution_end",
+        toolName: "cron",
+        toolCallId: "tool-cron-1",
+        isError: false,
+        result: { details: { status: "ok" } },
+      } as never,
+    );
+
+    expect(ctx.state.successfulCronAdds).toBe(1);
+  });
+
+  it("does not increment successfulCronAdds when cron add fails", async () => {
+    const { ctx } = createTestContext();
+    await handleToolExecutionStart(
+      ctx as never,
+      {
+        type: "tool_execution_start",
+        toolName: "cron",
+        toolCallId: "tool-cron-2",
+        args: { action: "add", job: { name: "reminder" } },
+      } as never,
+    );
+
+    await handleToolExecutionEnd(
+      ctx as never,
+      {
+        type: "tool_execution_end",
+        toolName: "cron",
+        toolCallId: "tool-cron-2",
+        isError: true,
+        result: { details: { status: "error" } },
+      } as never,
+    );
+
+    expect(ctx.state.successfulCronAdds).toBe(0);
+  });
+});
--- a/src/agents/pi-embedded-subscribe.handlers.tools.ts
+++ b/src/agents/pi-embedded-subscribe.handlers.tools.ts
@@ -23,6 +23,14 @@ import { normalizeToolName } from "./tool-policy.js";
 /** Track tool execution start times and args for after_tool_call hook */
 const toolStartData = new Map<string, { startTime: number; args: unknown }>();

+function isCronAddAction(args: unknown): boolean {
+  if (!args || typeof args !== "object") {
+    return false;
+  }
+  const action = (args as Record<string, unknown>).action;
+  return typeof action === "string" && action.trim().toLowerCase() === "add";
+}
+
 function buildToolCallSummary(toolName: string, args: unknown, meta?: string): ToolCallSummary {
  const mutation = buildToolMutationState(toolName, args, meta);
  return {
@@ -188,6 +196,8 @@ export async function handleToolExecutionEnd(
  const result = evt.result;
  const isToolError = isError || isToolResultError(result);
  const sanitizedResult = sanitizeToolResult(result);
+  const startData = toolStartData.get(toolCallId);
+  toolStartData.delete(toolCallId);
  const callSummary = ctx.state.toolMetaById.get(toolCallId);
  const meta = callSummary?.meta;
  ctx.state.toolMetas.push({ toolName, meta });
@@ -239,6 +249,11 @@ export async function handleToolExecutionEnd(
    }
  }

+  // Track committed reminders only when cron.add completed successfully.
+  if (!isToolError && toolName === "cron" && isCronAddAction(startData?.args)) {
+    ctx.state.successfulCronAdds += 1;
+  }
+
  emitAgentEvent({
    runId: ctx.params.runId,
    stream: "tool",
@@ -290,8 +305,6 @@ export async function handleToolExecutionEnd(
  // Run after_tool_call plugin hook (fire-and-forget)
  const hookRunnerAfter = ctx.hookRunner ?? getGlobalHookRunner();
  if (hookRunnerAfter?.hasHooks("after_tool_call")) {
-    const startData = toolStartData.get(toolCallId);
-    toolStartData.delete(toolCallId);
    const durationMs = startData?.startTime != null ? Date.now() - startData.startTime : undefined;
    const toolArgs = startData?.args;
    const hookEvent: PluginHookAfterToolCallEvent = {
@@ -310,7 +323,5 @@ export async function handleToolExecutionEnd(
      .catch((err) => {
        ctx.log.warn(`after_tool_call hook failed: tool=${toolName} error=${String(err)}`);
      });
-  } else {
-    toolStartData.delete(toolCallId);
  }
 }
--- a/src/agents/pi-embedded-subscribe.handlers.types.ts
+++ b/src/agents/pi-embedded-subscribe.handlers.types.ts
@@ -72,6 +72,7 @@ export type EmbeddedPiSubscribeState = {
  messagingToolSentTargets: MessagingToolSend[];
  pendingMessagingTexts: Map<string, string>;
  pendingMessagingTargets: Map<string, MessagingToolSend>;
+  successfulCronAdds: number;
  lastAssistant?: AgentMessage;
 };

--- a/src/agents/pi-embedded-subscribe.ts
+++ b/src/agents/pi-embedded-subscribe.ts
@@ -73,6 +73,7 @@ export function subscribeEmbeddedPiSession(params: SubscribeEmbeddedPiSessionPar
    messagingToolSentTargets: [],
    pendingMessagingTexts: new Map(),
    pendingMessagingTargets: new Map(),
+    successfulCronAdds: 0,
  };
  const usageTotals = {
    input: 0,
@@ -578,6 +579,7 @@ export function subscribeEmbeddedPiSession(params: SubscribeEmbeddedPiSessionPar
    messagingToolSentTargets.length = 0;
    pendingMessagingTexts.clear();
    pendingMessagingTargets.clear();
+    state.successfulCronAdds = 0;
    resetAssistantMessageState(0);
  };

@@ -662,6 +664,7 @@ export function subscribeEmbeddedPiSession(params: SubscribeEmbeddedPiSessionPar
    isCompactionInFlight: () => state.compactionInFlight,
    getMessagingToolSentTexts: () => messagingToolSentTexts.slice(),
    getMessagingToolSentTargets: () => messagingToolSentTargets.slice(),
+    getSuccessfulCronAdds: () => state.successfulCronAdds,
    // Returns true if any messaging tool successfully sent a message.
    // Used to suppress agent's confirmation text (e.g., "Respondi no Telegram!")
    // which is generated AFTER the tool sends the actual answer.
--- a/src/auto-reply/reply/agent-runner.misc.runreplyagent.test.ts
+++ b/src/auto-reply/reply/agent-runner.misc.runreplyagent.test.ts
@@ -853,6 +853,93 @@ describe("runReplyAgent messaging tool suppression", () => {
  });
 });

+describe("runReplyAgent reminder commitment guard", () => {
+  function createRun() {
+    const typing = createMockTypingController();
+    const sessionCtx = {
+      Provider: "telegram",
+      OriginatingTo: "chat",
+      AccountId: "primary",
+      MessageSid: "msg",
+      Surface: "telegram",
+    } as unknown as TemplateContext;
+    const resolvedQueue = { mode: "interrupt" } as unknown as QueueSettings;
+    const followupRun = {
+      prompt: "hello",
+      summaryLine: "hello",
+      enqueuedAt: Date.now(),
+      run: {
+        sessionId: "session",
+        sessionKey: "main",
+        messageProvider: "telegram",
+        sessionFile: "/tmp/session.jsonl",
+        workspaceDir: "/tmp",
+        config: {},
+        skillsSnapshot: {},
+        provider: "anthropic",
+        model: "claude",
+        thinkLevel: "low",
+        verboseLevel: "off",
+        elevatedLevel: "off",
+        bashElevated: {
+          enabled: false,
+          allowed: false,
+          defaultLevel: "off",
+        },
+        timeoutMs: 1_000,
+        blockReplyBreak: "message_end",
+      },
+    } as unknown as FollowupRun;
+
+    return runReplyAgent({
+      commandBody: "hello",
+      followupRun,
+      queueKey: "main",
+      resolvedQueue,
+      shouldSteer: false,
+      shouldFollowup: false,
+      isActive: false,
+      isStreaming: false,
+      typing,
+      sessionCtx,
+      sessionKey: "main",
+      defaultModel: "anthropic/claude-opus-4-5",
+      resolvedVerboseLevel: "off",
+      isNewSession: false,
+      blockStreamingEnabled: false,
+      resolvedBlockStreamingBreak: "message_end",
+      shouldInjectGroupIntro: false,
+      typingMode: "instant",
+    });
+  }
+
+  it("appends guard note when reminder commitment is not backed by cron.add", async () => {
+    runEmbeddedPiAgentMock.mockResolvedValueOnce({
+      payloads: [{ text: "I'll remind you tomorrow morning." }],
+      meta: {},
+      successfulCronAdds: 0,
+    });
+
+    const result = await createRun();
+    expect(result).toMatchObject({
+      text: "I'll remind you tomorrow morning.\n\nNote: I did not schedule a reminder in this turn, so this will not trigger automatically.",
+    });
+  });
+
+  it("keeps reminder commitment unchanged when cron.add succeeded", async () => {
+    runEmbeddedPiAgentMock.mockResolvedValueOnce({
+      payloads: [{ text: "I'll remind you tomorrow morning." }],
+      meta: {},
+      successfulCronAdds: 1,
+    });
+
+    const result = await createRun();
+    expect(result).toMatchObject({
+      text: "I'll remind you tomorrow morning.",
+    });
+  });
+});
+
 describe("runReplyAgent fallback reasoning tags", () => {
  type EmbeddedPiAgentParams = {
    enforceFinalTag?: boolean;
--- a/src/auto-reply/reply/agent-runner.ts
+++ b/src/auto-reply/reply/agent-runner.ts
@@ -42,6 +42,41 @@ import { incrementRunCompactionCount, persistRunSessionUsage } from "./session-r
 import { createTypingSignaler } from "./typing-mode.js";

 const BLOCK_REPLY_SEND_TIMEOUT_MS = 15_000;
+const UNSCHEDULED_REMINDER_NOTE =
+  "Note: I did not schedule a reminder in this turn, so this will not trigger automatically.";
+const REMINDER_COMMITMENT_PATTERNS: RegExp[] = [
+  /\b(?:i\s*['’]?ll|i will)\s+(?:make sure to\s+)?(?:remember|remind|ping|follow up|follow-up|check back|circle back)\b/i,
+  /\b(?:i\s*['’]?ll|i will)\s+(?:set|create|schedule)\s+(?:a\s+)?reminder\b/i,
+];
+
+function hasUnbackedReminderCommitment(text: string): boolean {
+  const normalized = text.toLowerCase();
+  if (!normalized.trim()) {
+    return false;
+  }
+  if (normalized.includes(UNSCHEDULED_REMINDER_NOTE.toLowerCase())) {
+    return false;
+  }
+  return REMINDER_COMMITMENT_PATTERNS.some((pattern) => pattern.test(text));
+}
+
+function appendUnscheduledReminderNote(payloads: ReplyPayload[]): ReplyPayload[] {
+  let appended = false;
+  return payloads.map((payload) => {
+    if (appended || payload.isError || typeof payload.text !== "string") {
+      return payload;
+    }
+    if (!hasUnbackedReminderCommitment(payload.text)) {
+      return payload;
+    }
+    appended = true;
+    const trimmed = payload.text.trimEnd();
+    return {
+      ...payload,
+      text: `${trimmed}\n\n${UNSCHEDULED_REMINDER_NOTE}`,
+    };
+  });
+}

 export async function runReplyAgent(params: {
  commandBody: string;
@@ -420,7 +455,19 @@ export async function runReplyAgent(params: {
      return finalizeWithFollowup(undefined, queueKey, runFollowupTurn);
    }

-    await signalTypingIfNeeded(replyPayloads, typingSignals);
+    const successfulCronAdds = runResult.successfulCronAdds ?? 0;
+    const hasReminderCommitment = replyPayloads.some(
+      (payload) =>
+        !payload.isError &&
+        typeof payload.text === "string" &&
+        hasUnbackedReminderCommitment(payload.text),
+    );
+    const guardedReplyPayloads =
+      hasReminderCommitment && successfulCronAdds === 0
+        ? appendUnscheduledReminderNote(replyPayloads)
+        : replyPayloads;
+
+    await signalTypingIfNeeded(guardedReplyPayloads, typingSignals);

    if (isDiagnosticsEnabled(cfg) && hasNonzeroUsage(usage)) {
      const input = usage.input ?? 0;
@@ -488,7 +535,7 @@ export async function runReplyAgent(params: {
    }

    // If verbose is enabled and this is a new session, prepend a session hint.
-    let finalPayloads = replyPayloads;
+    let finalPayloads = guardedReplyPayloads;
    const verboseEnabled = resolvedVerboseLevel !== "off";
    if (autoCompactionCompleted) {
      const count = await incrementRunCompactionCount({