Agent: guard reminder promises behind cron scheduling

This commit is contained in:
Vignesh Natarajan
2026-02-16 13:34:09 -08:00
committed by Vignesh
parent 0cff8bc4e6
commit 5a26d1c622
10 changed files with 222 additions and 7 deletions

View File

@@ -948,6 +948,7 @@ export async function runEmbeddedPiAgent(
didSendViaMessagingTool: attempt.didSendViaMessagingTool,
messagingToolSentTexts: attempt.messagingToolSentTexts,
messagingToolSentTargets: attempt.messagingToolSentTargets,
successfulCronAdds: attempt.successfulCronAdds,
};
}
@@ -989,6 +990,7 @@ export async function runEmbeddedPiAgent(
didSendViaMessagingTool: attempt.didSendViaMessagingTool,
messagingToolSentTexts: attempt.messagingToolSentTexts,
messagingToolSentTargets: attempt.messagingToolSentTargets,
successfulCronAdds: attempt.successfulCronAdds,
};
}
} finally {

View File

@@ -759,6 +759,7 @@ export async function runEmbeddedAttempt(
waitForCompactionRetry,
getMessagingToolSentTexts,
getMessagingToolSentTargets,
getSuccessfulCronAdds,
didSendViaMessagingTool,
getLastToolError,
getUsageTotals,
@@ -1172,6 +1173,7 @@ export async function runEmbeddedAttempt(
didSendViaMessagingTool: didSendViaMessagingTool(),
messagingToolSentTexts: getMessagingToolSentTexts(),
messagingToolSentTargets: getMessagingToolSentTargets(),
successfulCronAdds: getSuccessfulCronAdds(),
cloudCodeAssistFormatError: Boolean(
lastAssistant?.errorMessage && isCloudCodeAssistFormatError(lastAssistant.errorMessage),
),

View File

@@ -43,6 +43,7 @@ export type EmbeddedRunAttemptResult = {
didSendViaMessagingTool: boolean;
messagingToolSentTexts: string[];
messagingToolSentTargets: MessagingToolSend[];
successfulCronAdds?: number;
cloudCodeAssistFormatError: boolean;
attemptUsage?: NormalizedUsage;
compactionCount?: number;

View File

@@ -65,6 +65,8 @@ export type EmbeddedPiRunResult = {
messagingToolSentTexts?: string[];
// Messaging tool targets that successfully sent a message during the run.
messagingToolSentTargets?: MessagingToolSend[];
// Count of successful cron.add tool calls in this run.
successfulCronAdds?: number;
};
export type EmbeddedPiCompactResult = {

View File

@@ -1,5 +1,8 @@
import { describe, expect, it, vi } from "vitest";
import { handleToolExecutionStart } from "./pi-embedded-subscribe.handlers.tools.js";
import {
handleToolExecutionEnd,
handleToolExecutionStart,
} from "./pi-embedded-subscribe.handlers.tools.js";
function createTestContext() {
const onBlockReplyFlush = vi.fn();
@@ -25,6 +28,8 @@ function createTestContext() {
messagingToolSentTexts: [],
messagingToolSentTextsNormalized: [],
messagingToolSentTargets: [],
successfulCronAdds: 0,
toolMetas: [],
},
shouldEmitToolResult: () => false,
emitToolSummary: vi.fn(),
@@ -69,3 +74,57 @@ describe("handleToolExecutionStart read path checks", () => {
expect(String(warn.mock.calls[0]?.[0] ?? "")).toContain("read tool called without path");
});
});
describe("handleToolExecutionEnd cron.add commitment tracking", () => {
it("increments successfulCronAdds when cron add succeeds", async () => {
const { ctx } = createTestContext();
await handleToolExecutionStart(
ctx as never,
{
type: "tool_execution_start",
toolName: "cron",
toolCallId: "tool-cron-1",
args: { action: "add", job: { name: "reminder" } },
} as never,
);
await handleToolExecutionEnd(
ctx as never,
{
type: "tool_execution_end",
toolName: "cron",
toolCallId: "tool-cron-1",
isError: false,
result: { details: { status: "ok" } },
} as never,
);
expect(ctx.state.successfulCronAdds).toBe(1);
});
it("does not increment successfulCronAdds when cron add fails", async () => {
const { ctx } = createTestContext();
await handleToolExecutionStart(
ctx as never,
{
type: "tool_execution_start",
toolName: "cron",
toolCallId: "tool-cron-2",
args: { action: "add", job: { name: "reminder" } },
} as never,
);
await handleToolExecutionEnd(
ctx as never,
{
type: "tool_execution_end",
toolName: "cron",
toolCallId: "tool-cron-2",
isError: true,
result: { details: { status: "error" } },
} as never,
);
expect(ctx.state.successfulCronAdds).toBe(0);
});
});

View File

@@ -23,6 +23,14 @@ import { normalizeToolName } from "./tool-policy.js";
/** Track tool execution start times and args for after_tool_call hook */
const toolStartData = new Map<string, { startTime: number; args: unknown }>();
function isCronAddAction(args: unknown): boolean {
if (!args || typeof args !== "object") {
return false;
}
const action = (args as Record<string, unknown>).action;
return typeof action === "string" && action.trim().toLowerCase() === "add";
}
function buildToolCallSummary(toolName: string, args: unknown, meta?: string): ToolCallSummary {
const mutation = buildToolMutationState(toolName, args, meta);
return {
@@ -188,6 +196,8 @@ export async function handleToolExecutionEnd(
const result = evt.result;
const isToolError = isError || isToolResultError(result);
const sanitizedResult = sanitizeToolResult(result);
const startData = toolStartData.get(toolCallId);
toolStartData.delete(toolCallId);
const callSummary = ctx.state.toolMetaById.get(toolCallId);
const meta = callSummary?.meta;
ctx.state.toolMetas.push({ toolName, meta });
@@ -239,6 +249,11 @@ export async function handleToolExecutionEnd(
}
}
// Track committed reminders only when cron.add completed successfully.
if (!isToolError && toolName === "cron" && isCronAddAction(startData?.args)) {
ctx.state.successfulCronAdds += 1;
}
emitAgentEvent({
runId: ctx.params.runId,
stream: "tool",
@@ -290,8 +305,6 @@ export async function handleToolExecutionEnd(
// Run after_tool_call plugin hook (fire-and-forget)
const hookRunnerAfter = ctx.hookRunner ?? getGlobalHookRunner();
if (hookRunnerAfter?.hasHooks("after_tool_call")) {
const startData = toolStartData.get(toolCallId);
toolStartData.delete(toolCallId);
const durationMs = startData?.startTime != null ? Date.now() - startData.startTime : undefined;
const toolArgs = startData?.args;
const hookEvent: PluginHookAfterToolCallEvent = {
@@ -310,7 +323,5 @@ export async function handleToolExecutionEnd(
.catch((err) => {
ctx.log.warn(`after_tool_call hook failed: tool=${toolName} error=${String(err)}`);
});
} else {
toolStartData.delete(toolCallId);
}
}

View File

@@ -72,6 +72,7 @@ export type EmbeddedPiSubscribeState = {
messagingToolSentTargets: MessagingToolSend[];
pendingMessagingTexts: Map<string, string>;
pendingMessagingTargets: Map<string, MessagingToolSend>;
successfulCronAdds: number;
lastAssistant?: AgentMessage;
};

View File

@@ -73,6 +73,7 @@ export function subscribeEmbeddedPiSession(params: SubscribeEmbeddedPiSessionPar
messagingToolSentTargets: [],
pendingMessagingTexts: new Map(),
pendingMessagingTargets: new Map(),
successfulCronAdds: 0,
};
const usageTotals = {
input: 0,
@@ -578,6 +579,7 @@ export function subscribeEmbeddedPiSession(params: SubscribeEmbeddedPiSessionPar
messagingToolSentTargets.length = 0;
pendingMessagingTexts.clear();
pendingMessagingTargets.clear();
state.successfulCronAdds = 0;
resetAssistantMessageState(0);
};
@@ -662,6 +664,7 @@ export function subscribeEmbeddedPiSession(params: SubscribeEmbeddedPiSessionPar
isCompactionInFlight: () => state.compactionInFlight,
getMessagingToolSentTexts: () => messagingToolSentTexts.slice(),
getMessagingToolSentTargets: () => messagingToolSentTargets.slice(),
getSuccessfulCronAdds: () => state.successfulCronAdds,
// Returns true if any messaging tool successfully sent a message.
// Used to suppress agent's confirmation text (e.g., "Respondi no Telegram!")
// which is generated AFTER the tool sends the actual answer.

View File

@@ -853,6 +853,93 @@ describe("runReplyAgent messaging tool suppression", () => {
});
});
describe("runReplyAgent reminder commitment guard", () => {
function createRun() {
const typing = createMockTypingController();
const sessionCtx = {
Provider: "telegram",
OriginatingTo: "chat",
AccountId: "primary",
MessageSid: "msg",
Surface: "telegram",
} as unknown as TemplateContext;
const resolvedQueue = { mode: "interrupt" } as unknown as QueueSettings;
const followupRun = {
prompt: "hello",
summaryLine: "hello",
enqueuedAt: Date.now(),
run: {
sessionId: "session",
sessionKey: "main",
messageProvider: "telegram",
sessionFile: "/tmp/session.jsonl",
workspaceDir: "/tmp",
config: {},
skillsSnapshot: {},
provider: "anthropic",
model: "claude",
thinkLevel: "low",
verboseLevel: "off",
elevatedLevel: "off",
bashElevated: {
enabled: false,
allowed: false,
defaultLevel: "off",
},
timeoutMs: 1_000,
blockReplyBreak: "message_end",
},
} as unknown as FollowupRun;
return runReplyAgent({
commandBody: "hello",
followupRun,
queueKey: "main",
resolvedQueue,
shouldSteer: false,
shouldFollowup: false,
isActive: false,
isStreaming: false,
typing,
sessionCtx,
sessionKey: "main",
defaultModel: "anthropic/claude-opus-4-5",
resolvedVerboseLevel: "off",
isNewSession: false,
blockStreamingEnabled: false,
resolvedBlockStreamingBreak: "message_end",
shouldInjectGroupIntro: false,
typingMode: "instant",
});
}
it("appends guard note when reminder commitment is not backed by cron.add", async () => {
runEmbeddedPiAgentMock.mockResolvedValueOnce({
payloads: [{ text: "I'll remind you tomorrow morning." }],
meta: {},
successfulCronAdds: 0,
});
const result = await createRun();
expect(result).toMatchObject({
text: "I'll remind you tomorrow morning.\n\nNote: I did not schedule a reminder in this turn, so this will not trigger automatically.",
});
});
it("keeps reminder commitment unchanged when cron.add succeeded", async () => {
runEmbeddedPiAgentMock.mockResolvedValueOnce({
payloads: [{ text: "I'll remind you tomorrow morning." }],
meta: {},
successfulCronAdds: 1,
});
const result = await createRun();
expect(result).toMatchObject({
text: "I'll remind you tomorrow morning.",
});
});
});
describe("runReplyAgent fallback reasoning tags", () => {
type EmbeddedPiAgentParams = {
enforceFinalTag?: boolean;

View File

@@ -42,6 +42,41 @@ import { incrementRunCompactionCount, persistRunSessionUsage } from "./session-r
import { createTypingSignaler } from "./typing-mode.js";
const BLOCK_REPLY_SEND_TIMEOUT_MS = 15_000;
const UNSCHEDULED_REMINDER_NOTE =
"Note: I did not schedule a reminder in this turn, so this will not trigger automatically.";
const REMINDER_COMMITMENT_PATTERNS: RegExp[] = [
/\b(?:i\s*[']?ll|i will)\s+(?:make sure to\s+)?(?:remember|remind|ping|follow up|follow-up|check back|circle back)\b/i,
/\b(?:i\s*[']?ll|i will)\s+(?:set|create|schedule)\s+(?:a\s+)?reminder\b/i,
];
function hasUnbackedReminderCommitment(text: string): boolean {
const normalized = text.toLowerCase();
if (!normalized.trim()) {
return false;
}
if (normalized.includes(UNSCHEDULED_REMINDER_NOTE.toLowerCase())) {
return false;
}
return REMINDER_COMMITMENT_PATTERNS.some((pattern) => pattern.test(text));
}
function appendUnscheduledReminderNote(payloads: ReplyPayload[]): ReplyPayload[] {
let appended = false;
return payloads.map((payload) => {
if (appended || payload.isError || typeof payload.text !== "string") {
return payload;
}
if (!hasUnbackedReminderCommitment(payload.text)) {
return payload;
}
appended = true;
const trimmed = payload.text.trimEnd();
return {
...payload,
text: `${trimmed}\n\n${UNSCHEDULED_REMINDER_NOTE}`,
};
});
}
export async function runReplyAgent(params: {
commandBody: string;
@@ -420,7 +455,19 @@ export async function runReplyAgent(params: {
return finalizeWithFollowup(undefined, queueKey, runFollowupTurn);
}
await signalTypingIfNeeded(replyPayloads, typingSignals);
const successfulCronAdds = runResult.successfulCronAdds ?? 0;
const hasReminderCommitment = replyPayloads.some(
(payload) =>
!payload.isError &&
typeof payload.text === "string" &&
hasUnbackedReminderCommitment(payload.text),
);
const guardedReplyPayloads =
hasReminderCommitment && successfulCronAdds === 0
? appendUnscheduledReminderNote(replyPayloads)
: replyPayloads;
await signalTypingIfNeeded(guardedReplyPayloads, typingSignals);
if (isDiagnosticsEnabled(cfg) && hasNonzeroUsage(usage)) {
const input = usage.input ?? 0;
@@ -488,7 +535,7 @@ export async function runReplyAgent(params: {
}
// If verbose is enabled and this is a new session, prepend a session hint.
let finalPayloads = replyPayloads;
let finalPayloads = guardedReplyPayloads;
const verboseEnabled = resolvedVerboseLevel !== "off";
if (autoCompactionCompleted) {
const count = await incrementRunCompactionCount({