diff --git a/CHANGELOG.md b/CHANGELOG.md index 33fe7c58fb9..154f00d2e95 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -54,6 +54,7 @@ Docs: https://docs.openclaw.ai - Browser/default profile selection: default `browser.defaultProfile` behavior now prefers `openclaw` (managed standalone CDP) when no explicit default is configured, while still auto-provisioning the `chrome` relay profile for explicit opt-in use. (#32031) Fixes #31907. Thanks @liuxiaopai-ai. - Doctor/local memory provider checks: stop false-positive local-provider warnings when `provider=local` and no explicit `modelPath` is set by honoring default local model fallback while still warning when gateway probe reports local embeddings not ready. (#32014) Fixes #31998. Thanks @adhishthite. - Feishu/Run channel fallback: prefer `Provider` over `Surface` when inferring queued run `messageProvider` fallback (when `OriginatingChannel` is missing), preventing Feishu turns from being mislabeled as `webchat` in mixed relay metadata contexts. (#31880) Fixes #31859. Thanks @liuxiaopai-ai. +- Cron/session reaper reliability: move cron session reaper sweeps into `onTimer` `finally` and keep pruning active even when timer ticks fail early (for example cron store parse failures), preventing stale isolated run sessions from accumulating indefinitely. (#31996) Fixes #31946. Thanks @scoootscooob. - Sandbox/Docker setup command parsing: accept `agents.*.sandbox.docker.setupCommand` as either a string or a string array, and normalize arrays to newline-delimited shell scripts so multi-step setup commands no longer concatenate without separators. (#31953) Thanks @liuxiaopai-ai. - Gateway/Plugin HTTP route precedence: run explicit plugin HTTP routes before the Control UI SPA catch-all so registered plugin webhook/custom paths remain reachable, while unmatched paths still fall through to Control UI handling. (#31885) Thanks @Sid-Qin. - macOS/LaunchAgent security defaults: write `Umask=63` (octal `077`) into generated gateway launchd plists so post-update service reinstalls keep owner-only file permissions by default instead of falling back to system `022`. (#32022) Fixes #31905. Thanks @liuxiaopai-ai. diff --git a/src/cron/service.session-reaper-in-finally.test.ts b/src/cron/service.session-reaper-in-finally.test.ts new file mode 100644 index 00000000000..f590b330d44 --- /dev/null +++ b/src/cron/service.session-reaper-in-finally.test.ts @@ -0,0 +1,165 @@ +import fs from "node:fs/promises"; +import path from "node:path"; +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { createNoopLogger, createCronStoreHarness } from "./service.test-harness.js"; +import { createCronServiceState } from "./service/state.js"; +import { onTimer } from "./service/timer.js"; +import { resetReaperThrottle } from "./session-reaper.js"; +import type { CronJob } from "./types.js"; + +const noopLogger = createNoopLogger(); +const { makeStorePath } = createCronStoreHarness({ + prefix: "openclaw-cron-reaper-finally-", +}); + +function createDueIsolatedJob(params: { id: string; nowMs: number }): CronJob { + return { + id: params.id, + name: params.id, + enabled: true, + deleteAfterRun: false, + createdAtMs: params.nowMs, + updatedAtMs: params.nowMs, + schedule: { kind: "every", everyMs: 60_000 }, + sessionTarget: "isolated", + wakeMode: "next-heartbeat", + payload: { kind: "agentTurn", message: "test" }, + delivery: { mode: "none" }, + state: { nextRunAtMs: params.nowMs }, + }; +} + +describe("CronService - session reaper runs in finally block (#31946)", () => { + beforeEach(() => { + noopLogger.debug.mockClear(); + noopLogger.info.mockClear(); + noopLogger.warn.mockClear(); + noopLogger.error.mockClear(); + resetReaperThrottle(); + }); + + afterEach(() => { + vi.clearAllMocks(); + }); + + it("session reaper runs even when job execution throws", async () => { + const store = await makeStorePath(); + const now = Date.parse("2026-02-10T10:00:00.000Z"); + + // Write a store with a due job that will trigger execution. + await fs.mkdir(path.dirname(store.storePath), { recursive: true }); + await fs.writeFile( + store.storePath, + JSON.stringify({ + version: 1, + jobs: [createDueIsolatedJob({ id: "failing-job", nowMs: now })], + }), + "utf-8", + ); + + // Create a mock sessionStorePath to track if the reaper is called. + const sessionStorePath = path.join(path.dirname(store.storePath), "sessions", "sessions.json"); + + const state = createCronServiceState({ + storePath: store.storePath, + cronEnabled: true, + log: noopLogger, + nowMs: () => now, + enqueueSystemEvent: vi.fn(), + requestHeartbeatNow: vi.fn(), + // This will throw, simulating a failure during job execution. + runIsolatedAgentJob: vi.fn().mockRejectedValue(new Error("gateway down")), + sessionStorePath, + }); + + await onTimer(state); + + // After onTimer finishes (even with a job error), state.running must be + // false — proving the finally block executed. + expect(state.running).toBe(false); + + // The timer must be re-armed. + expect(state.timer).not.toBeNull(); + }); + + it("session reaper runs when resolveSessionStorePath is provided", async () => { + const store = await makeStorePath(); + const now = Date.parse("2026-02-10T10:00:00.000Z"); + + await fs.mkdir(path.dirname(store.storePath), { recursive: true }); + await fs.writeFile( + store.storePath, + JSON.stringify({ + version: 1, + jobs: [createDueIsolatedJob({ id: "ok-job", nowMs: now })], + }), + "utf-8", + ); + + const resolvedPaths: string[] = []; + const state = createCronServiceState({ + storePath: store.storePath, + cronEnabled: true, + log: noopLogger, + nowMs: () => now, + enqueueSystemEvent: vi.fn(), + requestHeartbeatNow: vi.fn(), + runIsolatedAgentJob: vi.fn().mockResolvedValue({ status: "ok", summary: "done" }), + resolveSessionStorePath: (agentId) => { + const p = path.join(path.dirname(store.storePath), `${agentId}-sessions`, "sessions.json"); + resolvedPaths.push(p); + return p; + }, + }); + + await onTimer(state); + + // The resolveSessionStorePath callback should have been invoked to build + // the set of store paths for the session reaper. + expect(resolvedPaths.length).toBeGreaterThan(0); + expect(state.running).toBe(false); + }); + + it("prunes expired cron-run sessions even when cron store load throws", async () => { + const store = await makeStorePath(); + const now = Date.parse("2026-02-10T10:00:00.000Z"); + const sessionStorePath = path.join(path.dirname(store.storePath), "sessions", "sessions.json"); + + // Force onTimer's try-block to throw before normal execution flow. + await fs.mkdir(path.dirname(store.storePath), { recursive: true }); + await fs.writeFile(store.storePath, "{invalid-json", "utf-8"); + + // Seed an expired cron-run session entry that should be pruned by the reaper. + await fs.mkdir(path.dirname(sessionStorePath), { recursive: true }); + await fs.writeFile( + sessionStorePath, + JSON.stringify({ + "agent:agent-default:cron:failing-job:run:stale": { + sessionId: "session-stale", + updatedAt: now - 3 * 24 * 3_600_000, + }, + }), + "utf-8", + ); + + const state = createCronServiceState({ + storePath: store.storePath, + cronEnabled: true, + log: noopLogger, + nowMs: () => now, + enqueueSystemEvent: vi.fn(), + requestHeartbeatNow: vi.fn(), + runIsolatedAgentJob: vi.fn(), + sessionStorePath, + }); + + await expect(onTimer(state)).rejects.toThrow("Failed to parse cron store"); + + const updatedSessionStore = JSON.parse(await fs.readFile(sessionStorePath, "utf-8")) as Record< + string, + unknown + >; + expect(updatedSessionStore).toEqual({}); + expect(state.running).toBe(false); + }); +}); diff --git a/src/cron/service/timer.ts b/src/cron/service/timer.ts index 3190ccbb45b..9d5e2e8becb 100644 --- a/src/cron/service/timer.ts +++ b/src/cron/service/timer.ts @@ -643,7 +643,11 @@ export async function onTimer(state: CronServiceState) { await persist(state); }); } + } finally { // Piggyback session reaper on timer tick (self-throttled to every 5 min). + // Placed in `finally` so the reaper runs even when a long-running job keeps + // `state.running` true across multiple timer ticks — the early return at the + // top of onTimer would otherwise skip the reaper indefinitely. const storePaths = new Set(); if (state.deps.resolveSessionStorePath) { const defaultAgentId = state.deps.defaultAgentId ?? DEFAULT_AGENT_ID; @@ -675,7 +679,7 @@ export async function onTimer(state: CronServiceState) { } } } - } finally { + state.running = false; armTimer(state); }