mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-08 15:08:25 +00:00
fix: unify session maintenance and cron run pruning (#13083)
* fix: prune stale session entries, cap entry count, and rotate sessions.json
The sessions.json file grows unbounded over time. Every heartbeat tick (default: 30m)
triggers multiple full rewrites, and session keys from groups, threads, and DMs
accumulate indefinitely with large embedded objects (skillsSnapshot,
systemPromptReport). At >50MB the synchronous JSON parse blocks the event loop,
causing Telegram webhook timeouts and effectively taking the bot down.
Three mitigations, all running inside saveSessionStoreUnlocked() on every write:
1. Prune stale entries: remove entries with updatedAt older than 30 days
(configurable via session.maintenance.pruneDays in openclaw.json)
2. Cap entry count: keep only the 500 most recently updated entries
(configurable via session.maintenance.maxEntries). Entries without updatedAt
are evicted first.
3. File rotation: if the existing sessions.json exceeds 10MB before a write
(threshold configurable via session.maintenance.rotateBytes), rename it to
sessions.json.bak.{timestamp} and keep only the 3 most recent backups.
All three thresholds are configurable under session.maintenance in openclaw.json
with Zod validation. No env vars.
Existing tests were updated to use Date.now()-based timestamps instead of small
epoch-relative values (1, 2, 3), which would otherwise be incorrectly pruned as stale.
27 new tests covering pruning, capping, rotation, and integration scenarios.
* feat: auto-prune expired cron run sessions (#12289)
Add TTL-based reaper for isolated cron run sessions that accumulate
indefinitely in sessions.json.
New config option:
cron.sessionRetention: string | false (default: '24h')
The reaper piggybacks on the cron timer tick and is self-throttled
to sweep at most once every 5 minutes. It removes session entries matching
the pattern cron:<jobId>:run:<uuid> whose updatedAt + retention < now.
Design follows the Kubernetes ttlSecondsAfterFinished pattern:
- Sessions are persisted normally (observability/debugging)
- A periodic reaper prunes expired entries
- Configurable retention with sensible default
- Set to false to disable pruning entirely
Files changed:
- src/config/types.cron.ts: Add sessionRetention to CronConfig
- src/config/zod-schema.ts: Add Zod validation for sessionRetention
- src/cron/session-reaper.ts: New reaper module (sweepCronRunSessions)
- src/cron/session-reaper.test.ts: 12 tests covering all paths
- src/cron/service/state.ts: Add cronConfig/sessionStorePath to deps
- src/cron/service/timer.ts: Wire reaper into onTimer tick
- src/gateway/server-cron.ts: Pass config and session store path to deps
Closes #12289
* fix: sweep cron session stores per agent
* docs: add changelog for session maintenance (#13083) (thanks @skyfallsin, @Glucksberg)
* fix: add warn-only session maintenance mode
* fix: warn-only maintenance defaults to active session
* fix: deliver maintenance warnings to active session
* docs: add session maintenance examples
* fix: accept duration and size maintenance thresholds
* refactor: share cron run session key check
* fix: format issues and replace defaultRuntime.warn with console.warn
---------
Co-authored-by: Pradeep Elankumaran <pradeepe@gmail.com>
Co-authored-by: Glucksberg <markuscontasul@gmail.com>
Co-authored-by: max <40643627+quotentiroler@users.noreply.github.com>
Co-authored-by: quotentiroler <max.nussbaumer@maxhealth.tech>
This commit is contained in:
committed by
GitHub
parent
0657d7c772
commit
e19a23520c
@@ -1,3 +1,4 @@
|
||||
import type { CronConfig } from "../../config/types.cron.js";
|
||||
import type { HeartbeatRunResult } from "../../infra/heartbeat-wake.js";
|
||||
import type { CronJob, CronJobCreate, CronJobPatch, CronStoreFile } from "../types.js";
|
||||
|
||||
@@ -26,6 +27,14 @@ export type CronServiceDeps = {
|
||||
log: Logger;
|
||||
storePath: string;
|
||||
cronEnabled: boolean;
|
||||
/** CronConfig for session retention settings. */
|
||||
cronConfig?: CronConfig;
|
||||
/** Default agent id for jobs without an agent id. */
|
||||
defaultAgentId?: string;
|
||||
/** Resolve session store path for a given agent id. */
|
||||
resolveSessionStorePath?: (agentId?: string) => string;
|
||||
/** Path to the session store (sessions.json) for reaper use. */
|
||||
sessionStorePath?: string;
|
||||
enqueueSystemEvent: (text: string, opts?: { agentId?: string }) => void;
|
||||
requestHeartbeatNow: (opts?: { reason?: string }) => void;
|
||||
runHeartbeatOnce?: (opts?: { reason?: string }) => Promise<HeartbeatRunResult>;
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
import type { HeartbeatRunResult } from "../../infra/heartbeat-wake.js";
|
||||
import type { CronJob } from "../types.js";
|
||||
import type { CronEvent, CronServiceState } from "./state.js";
|
||||
import { DEFAULT_AGENT_ID } from "../../routing/session-key.js";
|
||||
import { resolveCronDeliveryPlan } from "../delivery.js";
|
||||
import { sweepCronRunSessions } from "../session-reaper.js";
|
||||
import {
|
||||
computeJobNextRunAtMs,
|
||||
nextWakeAtMs,
|
||||
@@ -273,6 +275,38 @@ export async function onTimer(state: CronServiceState) {
|
||||
await persist(state);
|
||||
});
|
||||
}
|
||||
// Piggyback session reaper on timer tick (self-throttled to every 5 min).
|
||||
const storePaths = new Set<string>();
|
||||
if (state.deps.resolveSessionStorePath) {
|
||||
const defaultAgentId = state.deps.defaultAgentId ?? DEFAULT_AGENT_ID;
|
||||
if (state.store?.jobs?.length) {
|
||||
for (const job of state.store.jobs) {
|
||||
const agentId =
|
||||
typeof job.agentId === "string" && job.agentId.trim() ? job.agentId : defaultAgentId;
|
||||
storePaths.add(state.deps.resolveSessionStorePath(agentId));
|
||||
}
|
||||
} else {
|
||||
storePaths.add(state.deps.resolveSessionStorePath(defaultAgentId));
|
||||
}
|
||||
} else if (state.deps.sessionStorePath) {
|
||||
storePaths.add(state.deps.sessionStorePath);
|
||||
}
|
||||
|
||||
if (storePaths.size > 0) {
|
||||
const nowMs = state.deps.nowMs();
|
||||
for (const storePath of storePaths) {
|
||||
try {
|
||||
await sweepCronRunSessions({
|
||||
cronConfig: state.deps.cronConfig,
|
||||
sessionStorePath: storePath,
|
||||
nowMs,
|
||||
log: state.deps.log,
|
||||
});
|
||||
} catch (err) {
|
||||
state.deps.log.warn({ err: String(err), storePath }, "cron: session reaper sweep failed");
|
||||
}
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
state.running = false;
|
||||
armTimer(state);
|
||||
|
||||
Reference in New Issue
Block a user