mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-08 22:18:27 +00:00
fix: harden session lock contention and cleanup
This commit is contained in:
@@ -52,6 +52,11 @@ type WatchdogState = {
|
||||
timer?: NodeJS.Timeout;
|
||||
};
|
||||
|
||||
/**
 * The `pid`, `pidAlive`, `createdAt`, `ageMs`, `stale`, and `staleReasons`
 * fields of `SessionLockInspection`.
 *
 * This is the return type of `inspectLockPayload` and the input type of the
 * stale-lock helpers (`lockInspectionNeedsMtimeStaleFallback`,
 * `shouldReclaimContendedLockFile`).
 */
type LockInspectionDetails = Pick<
  SessionLockInspection,
  "pid" | "pidAlive" | "createdAt" | "ageMs" | "stale" | "staleReasons"
>;
|
||||
|
||||
// Map of HeldLock entries obtained from resolveProcessScopedMap under HELD_LOCKS_KEY.
// NOTE(review): presumably shared by every copy of this module in the process — confirm
// against resolveProcessScopedMap.
const HELD_LOCKS = resolveProcessScopedMap<HeldLock>(HELD_LOCKS_KEY);
|
||||
|
||||
function resolveCleanupState(): CleanupState {
|
||||
@@ -281,10 +286,7 @@ function inspectLockPayload(
|
||||
payload: LockFilePayload | null,
|
||||
staleMs: number,
|
||||
nowMs: number,
|
||||
): Pick<
|
||||
SessionLockInspection,
|
||||
"pid" | "pidAlive" | "createdAt" | "ageMs" | "stale" | "staleReasons"
|
||||
> {
|
||||
): LockInspectionDetails {
|
||||
const pid = typeof payload?.pid === "number" ? payload.pid : null;
|
||||
const pidAlive = pid !== null ? isPidAlive(pid) : false;
|
||||
const createdAt = typeof payload?.createdAt === "string" ? payload.createdAt : null;
|
||||
@@ -313,6 +315,37 @@ function inspectLockPayload(
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Reports whether a stale verdict rests solely on missing or unparseable lock
 * metadata, in which case the payload alone is not enough to judge the lock.
 *
 * @param details - Inspection result for a lock file payload.
 * @returns `true` when `details.stale` is set and every entry of
 *   `details.staleReasons` is `"missing-pid"` or `"invalid-createdAt"`;
 *   `false` otherwise.
 */
function lockInspectionNeedsMtimeStaleFallback(details: LockInspectionDetails): boolean {
  return (
    details.stale &&
    details.staleReasons.every(
      (reason) => reason === "missing-pid" || reason === "invalid-createdAt",
    )
  );
}
|
||||
|
||||
/**
 * Decides whether an existing lock file that blocked acquisition should be
 * removed so the caller can retry.
 *
 * @param lockPath - Path of the contended lock file.
 * @param details - Inspection result computed from the lock file payload.
 * @param staleMs - Age threshold in milliseconds; the file's mtime age must
 *   exceed it for the mtime fallback to report the lock as reclaimable.
 * @param nowMs - Current time in epoch milliseconds, used to compute mtime age.
 * @returns `false` when the lock is not stale; `true` when it is stale for any
 *   reason other than missing/invalid metadata; otherwise the result of the
 *   mtime check described in the body.
 */
async function shouldReclaimContendedLockFile(
  lockPath: string,
  details: LockInspectionDetails,
  staleMs: number,
  nowMs: number,
): Promise<boolean> {
  if (!details.stale) {
    return false;
  }
  if (!lockInspectionNeedsMtimeStaleFallback(details)) {
    return true;
  }
  // The payload only lacks usable metadata. Presumably this can happen while
  // another process has created the file but not yet written its payload
  // (TODO confirm), so fall back to the file's own mtime to decide staleness.
  try {
    const stat = await fs.stat(lockPath);
    // Clamp to zero so an mtime ahead of nowMs is not treated as a negative age.
    const ageMs = Math.max(0, nowMs - stat.mtimeMs);
    return ageMs > staleMs;
  } catch (error) {
    const code = (error as { code?: string } | null)?.code;
    // ENOENT: the file is already gone, so report false. Any other stat
    // failure reports true.
    return code !== "ENOENT";
  }
}
|
||||
|
||||
export async function cleanStaleLockFiles(params: {
|
||||
sessionsDir: string;
|
||||
staleMs?: number;
|
||||
@@ -410,8 +443,9 @@ export async function acquireSessionWriteLock(params: {
|
||||
let attempt = 0;
|
||||
while (Date.now() - startedAt < timeoutMs) {
|
||||
attempt += 1;
|
||||
let handle: fs.FileHandle | null = null;
|
||||
try {
|
||||
const handle = await fs.open(lockPath, "wx");
|
||||
handle = await fs.open(lockPath, "wx");
|
||||
const createdAt = new Date().toISOString();
|
||||
await handle.writeFile(JSON.stringify({ pid: process.pid, createdAt }, null, 2), "utf8");
|
||||
const createdHeld: HeldLock = {
|
||||
@@ -428,13 +462,26 @@ export async function acquireSessionWriteLock(params: {
|
||||
},
|
||||
};
|
||||
} catch (err) {
|
||||
if (handle) {
|
||||
try {
|
||||
await handle.close();
|
||||
} catch {
|
||||
// Ignore cleanup errors on failed lock initialization.
|
||||
}
|
||||
try {
|
||||
await fs.rm(lockPath, { force: true });
|
||||
} catch {
|
||||
// Ignore cleanup errors on failed lock initialization.
|
||||
}
|
||||
}
|
||||
const code = (err as { code?: unknown }).code;
|
||||
if (code !== "EEXIST") {
|
||||
throw err;
|
||||
}
|
||||
const payload = await readLockPayload(lockPath);
|
||||
const inspected = inspectLockPayload(payload, staleMs, Date.now());
|
||||
if (inspected.stale) {
|
||||
const nowMs = Date.now();
|
||||
const inspected = inspectLockPayload(payload, staleMs, nowMs);
|
||||
if (await shouldReclaimContendedLockFile(lockPath, inspected, staleMs, nowMs)) {
|
||||
await fs.rm(lockPath, { force: true });
|
||||
continue;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user