diff --git a/extensions/memory-neo4j/config.ts b/extensions/memory-neo4j/config.ts
index 98be192de13..51e1486727c 100644
--- a/extensions/memory-neo4j/config.ts
+++ b/extensions/memory-neo4j/config.ts
@@ -24,6 +24,13 @@ export type MemoryNeo4jConfig = {
   coreMemory: {
     enabled: boolean;
     maxEntries: number;
+    /**
+     * Re-inject core memories when context usage reaches this percentage of the
+     * context window. Valid range is greater than 0 and at most 100. Helps counter
+     * the "lost in the middle" phenomenon by restating core memories near the end
+     * of context. Leave unset to disable (default); invalid values also disable it.
+     */
+    refreshAtContextPercent?: number;
   };
 };
 
@@ -182,9 +189,21 @@ export const memoryNeo4jConfigSchema = {
 
     // Parse coreMemory section (optional with defaults)
     const coreMemoryRaw = cfg.coreMemory as Record | undefined;
+    assertAllowedKeys(
+      coreMemoryRaw ?? {},
+      ["enabled", "maxEntries", "refreshAtContextPercent"],
+      "coreMemory config",
+    );
     const coreMemoryEnabled = coreMemoryRaw?.enabled !== false; // enabled by default
     const coreMemoryMaxEntries =
       typeof coreMemoryRaw?.maxEntries === "number" ? coreMemoryRaw.maxEntries : 50;
+    // refreshAtContextPercent: number in (0, 100]; anything else disables the refresh
+    const refreshAtContextPercent =
+      typeof coreMemoryRaw?.refreshAtContextPercent === "number" &&
+      coreMemoryRaw.refreshAtContextPercent > 0 &&
+      coreMemoryRaw.refreshAtContextPercent <= 100
+        ? coreMemoryRaw.refreshAtContextPercent
+        : undefined;
 
     return {
       neo4j: {
@@ -203,6 +222,7 @@ export const memoryNeo4jConfigSchema = {
       coreMemory: {
         enabled: coreMemoryEnabled,
         maxEntries: coreMemoryMaxEntries,
+        refreshAtContextPercent,
       },
     };
   },
diff --git a/extensions/memory-neo4j/index.ts b/extensions/memory-neo4j/index.ts
index a6671b47063..cd114274e18 100644
--- a/extensions/memory-neo4j/index.ts
+++ b/extensions/memory-neo4j/index.ts
@@ -606,18 +606,84 @@ const memoryNeo4jPlugin = {
     // memories after restarts.
     const bootstrappedSessions = new Set();
 
-    // After compaction: clear bootstrap flag so core memories get re-injected
+    // Track mid-session refresh: maps sessionKey → tokens at last refresh
+    // Used to avoid refreshing too frequently (only refresh after significant context growth)
+    const midSessionRefreshAt = new Map<string, number>();
+    const MIN_TOKENS_SINCE_REFRESH = 10_000; // Only refresh if context grew by 10k+ tokens
+
+    // After compaction: clear bootstrap flag and mid-session refresh tracking
     if (cfg.coreMemory.enabled) {
       api.on("after_compaction", async (_event, ctx) => {
         if (ctx.sessionKey) {
           bootstrappedSessions.delete(ctx.sessionKey);
+          midSessionRefreshAt.delete(ctx.sessionKey);
           api.logger.info?.(
-            `memory-neo4j: cleared bootstrap flag for session ${ctx.sessionKey} after compaction`,
+            `memory-neo4j: cleared bootstrap/refresh flags for session ${ctx.sessionKey} after compaction`,
           );
         }
       });
     }
 
+    // Mid-session core memory refresh: re-inject core memories when context grows past threshold
+    // This counters the "lost in the middle" phenomenon by placing core memories closer to end of context
+    const refreshThreshold = cfg.coreMemory.refreshAtContextPercent;
+    if (cfg.coreMemory.enabled && refreshThreshold) {
+      api.logger.debug?.(
+        `memory-neo4j: registering before_agent_start hook for mid-session core refresh at ${refreshThreshold}%`,
+      );
+      api.on("before_agent_start", async (event, ctx) => {
+        // Skip if context info not available
+        if (!event.contextWindowTokens || !event.estimatedUsedTokens) {
+          return;
+        }
+
+        const sessionKey = ctx.sessionKey ?? "";
+        const agentId = ctx.agentId || "default";
+        const usagePercent = (event.estimatedUsedTokens / event.contextWindowTokens) * 100;
+
+        // Only refresh if we've crossed the threshold
+        if (usagePercent < refreshThreshold) {
+          return;
+        }
+
+        // Check if we've already refreshed recently (prevent over-refreshing).
+        // A large negative delta means the context shrank since the recorded refresh
+        // (e.g. a compaction that never cleared the entry because the session has no
+        // key), so the baseline is stale and must not suppress the refresh.
+        const lastRefreshTokens = midSessionRefreshAt.get(sessionKey) ?? 0;
+        const tokensSinceRefresh = event.estimatedUsedTokens - lastRefreshTokens;
+        if (Math.abs(tokensSinceRefresh) < MIN_TOKENS_SINCE_REFRESH) {
+          api.logger.debug?.(
+            `memory-neo4j: skipping mid-session refresh (only ${tokensSinceRefresh} tokens since last refresh)`,
+          );
+          return;
+        }
+
+        try {
+          const maxEntries = cfg.coreMemory.maxEntries;
+          const coreMemories = await db.listByCategory("core", maxEntries, 0, agentId);
+
+          if (coreMemories.length === 0) {
+            return;
+          }
+
+          // Record this refresh
+          midSessionRefreshAt.set(sessionKey, event.estimatedUsedTokens);
+
+          const content = coreMemories.map((m) => `- ${m.text}`).join("\n");
+          api.logger.info?.(
+            `memory-neo4j: mid-session core refresh at ${usagePercent.toFixed(1)}% context (${coreMemories.length} memories)`,
+          );
+
+          return {
+            prependContext: `\nReminder of persistent context (you may have seen this earlier, re-stating for recency):\n${content}\n`,
+          };
+        } catch (err) {
+          api.logger.warn(`memory-neo4j: mid-session core refresh failed: ${String(err)}`);
+        }
+      });
+    }
+
     // Auto-recall: inject relevant memories before agent starts
     api.logger.debug?.(`memory-neo4j: autoRecall=${cfg.autoRecall}`);
     if (cfg.autoRecall) {
diff --git a/extensions/memory-neo4j/mid-session-refresh.test.ts b/extensions/memory-neo4j/mid-session-refresh.test.ts
new file mode 100644
index 00000000000..0ee03a67eb7
--- /dev/null
+++ b/extensions/memory-neo4j/mid-session-refresh.test.ts
@@ -0,0 +1,117 @@
+/**
+ * Tests for mid-session core memory refresh feature.
+ *
+ * Verifies that core memories are re-injected when context usage exceeds threshold.
+ */
+
+import { describe, it, expect, vi, beforeEach } from "vitest";
+import type { MemoryNeo4jConfig } from "./config.js";
+
+describe("mid-session core memory refresh", () => {
+  // Test context threshold calculation
+  describe("context threshold calculation", () => {
+    it("should calculate usage percentage correctly", () => {
+      const contextWindowTokens = 200_000;
+      const estimatedUsedTokens = 100_000;
+      const usagePercent = (estimatedUsedTokens / contextWindowTokens) * 100;
+      expect(usagePercent).toBe(50);
+    });
+
+    it("should detect when threshold is exceeded", () => {
+      const threshold = 50;
+      const usagePercent = 55;
+      expect(usagePercent >= threshold).toBe(true);
+    });
+
+    it("should not trigger when below threshold", () => {
+      const threshold = 50;
+      const usagePercent = 45;
+      expect(usagePercent >= threshold).toBe(false);
+    });
+  });
+
+  // Test refresh frequency limiting
+  describe("refresh frequency limiting", () => {
+    const MIN_TOKENS_SINCE_REFRESH = 10_000;
+
+    it("should allow refresh when enough tokens have accumulated", () => {
+      const lastRefreshTokens = 50_000;
+      const currentTokens = 65_000;
+      const tokensSinceRefresh = currentTokens - lastRefreshTokens;
+      expect(tokensSinceRefresh >= MIN_TOKENS_SINCE_REFRESH).toBe(true);
+    });
+
+    it("should block refresh when not enough tokens have accumulated", () => {
+      const lastRefreshTokens = 50_000;
+      const currentTokens = 55_000;
+      const tokensSinceRefresh = currentTokens - lastRefreshTokens;
+      expect(tokensSinceRefresh >= MIN_TOKENS_SINCE_REFRESH).toBe(false);
+    });
+
+    it("should allow first refresh (no previous refresh)", () => {
+      const lastRefreshTokens = 0; // No previous refresh
+      const currentTokens = 100_000;
+      const tokensSinceRefresh = currentTokens - lastRefreshTokens;
+      expect(tokensSinceRefresh >= MIN_TOKENS_SINCE_REFRESH).toBe(true);
+    });
+  });
+
+  // Test config parsing
+  describe("config parsing", () => {
+    it("should accept valid refreshAtContextPercent values", async () => {
+      const { memoryNeo4jConfigSchema } = await import("./config.js");
+      const config = memoryNeo4jConfigSchema.parse({
+        neo4j: { uri: "bolt://localhost:7687", user: "neo4j", password: "test" },
+        embedding: { provider: "ollama" },
+        coreMemory: { refreshAtContextPercent: 50 },
+      });
+      expect(config.coreMemory.refreshAtContextPercent).toBe(50);
+    });
+
+    it("should reject refreshAtContextPercent of 0", async () => {
+      const { memoryNeo4jConfigSchema } = await import("./config.js");
+      const config = memoryNeo4jConfigSchema.parse({
+        neo4j: { uri: "bolt://localhost:7687", user: "neo4j", password: "test" },
+        embedding: { provider: "ollama" },
+        coreMemory: { refreshAtContextPercent: 0 },
+      });
+      expect(config.coreMemory.refreshAtContextPercent).toBeUndefined();
+    });
+
+    it("should reject refreshAtContextPercent over 100", async () => {
+      const { memoryNeo4jConfigSchema } = await import("./config.js");
+      const config = memoryNeo4jConfigSchema.parse({
+        neo4j: { uri: "bolt://localhost:7687", user: "neo4j", password: "test" },
+        embedding: { provider: "ollama" },
+        coreMemory: { refreshAtContextPercent: 150 },
+      });
+      expect(config.coreMemory.refreshAtContextPercent).toBeUndefined();
+    });
+
+    it("should default to undefined when not specified", async () => {
+      const { memoryNeo4jConfigSchema } = await import("./config.js");
+      const config = memoryNeo4jConfigSchema.parse({
+        neo4j: { uri: "bolt://localhost:7687", user: "neo4j", password: "test" },
+        embedding: { provider: "ollama" },
+      });
+      expect(config.coreMemory.refreshAtContextPercent).toBeUndefined();
+    });
+  });
+
+  // Test output format
+  describe("refresh output format", () => {
+    it("should format core memories correctly", () => {
+      const coreMemories = [
+        { text: "User prefers TypeScript over JavaScript" },
+        { text: "User works at Acme Corp" },
+      ];
+      const content = coreMemories.map((m) => `- ${m.text}`).join("\n");
+      const output = `\nReminder of persistent context (you may have seen this earlier, re-stating for recency):\n${content}\n`;
+
+      expect(output).toContain("Reminder of persistent context");
+      expect(output).toContain("re-stating for recency");
+      expect(output).toContain("- User prefers TypeScript over JavaScript");
+      expect(output).toContain("- User works at Acme Corp");
+    });
+  });
+});
diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts
index dc648e44280..a040bc89b43 100644
--- a/src/agents/pi-embedded-runner/run/attempt.ts
+++ b/src/agents/pi-embedded-runner/run/attempt.ts
@@ -31,6 +31,8 @@ import {
   listChannelSupportedActions,
   resolveChannelMessageToolHints,
 } from "../../channel-tools.js";
+import { estimateMessagesTokens } from "../../compaction.js";
+import { DEFAULT_CONTEXT_TOKENS } from "../../defaults.js";
 import { resolveOpenClawDocsPath } from "../../docs-path.js";
 import { isTimeoutError } from "../../failover-error.js";
 import { resolveModelAuthMode } from "../../model-auth.js";
@@ -850,10 +852,16 @@ export async function runEmbeddedAttempt(
       let effectivePrompt = params.prompt;
       if (hookRunner?.hasHooks("before_agent_start")) {
         try {
+          // Calculate context usage for mid-session memory refresh
+          const contextWindowTokens = params.model.contextWindow ?? DEFAULT_CONTEXT_TOKENS;
+          const estimatedUsedTokens = estimateMessagesTokens(activeSession.messages);
+
           const hookResult = await hookRunner.runBeforeAgentStart(
             {
               prompt: params.prompt,
               messages: activeSession.messages,
+              contextWindowTokens,
+              estimatedUsedTokens,
             },
             {
               agentId: hookAgentId,
diff --git a/src/plugins/types.ts b/src/plugins/types.ts
index dbb6b144083..d3deca2d371 100644
--- a/src/plugins/types.ts
+++ b/src/plugins/types.ts
@@ -343,6 +343,10 @@ export type PluginHookBootstrapResult = {
 export type PluginHookBeforeAgentStartEvent = {
   prompt: string;
   messages?: unknown[];
+  /** Model's total context window in tokens. */
+  contextWindowTokens?: number;
+  /** Estimated tokens currently used in context. */
+  estimatedUsedTokens?: number;
 };
 
 export type PluginHookBeforeAgentStartResult = {