fix(memory): reindex when sources change

2026-04-19 06:17:27 +00:00 · 2026-02-22 15:12:01 -08:00
parent 44727dc3a1
commit d7747148d0
3 changed files with 119 additions and 0 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -118,6 +118,7 @@ Docs: https://docs.openclaw.ai
 - Memory/Remote HTTP: centralize remote memory HTTP calls behind a shared guarded helper (`withRemoteHttpResponse`) so embeddings and batch flows use one request/release path.
 - Memory/Embeddings: apply configured remote-base host pinning (`allowedHostnames`) across OpenAI/Voyage/Gemini embedding requests to keep private/self-hosted endpoints working without cross-host drift. (#18198) Thanks @ianpcook.
 - Memory/Batch: route OpenAI/Voyage/Gemini batch upload/create/status/download requests through the same guarded HTTP path for consistent SSRF policy enforcement.
 - Memory/Index: detect memory source-set changes (for example enabling `sessions` after an existing memory-only index) and trigger a full reindex so existing session transcripts are indexed without requiring `--force`. (#17576) Thanks @TarsAI-Agent.
 - Memory/QMD: on Windows, resolve bare `qmd`/`mcporter` command names to npm shim executables (`.cmd`) before spawning, so qmd boot updates and mcporter-backed searches no longer fail with `spawn ... ENOENT` on default npm installs. (#23899) Thanks @arcbuilder-ai.
 - Memory/QMD: parse plain-text `qmd collection list --json` output when older qmd builds ignore JSON mode, and retry memory searches once after re-ensuring managed collections when qmd returns `Collection not found ...`. (#23613) Thanks @leozhucn.
 - Signal/RPC: guard malformed Signal RPC JSON responses with a clear status-scoped error and add regression coverage for invalid JSON responses. (#22995) Thanks @adhitShet.
--- a/src/memory/index.test.ts
+++ b/src/memory/index.test.ts
@@ -93,6 +93,8 @@ describe("memory index", () => {
  function createCfg(params: {
    storePath: string;
    extraPaths?: string[];
    sources?: Array<"memory" | "sessions">;
    sessionMemory?: boolean;
    model?: string;
    vectorEnabled?: boolean;
    cacheEnabled?: boolean;
@@ -115,6 +117,8 @@ describe("memory index", () => {
            },
            cache: params.cacheEnabled ? { enabled: true } : undefined,
            extraPaths: params.extraPaths,
            sources: params.sources,
            experimental: { sessionMemory: params.sessionMemory ?? false },
          },
        },
        list: [{ id: "main", default: true }],
@@ -195,6 +199,85 @@ describe("memory index", () => {
    await statusOnly.manager.close?.();
  });
  // Regression: an index first synced with only the "memory" source must report session
  // files and chunks after the config also lists "sessions", using a plain sync (no `force`).
  it("reindexes sessions when source config adds sessions to an existing index", async () => {
    const indexSourceChangePath = path.join(
      workspaceDir,
      `index-source-change-${Date.now()}.sqlite`,
    );
    const stateDir = path.join(fixtureRoot, `state-source-change-${Date.now()}`);
    const sessionDir = path.join(stateDir, "agents", "main", "sessions");
    const previousStateDir = process.env.OPENCLAW_STATE_DIR;
    // Managers that were opened but not yet closed. Drained in `finally` so a failed
    // assertion or a throwing sync cannot leave a manager open for later tests.
    const openManagers: Array<{ close?: () => unknown }> = [];
    try {
      // Fixture setup lives inside the try so a failure here still cleans up stateDir.
      await fs.mkdir(sessionDir, { recursive: true });
      await fs.writeFile(
        path.join(sessionDir, "session-source-change.jsonl"),
        [
          JSON.stringify({
            type: "message",
            message: {
              role: "user",
              content: [{ type: "text", text: "session change test user line" }],
            },
          }),
          JSON.stringify({
            type: "message",
            message: {
              role: "assistant",
              content: [{ type: "text", text: "session change test assistant line" }],
            },
          }),
        ].join("\n") + "\n",
      );
      // Mutate the environment only once the finally block is guaranteed to restore it.
      process.env.OPENCLAW_STATE_DIR = stateDir;
      const firstCfg = createCfg({
        storePath: indexSourceChangePath,
        sources: ["memory"],
        sessionMemory: false,
      });
      const secondCfg = createCfg({
        storePath: indexSourceChangePath,
        sources: ["memory", "sessions"],
        sessionMemory: true,
      });

      // Pass 1: memory-only config, so no session files may be counted.
      const first = await getMemorySearchManager({ cfg: firstCfg, agentId: "main" });
      expect(first.manager).not.toBeNull();
      if (!first.manager) {
        throw new Error("manager missing");
      }
      openManagers.push(first.manager);
      await first.manager.sync?.({ reason: "test" });
      const firstStatus = first.manager.status();
      expect(
        firstStatus.sourceCounts?.find((entry) => entry.source === "sessions")?.files ?? 0,
      ).toBe(0);
      // Untrack before closing so a throwing close() is not retried during cleanup.
      openManagers.pop();
      await first.manager.close?.();

      // Pass 2: same store path, sessions now configured; sync must index the session file.
      const second = await getMemorySearchManager({ cfg: secondCfg, agentId: "main" });
      expect(second.manager).not.toBeNull();
      if (!second.manager) {
        throw new Error("manager missing");
      }
      openManagers.push(second.manager);
      await second.manager.sync?.({ reason: "test" });
      const secondStatus = second.manager.status();
      expect(secondStatus.sourceCounts?.find((entry) => entry.source === "sessions")?.files).toBe(
        1,
      );
      expect(
        secondStatus.sourceCounts?.find((entry) => entry.source === "sessions")?.chunks ?? 0,
      ).toBeGreaterThan(0);
      openManagers.pop();
      await second.manager.close?.();
    } finally {
      // Close anything still open while the state-dir override is in effect, matching the
      // happy path where close() runs before the environment is restored.
      for (const manager of openManagers.splice(0)) {
        try {
          await manager.close?.();
        } catch {
          // Best-effort cleanup: do not mask the failure that brought us here.
        }
      }
      if (previousStateDir === undefined) {
        delete process.env.OPENCLAW_STATE_DIR;
      } else {
        process.env.OPENCLAW_STATE_DIR = previousStateDir;
      }
      await fs.rm(stateDir, { recursive: true, force: true });
    }
  });
  it("reindexes when the embedding model changes", async () => {
    const indexModelPath = path.join(workspaceDir, `index-model-change-${Date.now()}.sqlite`);
    const base = createCfg({ storePath: indexModelPath });
--- a/src/memory/manager-sync-ops.ts
+++ b/src/memory/manager-sync-ops.ts
@@ -45,6 +45,7 @@ type MemoryIndexMeta = {
  model: string;
  provider: string;
  providerKey?: string;
  sources?: MemorySource[];
  chunkTokens: number;
  chunkOverlap: number;
  vectorDims?: number;
@@ -851,12 +852,14 @@ export abstract class MemoryManagerSyncOps {
    }
    const vectorReady = await this.ensureVectorReady();
    const meta = this.readMeta();
    const configuredSources = this.resolveConfiguredSourcesForMeta();
    const needsFullReindex =
      params?.force ||
      !meta ||
      (this.provider && meta.model !== this.provider.model) ||
      (this.provider && meta.provider !== this.provider.id) ||
      meta.providerKey !== this.providerKey ||
      this.metaSourcesDiffer(meta, configuredSources) ||
      meta.chunkTokens !== this.settings.chunking.tokens ||
      meta.chunkOverlap !== this.settings.chunking.overlap ||
      (vectorReady && !meta?.vectorDims);
@@ -1056,6 +1059,7 @@ export abstract class MemoryManagerSyncOps {
        model: this.provider?.model ?? "fts-only",
        provider: this.provider?.id ?? "none",
        providerKey: this.providerKey!,
        sources: this.resolveConfiguredSourcesForMeta(),
        chunkTokens: this.settings.chunking.tokens,
        chunkOverlap: this.settings.chunking.overlap,
      };
@@ -1126,6 +1130,7 @@ export abstract class MemoryManagerSyncOps {
      model: this.provider?.model ?? "fts-only",
      provider: this.provider?.id ?? "none",
      providerKey: this.providerKey!,
      sources: this.resolveConfiguredSourcesForMeta(),
      chunkTokens: this.settings.chunking.tokens,
      chunkOverlap: this.settings.chunking.overlap,
    };
@@ -1172,4 +1177,34 @@ export abstract class MemoryManagerSyncOps {
      )
      .run(META_KEY, value);
  }
  /**
   * Builds the canonical source list for the current configuration, in the form that is
   * written to and compared against the index metadata.
   *
   * Only "memory" and "sessions" are kept; the result is de-duplicated and sorted so it can
   * be compared position by position with the output of `normalizeMetaSources`, which applies
   * the same de-duplication. Without that, a repeated entry would change the array length
   * and be reported as a source change on every sync.
   *
   * @returns The sorted, unique configured sources, or `["memory"]` when none remain.
   */
  private resolveConfiguredSourcesForMeta(): MemorySource[] {
    const normalized = Array.from(
      new Set(
        Array.from(this.sources).filter(
          (source): source is MemorySource => source === "memory" || source === "sessions",
        ),
      ),
    ).toSorted();
    return normalized.length > 0 ? normalized : ["memory"];
  }
  /**
   * Converts the `sources` field of persisted index metadata into a sorted, unique list
   * containing only "memory" and "sessions".
   *
   * @param meta Index metadata read back from the store.
   * @returns The normalized source list. Falls back to `["memory"]` when `meta.sources` is
   *   not an array (indexes written before sources were persisted) or when no recognized
   *   source remains after filtering.
   */
  private normalizeMetaSources(meta: MemoryIndexMeta): MemorySource[] {
    const persisted = meta.sources;
    if (!Array.isArray(persisted)) {
      // Older indexes carry no `sources` field; treat them as memory-only.
      return ["memory"];
    }
    const unique = new Set<MemorySource>();
    for (const candidate of persisted) {
      if (candidate === "memory" || candidate === "sessions") {
        unique.add(candidate);
      }
    }
    if (unique.size === 0) {
      return ["memory"];
    }
    return Array.from(unique).toSorted();
  }
  /**
   * Reports whether the sources recorded in the index metadata differ from the configured ones.
   *
   * The metadata side is normalized via `normalizeMetaSources`; the two lists are then
   * compared by length and position.
   *
   * @param meta Index metadata read back from the store.
   * @param configuredSources Source list to compare against, in the same canonical order.
   * @returns `true` when the lists differ in length or in any position, otherwise `false`.
   */
  private metaSourcesDiffer(meta: MemoryIndexMeta, configuredSources: MemorySource[]): boolean {
    const persisted = this.normalizeMetaSources(meta);
    const sameLength = persisted.length === configuredSources.length;
    const identical =
      sameLength && persisted.every((entry, position) => entry === configuredSources[position]);
    return !identical;
  }
 }