mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-19 06:17:27 +00:00
fix(memory): reindex when sources change
This commit is contained in:
@@ -118,6 +118,7 @@ Docs: https://docs.openclaw.ai
|
|||||||
- Memory/Remote HTTP: centralize remote memory HTTP calls behind a shared guarded helper (`withRemoteHttpResponse`) so embeddings and batch flows use one request/release path.
|
- Memory/Remote HTTP: centralize remote memory HTTP calls behind a shared guarded helper (`withRemoteHttpResponse`) so embeddings and batch flows use one request/release path.
|
||||||
- Memory/Embeddings: apply configured remote-base host pinning (`allowedHostnames`) across OpenAI/Voyage/Gemini embedding requests to keep private/self-hosted endpoints working without cross-host drift. (#18198) Thanks @ianpcook.
|
- Memory/Embeddings: apply configured remote-base host pinning (`allowedHostnames`) across OpenAI/Voyage/Gemini embedding requests to keep private/self-hosted endpoints working without cross-host drift. (#18198) Thanks @ianpcook.
|
||||||
- Memory/Batch: route OpenAI/Voyage/Gemini batch upload/create/status/download requests through the same guarded HTTP path for consistent SSRF policy enforcement.
|
- Memory/Batch: route OpenAI/Voyage/Gemini batch upload/create/status/download requests through the same guarded HTTP path for consistent SSRF policy enforcement.
|
||||||
|
- Memory/Index: detect memory source-set changes (for example enabling `sessions` after an existing memory-only index) and trigger a full reindex so existing session transcripts are indexed without requiring `--force`. (#17576) Thanks @TarsAI-Agent.
|
||||||
- Memory/QMD: on Windows, resolve bare `qmd`/`mcporter` command names to npm shim executables (`.cmd`) before spawning, so qmd boot updates and mcporter-backed searches no longer fail with `spawn ... ENOENT` on default npm installs. (#23899) Thanks @arcbuilder-ai.
|
- Memory/QMD: on Windows, resolve bare `qmd`/`mcporter` command names to npm shim executables (`.cmd`) before spawning, so qmd boot updates and mcporter-backed searches no longer fail with `spawn ... ENOENT` on default npm installs. (#23899) Thanks @arcbuilder-ai.
|
||||||
- Memory/QMD: parse plain-text `qmd collection list --json` output when older qmd builds ignore JSON mode, and retry memory searches once after re-ensuring managed collections when qmd returns `Collection not found ...`. (#23613) Thanks @leozhucn.
|
- Memory/QMD: parse plain-text `qmd collection list --json` output when older qmd builds ignore JSON mode, and retry memory searches once after re-ensuring managed collections when qmd returns `Collection not found ...`. (#23613) Thanks @leozhucn.
|
||||||
- Signal/RPC: guard malformed Signal RPC JSON responses with a clear status-scoped error and add regression coverage for invalid JSON responses. (#22995) Thanks @adhitShet.
|
- Signal/RPC: guard malformed Signal RPC JSON responses with a clear status-scoped error and add regression coverage for invalid JSON responses. (#22995) Thanks @adhitShet.
|
||||||
|
|||||||
@@ -93,6 +93,8 @@ describe("memory index", () => {
|
|||||||
function createCfg(params: {
|
function createCfg(params: {
|
||||||
storePath: string;
|
storePath: string;
|
||||||
extraPaths?: string[];
|
extraPaths?: string[];
|
||||||
|
sources?: Array<"memory" | "sessions">;
|
||||||
|
sessionMemory?: boolean;
|
||||||
model?: string;
|
model?: string;
|
||||||
vectorEnabled?: boolean;
|
vectorEnabled?: boolean;
|
||||||
cacheEnabled?: boolean;
|
cacheEnabled?: boolean;
|
||||||
@@ -115,6 +117,8 @@ describe("memory index", () => {
|
|||||||
},
|
},
|
||||||
cache: params.cacheEnabled ? { enabled: true } : undefined,
|
cache: params.cacheEnabled ? { enabled: true } : undefined,
|
||||||
extraPaths: params.extraPaths,
|
extraPaths: params.extraPaths,
|
||||||
|
sources: params.sources,
|
||||||
|
experimental: { sessionMemory: params.sessionMemory ?? false },
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
list: [{ id: "main", default: true }],
|
list: [{ id: "main", default: true }],
|
||||||
@@ -195,6 +199,85 @@ describe("memory index", () => {
|
|||||||
await statusOnly.manager.close?.();
|
await statusOnly.manager.close?.();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Regression test: an index first built with only the "memory" source must pick up
// existing session transcripts once "sessions" is added to the configured sources,
// without the caller having to force a reindex.
it("reindexes sessions when source config adds sessions to an existing index", async () => {
  const indexSourceChangePath = path.join(
    workspaceDir,
    `index-source-change-${Date.now()}.sqlite`,
  );
  const stateDir = path.join(fixtureRoot, `state-source-change-${Date.now()}`);
  const sessionDir = path.join(stateDir, "agents", "main", "sessions");
  await fs.mkdir(sessionDir, { recursive: true });
  // Seed one session transcript (JSONL, one message per line) before any index exists.
  await fs.writeFile(
    path.join(sessionDir, "session-source-change.jsonl"),
    [
      JSON.stringify({
        type: "message",
        message: {
          role: "user",
          content: [{ type: "text", text: "session change test user line" }],
        },
      }),
      JSON.stringify({
        type: "message",
        message: {
          role: "assistant",
          content: [{ type: "text", text: "session change test assistant line" }],
        },
      }),
    ].join("\n") + "\n",
  );

  // Point the state dir at the fixture for the duration of the test; restored in `finally`.
  const previousStateDir = process.env.OPENCLAW_STATE_DIR;
  process.env.OPENCLAW_STATE_DIR = stateDir;

  // Both configs share the same store path so the second run sees the first run's index.
  const firstCfg = createCfg({
    storePath: indexSourceChangePath,
    sources: ["memory"],
    sessionMemory: false,
  });
  const secondCfg = createCfg({
    storePath: indexSourceChangePath,
    sources: ["memory", "sessions"],
    sessionMemory: true,
  });

  // Closes whichever manager is currently open. Cleared after each explicit close so the
  // `finally` block only acts when an assertion or sync call threw before the close ran.
  let closeOpenManager: (() => unknown) | undefined;

  try {
    const first = await getMemorySearchManager({ cfg: firstCfg, agentId: "main" });
    closeOpenManager = () => first.manager?.close?.();
    expect(first.manager).not.toBeNull();
    if (!first.manager) {
      throw new Error("manager missing");
    }
    await first.manager.sync?.({ reason: "test" });
    const firstStatus = first.manager.status();
    // With only "memory" configured, no session files may be reported.
    expect(
      firstStatus.sourceCounts?.find((entry) => entry.source === "sessions")?.files ?? 0,
    ).toBe(0);
    // Close the first manager before the second one is requested for the same store.
    await first.manager.close?.();
    closeOpenManager = undefined;

    const second = await getMemorySearchManager({ cfg: secondCfg, agentId: "main" });
    closeOpenManager = () => second.manager?.close?.();
    expect(second.manager).not.toBeNull();
    if (!second.manager) {
      throw new Error("manager missing");
    }
    await second.manager.sync?.({ reason: "test" });
    const secondStatus = second.manager.status();
    // After adding "sessions", the seeded transcript must be indexed and chunked.
    expect(secondStatus.sourceCounts?.find((entry) => entry.source === "sessions")?.files).toBe(
      1,
    );
    expect(
      secondStatus.sourceCounts?.find((entry) => entry.source === "sessions")?.chunks ?? 0,
    ).toBeGreaterThan(0);
    await second.manager.close?.();
    closeOpenManager = undefined;
  } finally {
    try {
      // Failure path only: release the manager that was still open when the test threw.
      await closeOpenManager?.();
    } finally {
      if (previousStateDir === undefined) {
        delete process.env.OPENCLAW_STATE_DIR;
      } else {
        process.env.OPENCLAW_STATE_DIR = previousStateDir;
      }
      await fs.rm(stateDir, { recursive: true, force: true });
    }
  }
});
|
||||||
|
|
||||||
it("reindexes when the embedding model changes", async () => {
|
it("reindexes when the embedding model changes", async () => {
|
||||||
const indexModelPath = path.join(workspaceDir, `index-model-change-${Date.now()}.sqlite`);
|
const indexModelPath = path.join(workspaceDir, `index-model-change-${Date.now()}.sqlite`);
|
||||||
const base = createCfg({ storePath: indexModelPath });
|
const base = createCfg({ storePath: indexModelPath });
|
||||||
|
|||||||
@@ -45,6 +45,7 @@ type MemoryIndexMeta = {
|
|||||||
model: string;
|
model: string;
|
||||||
provider: string;
|
provider: string;
|
||||||
providerKey?: string;
|
providerKey?: string;
|
||||||
|
sources?: MemorySource[];
|
||||||
chunkTokens: number;
|
chunkTokens: number;
|
||||||
chunkOverlap: number;
|
chunkOverlap: number;
|
||||||
vectorDims?: number;
|
vectorDims?: number;
|
||||||
@@ -851,12 +852,14 @@ export abstract class MemoryManagerSyncOps {
|
|||||||
}
|
}
|
||||||
const vectorReady = await this.ensureVectorReady();
|
const vectorReady = await this.ensureVectorReady();
|
||||||
const meta = this.readMeta();
|
const meta = this.readMeta();
|
||||||
|
const configuredSources = this.resolveConfiguredSourcesForMeta();
|
||||||
const needsFullReindex =
|
const needsFullReindex =
|
||||||
params?.force ||
|
params?.force ||
|
||||||
!meta ||
|
!meta ||
|
||||||
(this.provider && meta.model !== this.provider.model) ||
|
(this.provider && meta.model !== this.provider.model) ||
|
||||||
(this.provider && meta.provider !== this.provider.id) ||
|
(this.provider && meta.provider !== this.provider.id) ||
|
||||||
meta.providerKey !== this.providerKey ||
|
meta.providerKey !== this.providerKey ||
|
||||||
|
this.metaSourcesDiffer(meta, configuredSources) ||
|
||||||
meta.chunkTokens !== this.settings.chunking.tokens ||
|
meta.chunkTokens !== this.settings.chunking.tokens ||
|
||||||
meta.chunkOverlap !== this.settings.chunking.overlap ||
|
meta.chunkOverlap !== this.settings.chunking.overlap ||
|
||||||
(vectorReady && !meta?.vectorDims);
|
(vectorReady && !meta?.vectorDims);
|
||||||
@@ -1056,6 +1059,7 @@ export abstract class MemoryManagerSyncOps {
|
|||||||
model: this.provider?.model ?? "fts-only",
|
model: this.provider?.model ?? "fts-only",
|
||||||
provider: this.provider?.id ?? "none",
|
provider: this.provider?.id ?? "none",
|
||||||
providerKey: this.providerKey!,
|
providerKey: this.providerKey!,
|
||||||
|
sources: this.resolveConfiguredSourcesForMeta(),
|
||||||
chunkTokens: this.settings.chunking.tokens,
|
chunkTokens: this.settings.chunking.tokens,
|
||||||
chunkOverlap: this.settings.chunking.overlap,
|
chunkOverlap: this.settings.chunking.overlap,
|
||||||
};
|
};
|
||||||
@@ -1126,6 +1130,7 @@ export abstract class MemoryManagerSyncOps {
|
|||||||
model: this.provider?.model ?? "fts-only",
|
model: this.provider?.model ?? "fts-only",
|
||||||
provider: this.provider?.id ?? "none",
|
provider: this.provider?.id ?? "none",
|
||||||
providerKey: this.providerKey!,
|
providerKey: this.providerKey!,
|
||||||
|
sources: this.resolveConfiguredSourcesForMeta(),
|
||||||
chunkTokens: this.settings.chunking.tokens,
|
chunkTokens: this.settings.chunking.tokens,
|
||||||
chunkOverlap: this.settings.chunking.overlap,
|
chunkOverlap: this.settings.chunking.overlap,
|
||||||
};
|
};
|
||||||
@@ -1172,4 +1177,34 @@ export abstract class MemoryManagerSyncOps {
|
|||||||
)
|
)
|
||||||
.run(META_KEY, value);
|
.run(META_KEY, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Builds the source list recorded in the index meta from `this.sources`.
 *
 * Only the `"memory"` and `"sessions"` entries are kept, and the result is returned in
 * default sort order. When nothing recognised remains, the list falls back to
 * `["memory"]`.
 *
 * @returns A new sorted array of recognised sources; never empty.
 */
private resolveConfiguredSourcesForMeta(): MemorySource[] {
  const configured = Array.from(this.sources);
  const recognised = configured.filter(
    (entry): entry is MemorySource => entry === "memory" || entry === "sessions",
  );
  if (recognised.length === 0) {
    return ["memory"];
  }
  return recognised.toSorted();
}
|
||||||
|
|
||||||
|
/**
 * Produces a canonical source list from persisted index meta.
 *
 * Entries other than `"memory"` / `"sessions"` are dropped, duplicates are collapsed,
 * and the result is returned in default sort order.
 *
 * @param meta - Index meta as read back from the store.
 * @returns A new sorted, de-duplicated array of recognised sources; `["memory"]` when
 *   `meta.sources` is not an array or holds no recognised entry.
 */
private normalizeMetaSources(meta: MemoryIndexMeta): MemorySource[] {
  const persisted = meta.sources;
  if (!Array.isArray(persisted)) {
    // Backward compatibility for older indexes that did not persist sources.
    return ["memory"];
  }
  const unique = new Set<MemorySource>();
  for (const entry of persisted) {
    if (entry === "memory" || entry === "sessions") {
      unique.add(entry);
    }
  }
  if (unique.size === 0) {
    return ["memory"];
  }
  return Array.from(unique).toSorted();
}
|
||||||
|
|
||||||
|
/**
 * Reports whether the sources recorded in index meta differ from the configured ones.
 *
 * The meta side is canonicalised via `normalizeMetaSources` and then compared with
 * `configuredSources` position by position, so the comparison is order-sensitive.
 * NOTE(review): this presumes `configuredSources` is already in the same sorted order
 * (as produced by `resolveConfiguredSourcesForMeta`) — confirm at call sites.
 *
 * @param meta - Index meta as read back from the store.
 * @param configuredSources - Sources currently configured for this manager.
 * @returns `true` when the lengths differ or any position holds a different source.
 */
private metaSourcesDiffer(meta: MemoryIndexMeta, configuredSources: MemorySource[]): boolean {
  const persisted = this.normalizeMetaSources(meta);
  const sameLength = persisted.length === configuredSources.length;
  if (!sameLength) {
    return true;
  }
  const allMatch = persisted.every(
    (source, position) => source === configuredSources[position],
  );
  return !allMatch;
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user