feat: LLM-based query expansion for FTS mode

When searching in FTS-only (full-text search) mode, i.e. with no embedding
provider, extract meaningful keywords from conversational queries using an LLM
to improve search results.

Changes:
- New query-expansion module with keyword extraction
- Supports English and Chinese stop word filtering
- Null safety guards for FTS-only mode (provider can be null)
- Lint compliance fixes for string iteration

This helps users find relevant memory entries even with vague queries.
This commit is contained in:
康熙
2026-02-16 14:46:02 +08:00
committed by Peter Steinberger
parent 65aedac20e
commit bcab2469de
5 changed files with 506 additions and 13 deletions

View File

@@ -28,6 +28,7 @@ import { isMemoryPath, normalizeExtraMemoryPaths } from "./internal.js";
import { memoryManagerEmbeddingOps } from "./manager-embedding-ops.js";
import { searchKeyword, searchVector } from "./manager-search.js";
import { memoryManagerSyncOps } from "./manager-sync-ops.js";
import { extractKeywords } from "./query-expansion.js";
const SNIPPET_MAX_CHARS = 700;
const VECTOR_TABLE = "chunks_vec";
const FTS_TABLE = "chunks_fts";
@@ -233,8 +234,34 @@ export class MemoryIndexManager implements MemorySearchManager {
log.warn("memory search: no provider and FTS unavailable");
return [];
}
const ftsResults = await this.searchKeyword(cleaned, candidates).catch(() => []);
return ftsResults.filter((entry) => entry.score >= minScore).slice(0, maxResults);
// Extract keywords for better FTS matching on conversational queries
// e.g., "that thing we discussed about the API" → ["discussed", "API"]
const keywords = extractKeywords(cleaned);
const searchTerms = keywords.length > 0 ? keywords : [cleaned];
// Search with each keyword and merge results
const resultSets = await Promise.all(
searchTerms.map((term) => this.searchKeyword(term, candidates).catch(() => [])),
);
// Merge and deduplicate results, keeping highest score for each chunk
const seenIds = new Map<string, (typeof resultSets)[0][0]>();
for (const results of resultSets) {
for (const result of results) {
const existing = seenIds.get(result.id);
if (!existing || result.score > existing.score) {
seenIds.set(result.id, result);
}
}
}
const merged = [...seenIds.values()]
.toSorted((a, b) => b.score - a.score)
.filter((entry) => entry.score >= minScore)
.slice(0, maxResults);
return merged;
}
const keywordResults = hybrid.enabled