feat(memory): Add MMR re-ranking for search result diversity

Adds Maximal Marginal Relevance (MMR) re-ranking to hybrid search results.

- New mmr.ts with tokenization, Jaccard similarity, and MMR algorithm
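- Integrated into mergeHybridResults() via an optional `mmr` config; when enabled, MMR re-ranks the score-sorted results, otherwise the sorted results are returned unchanged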
- 40 comprehensive tests covering edge cases and diversity behavior
- Configurable lambda parameter (default 0.7) to balance relevance vs diversity
- Updated CHANGELOG.md and memory docs

This helps avoid redundant results when multiple chunks contain similar content.
This commit is contained in:
Rodrigo Uroz
2026-01-26 15:23:22 -03:00
committed by Peter Steinberger
parent a0ab301dc3
commit fa9420069a
5 changed files with 610 additions and 7 deletions

View File

@@ -1,5 +1,9 @@
import { applyMMRToHybridResults, type MMRConfig, DEFAULT_MMR_CONFIG } from "./mmr.js";
export type HybridSource = string;
export { type MMRConfig, DEFAULT_MMR_CONFIG };
export type HybridVectorResult = {
id: string;
path: string;
@@ -43,6 +47,8 @@ export function mergeHybridResults(params: {
keyword: HybridKeywordResult[];
vectorWeight: number;
textWeight: number;
/** MMR configuration for diversity-aware re-ranking */
mmr?: Partial<MMRConfig>;
}): Array<{
path: string;
startLine: number;
@@ -111,5 +117,13 @@ export function mergeHybridResults(params: {
};
});
return merged.toSorted((a, b) => b.score - a.score);
const sorted = merged.toSorted((a, b) => b.score - a.score);
// Apply MMR re-ranking if enabled
const mmrConfig = { ...DEFAULT_MMR_CONFIG, ...params.mmr };
if (mmrConfig.enabled) {
return applyMMRToHybridResults(sorted, mmrConfig);
}
return sorted;
}