fix: remap session JSONL chunk line numbers to original source positions (#12102)

* fix: remap session JSONL chunk line numbers to original source positions

buildSessionEntry() flattens JSONL messages into plain text before
chunkMarkdown() assigns line numbers. The stored startLine/endLine
values therefore reference positions in the flattened text, not the
original JSONL file.

- Add lineMap to SessionFileEntry tracking which JSONL line each
  extracted message came from
- Add remapChunkLines() to translate chunk positions back to original
  JSONL lines after chunking
- Guard remap with source === "sessions" to prevent misapplication
- Include lineMap in content hash so existing sessions get re-indexed

Fixes #12044

* memory: dedupe session JSONL parsing

---------

Co-authored-by: Tak Hoffman <781889+Takhoffman@users.noreply.github.com>
This commit is contained in:
Marcus Castro
2026-02-10 21:09:24 -03:00
committed by GitHub
parent 424d2dddf5
commit 45488e4ec9
5 changed files with 197 additions and 122 deletions

View File

@@ -246,6 +246,27 @@ export function chunkMarkdown(
return chunks;
}
/**
 * Rewrites each chunk's `startLine` / `endLine` in place so that they point at
 * lines of the original source file instead of lines of the chunked content.
 *
 * Session JSONL files are the motivating case: buildSessionEntry() flattens
 * the messages into plain text before chunking, so without this step the
 * stored line numbers would describe the flattened text rather than the
 * JSONL file they came from.
 *
 * @param chunks  Chunks to update; they are mutated, nothing is returned.
 * @param lineMap Lookup table where index `i` (0-based content line) holds the
 *                1-based source line. When it is missing or empty the chunks
 *                are left untouched.
 */
export function remapChunkLines(chunks: MemoryChunk[], lineMap: number[] | undefined): void {
  if (lineMap == null || lineMap.length === 0) {
    return;
  }

  const sourceLines = lineMap;

  // Chunk line numbers are 1-based while the table is indexed from 0, hence
  // the `- 1`. A content line with no table entry keeps its current value.
  const toSourceLine = (contentLine: number): number =>
    sourceLines[contentLine - 1] ?? contentLine;

  for (const chunk of chunks) {
    chunk.startLine = toSourceLine(chunk.startLine);
    chunk.endLine = toSourceLine(chunk.endLine);
  }
}
export function parseEmbedding(raw: string): number[] {
try {
const parsed = JSON.parse(raw) as number[];