diff --git a/src/agents/pi-embedded-block-chunker.ts b/src/agents/pi-embedded-block-chunker.ts
index 0416380beb0..d3b5638a087 100644
--- a/src/agents/pi-embedded-block-chunker.ts
+++ b/src/agents/pi-embedded-block-chunker.ts
@@ -24,6 +24,26 @@ type ParagraphBreak = {
   length: number;
 };
 
+function findSafeSentenceBreakIndex(
+  text: string,
+  fenceSpans: FenceSpan[],
+  minChars: number,
+): number {
+  const matches = text.matchAll(/[.!?](?=\s|$)/g);
+  let sentenceIdx = -1;
+  for (const match of matches) {
+    const at = match.index ?? -1;
+    if (at < minChars) {
+      continue;
+    }
+    const candidate = at + 1;
+    if (isSafeFenceBreak(fenceSpans, candidate)) {
+      sentenceIdx = candidate;
+    }
+  }
+  return sentenceIdx >= minChars ? sentenceIdx : -1;
+}
+
 export class EmbeddedBlockChunker {
   #buffer = "";
   readonly #chunking: BlockReplyChunking;
@@ -211,19 +231,8 @@ export class EmbeddedBlockChunker {
     }
 
     if (preference !== "newline") {
-      const matches = buffer.matchAll(/[.!?](?=\s|$)/g);
-      let sentenceIdx = -1;
-      for (const match of matches) {
-        const at = match.index ?? -1;
-        if (at < minChars) {
-          continue;
-        }
-        const candidate = at + 1;
-        if (isSafeFenceBreak(fenceSpans, candidate)) {
-          sentenceIdx = candidate;
-        }
-      }
-      if (sentenceIdx >= minChars) {
+      const sentenceIdx = findSafeSentenceBreakIndex(buffer, fenceSpans, minChars);
+      if (sentenceIdx !== -1) {
         return { index: sentenceIdx };
       }
     }
@@ -271,19 +280,8 @@ export class EmbeddedBlockChunker {
     }
 
     if (preference !== "newline") {
-      const matches = window.matchAll(/[.!?](?=\s|$)/g);
-      let sentenceIdx = -1;
-      for (const match of matches) {
-        const at = match.index ?? -1;
-        if (at < minChars) {
-          continue;
-        }
-        const candidate = at + 1;
-        if (isSafeFenceBreak(fenceSpans, candidate)) {
-          sentenceIdx = candidate;
-        }
-      }
-      if (sentenceIdx >= minChars) {
+      const sentenceIdx = findSafeSentenceBreakIndex(window, fenceSpans, minChars);
+      if (sentenceIdx !== -1) {
         return { index: sentenceIdx };
       }
     }