mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-08 15:48:28 +00:00
fix: flush block streaming on paragraph boundaries for chunkMode=newline (#7014)
* feat: Implement paragraph boundary flushing in block streaming - Added `flushOnParagraph` option to `BlockReplyChunking` for immediate flushing on paragraph breaks. - Updated `EmbeddedBlockChunker` to handle paragraph boundaries during chunking. - Enhanced `createBlockReplyCoalescer` to support flushing on enqueue. - Added tests to verify behavior of flushing with and without `flushOnEnqueue` set. - Updated relevant types and interfaces to include `flushOnParagraph` and `flushOnEnqueue` options. * fix: Improve streaming behavior and enhance block chunking logic - Resolved issue with stuck typing indicator after streamed BlueBubbles replies. - Refactored `EmbeddedBlockChunker` to streamline fence-split handling and ensure maxChars fallback for newline chunking. - Added tests to validate new chunking behavior, including handling of paragraph breaks and fence scenarios. - Updated changelog to reflect these changes. * test: Add test for clamping long paragraphs in EmbeddedBlockChunker - Introduced a new test case to verify that long paragraphs are correctly clamped to maxChars when flushOnParagraph is enabled. - Updated logic in EmbeddedBlockChunker to handle cases where the next paragraph break exceeds maxChars, ensuring proper chunking behavior. * refactor: streamline logging and improve error handling in message processing - Removed verbose logging statements from the `processMessage` function to reduce clutter. - Enhanced error handling by using `runtime.error` for typing restart failures. - Updated the `applySystemPromptOverrideToSession` function to accept a string directly instead of a function, simplifying the prompt application process. - Adjusted the `runEmbeddedAttempt` function to directly use the system prompt override without invoking it as a function.
This commit is contained in:
@@ -1,9 +1,12 @@
|
||||
import type { FenceSpan } from "../markdown/fences.js";
|
||||
import { findFenceSpanAt, isSafeFenceBreak, parseFenceSpans } from "../markdown/fences.js";
|
||||
|
||||
/** Size limits and break options consumed by `EmbeddedBlockChunker` when it cuts buffered text into chunks. */
export type BlockReplyChunking = {
  /**
   * Lower size bound. The chunker floors this and clamps it to at least 1;
   * an unforced drain returns early while the buffer is shorter than it.
   */
  minChars: number;
  /**
   * Upper size bound. The chunker floors this and clamps it so it is never
   * smaller than the effective `minChars`.
   */
  maxChars: number;
  /** Preferred kind of break point. NOTE(review): the consumer is not visible in this excerpt — confirm semantics there. */
  breakPreference?: "paragraph" | "newline" | "sentence";
  /**
   * When true, an unforced drain emits each paragraph as soon as its blank-line
   * (`\n\n`) boundary is buffered, without waiting for `minChars`.
   */
  flushOnParagraph?: boolean;
};
|
||||
|
||||
type FenceSplit = {
|
||||
@@ -16,6 +19,11 @@ type BreakResult = {
|
||||
fenceSplit?: FenceSplit;
|
||||
};
|
||||
|
||||
/** Position of one paragraph separator inside a buffer, as reported by `findNextParagraphBreak`. */
type ParagraphBreak = {
  /** Offset in the buffer at which the separator match starts. */
  index: number;
  /** Number of characters the separator match covers. */
  length: number;
};
|
||||
|
||||
export class EmbeddedBlockChunker {
|
||||
#buffer = "";
|
||||
readonly #chunking: BlockReplyChunking;
|
||||
@@ -49,6 +57,14 @@ export class EmbeddedBlockChunker {
|
||||
const { force, emit } = params;
|
||||
const minChars = Math.max(1, Math.floor(this.#chunking.minChars));
|
||||
const maxChars = Math.max(minChars, Math.floor(this.#chunking.maxChars));
|
||||
|
||||
// When flushOnParagraph is set (chunkMode="newline"), eagerly split on \n\n
|
||||
// boundaries regardless of minChars so each paragraph is sent immediately.
|
||||
if (this.#chunking.flushOnParagraph && !force) {
|
||||
this.#drainParagraphs(emit, maxChars);
|
||||
return;
|
||||
}
|
||||
|
||||
if (this.#buffer.length < minChars && !force) {
|
||||
return;
|
||||
}
|
||||
@@ -74,39 +90,10 @@ export class EmbeddedBlockChunker {
|
||||
return;
|
||||
}
|
||||
|
||||
const breakIdx = breakResult.index;
|
||||
let rawChunk = this.#buffer.slice(0, breakIdx);
|
||||
if (rawChunk.trim().length === 0) {
|
||||
this.#buffer = stripLeadingNewlines(this.#buffer.slice(breakIdx)).trimStart();
|
||||
if (!this.#emitBreakResult(breakResult, emit)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let nextBuffer = this.#buffer.slice(breakIdx);
|
||||
const fenceSplit = breakResult.fenceSplit;
|
||||
if (fenceSplit) {
|
||||
const closeFence = rawChunk.endsWith("\n")
|
||||
? `${fenceSplit.closeFenceLine}\n`
|
||||
: `\n${fenceSplit.closeFenceLine}\n`;
|
||||
rawChunk = `${rawChunk}${closeFence}`;
|
||||
|
||||
const reopenFence = fenceSplit.reopenFenceLine.endsWith("\n")
|
||||
? fenceSplit.reopenFenceLine
|
||||
: `${fenceSplit.reopenFenceLine}\n`;
|
||||
nextBuffer = `${reopenFence}${nextBuffer}`;
|
||||
}
|
||||
|
||||
emit(rawChunk);
|
||||
|
||||
if (fenceSplit) {
|
||||
this.#buffer = nextBuffer;
|
||||
} else {
|
||||
const nextStart =
|
||||
breakIdx < this.#buffer.length && /\s/.test(this.#buffer[breakIdx])
|
||||
? breakIdx + 1
|
||||
: breakIdx;
|
||||
this.#buffer = stripLeadingNewlines(this.#buffer.slice(nextStart));
|
||||
}
|
||||
|
||||
if (this.#buffer.length < minChars && !force) {
|
||||
return;
|
||||
}
|
||||
@@ -116,6 +103,76 @@ export class EmbeddedBlockChunker {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Emits every complete paragraph currently buffered (the text in front of a
 * paragraph break), without applying the minChars threshold.
 *
 * When no paragraph break is found, or the next one starts beyond `maxChars`,
 * the buffer is left alone unless it has reached `maxChars`; in that case the
 * regular break picker (with a minimum of 1) is used to split it so chunks
 * stay bounded.
 *
 * @param emit - Receives each chunk that is flushed.
 * @param maxChars - Upper bound used to decide when to fall back to the regular break picker.
 */
#drainParagraphs(emit: (chunk: string) => void, maxChars: number) {
  while (this.#buffer.length > 0) {
    const pending = this.#buffer;
    const boundary = findNextParagraphBreak(pending, parseFenceSpans(pending));

    if (boundary === null || boundary.index > maxChars) {
      // Nothing usable as a paragraph boundary yet. Keep waiting for more
      // text unless the buffer has already grown to the size limit.
      const atCapacity = pending.length >= maxChars;
      if (!atCapacity) {
        return;
      }

      const fallback = this.#pickBreakIndex(pending, 1);
      const canSplit = fallback.index > 0;
      if (!canSplit) {
        return;
      }

      // The helper advances the buffer even when it emits nothing, so the
      // loop simply re-evaluates whatever remains.
      this.#emitBreakResult(fallback, emit);
      continue;
    }

    const paragraph = pending.slice(0, boundary.index);
    const hasContent = paragraph.trim().length > 0;
    if (hasContent) {
      emit(paragraph);
    }

    // Drop the separator itself plus any newlines that directly follow it.
    const afterSeparator = this.#buffer.slice(boundary.index + boundary.length);
    this.#buffer = stripLeadingNewlines(afterSeparator);
  }
}
|
||||
|
||||
/**
 * Emits the buffered text in front of `breakResult.index` and advances the
 * buffer past it.
 *
 * - A non-positive index does nothing.
 * - A whitespace-only head is discarded (the buffer is still advanced and
 *   trimmed at the start) and nothing is emitted.
 * - When `breakResult.fenceSplit` is present, its `closeFenceLine` is appended
 *   to the emitted chunk and its `reopenFenceLine` is prepended to what stays
 *   in the buffer.
 * - Otherwise a single whitespace character sitting at the break position is
 *   skipped and leading newlines of the remainder are stripped.
 *
 * @param breakResult - Break position and optional fence-split details.
 * @param emit - Receives the chunk, if one is produced.
 * @returns `true` when a chunk was passed to `emit`, otherwise `false`.
 */
#emitBreakResult(breakResult: BreakResult, emit: (chunk: string) => void): boolean {
  const { index: cut, fenceSplit: split } = breakResult;
  if (cut <= 0) {
    return false;
  }

  const head = this.#buffer.slice(0, cut);
  if (head.trim().length === 0) {
    // Only whitespace before the break: discard it instead of emitting.
    this.#buffer = stripLeadingNewlines(this.#buffer.slice(cut)).trimStart();
    return false;
  }

  if (split) {
    // Make sure the closing fence line starts on its own line in the chunk.
    const closing = head.endsWith("\n")
      ? `${split.closeFenceLine}\n`
      : `\n${split.closeFenceLine}\n`;
    // Make sure the reopening fence line is newline-terminated in the buffer.
    const reopening = split.reopenFenceLine.endsWith("\n")
      ? split.reopenFenceLine
      : `${split.reopenFenceLine}\n`;
    // Computed before emitting, like the emitted chunk itself.
    const carriedOver = `${reopening}${this.#buffer.slice(cut)}`;

    emit(`${head}${closing}`);
    this.#buffer = carriedOver;
    return true;
  }

  emit(head);

  // Step over one whitespace character at the break position, if there is one.
  const skipSeparator = cut < this.#buffer.length && /\s/.test(this.#buffer[cut]);
  const resumeAt = skipSeparator ? cut + 1 : cut;
  this.#buffer = stripLeadingNewlines(this.#buffer.slice(resumeAt));
  return true;
}
|
||||
|
||||
#pickSoftBreakIndex(buffer: string, minCharsOverride?: number): BreakResult {
|
||||
const minChars = Math.max(1, Math.floor(minCharsOverride ?? this.#chunking.minChars));
|
||||
if (buffer.length < minChars) {
|
||||
@@ -269,3 +326,27 @@ function stripLeadingNewlines(value: string): string {
|
||||
}
|
||||
return i > 0 ? value.slice(i) : value;
|
||||
}
|
||||
|
||||
/**
 * Finds the first paragraph separator in `buffer` at or after `startIndex`
 * that `isSafeFenceBreak` accepts for the given fence spans.
 *
 * A separator is a newline, optional tabs/spaces, then one or more newlines.
 * Candidates rejected by `isSafeFenceBreak` are skipped and the search
 * continues after them.
 *
 * @param buffer - Text to scan.
 * @param fenceSpans - Fence spans passed through to `isSafeFenceBreak`.
 * @param startIndex - Offset at which scanning begins; negative values yield `null`.
 * @returns Start offset and length of the separator, or `null` when none qualifies.
 */
function findNextParagraphBreak(
  buffer: string,
  fenceSpans: FenceSpan[],
  startIndex = 0,
): ParagraphBreak | null {
  if (startIndex < 0) {
    return null;
  }

  // A fresh global regex per call keeps the lastIndex cursor private to this scan.
  const separator = /\n[\t ]*\n+/g;
  separator.lastIndex = startIndex;

  for (let hit = separator.exec(buffer); hit !== null; hit = separator.exec(buffer)) {
    const at = hit.index ?? -1;
    if (at >= 0 && isSafeFenceBreak(fenceSpans, at)) {
      return { index: at, length: hit[0].length };
    }
  }

  return null;
}
|
||||
|
||||
Reference in New Issue
Block a user