fix: flush block streaming on paragraph boundaries for chunkMode=newline (#7014)

* feat: Implement paragraph boundary flushing in block streaming
  - Added `flushOnParagraph` option to `BlockReplyChunking` for immediate flushing on paragraph breaks.
  - Updated `EmbeddedBlockChunker` to handle paragraph boundaries during chunking.
  - Enhanced `createBlockReplyCoalescer` to support flushing on enqueue.
  - Added tests to verify behavior of flushing with and without `flushOnEnqueue` set.
  - Updated relevant types and interfaces to include `flushOnParagraph` and `flushOnEnqueue` options.
* fix: Improve streaming behavior and enhance block chunking logic
  - Resolved issue with stuck typing indicator after streamed BlueBubbles replies.
  - Refactored `EmbeddedBlockChunker` to streamline fence-split handling and ensure maxChars fallback for newline chunking.
  - Added tests to validate new chunking behavior, including handling of paragraph breaks and fence scenarios.
  - Updated changelog to reflect these changes.
* test: Add test for clamping long paragraphs in EmbeddedBlockChunker
  - Introduced a new test case to verify that long paragraphs are correctly clamped to maxChars when flushOnParagraph is enabled.
  - Updated logic in EmbeddedBlockChunker to handle cases where the next paragraph break exceeds maxChars, ensuring proper chunking behavior.
* refactor: streamline logging and improve error handling in message processing
  - Removed verbose logging statements from the `processMessage` function to reduce clutter.
  - Enhanced error handling by using `runtime.error` for typing restart failures.
  - Updated the `applySystemPromptOverrideToSession` function to accept a string directly instead of a function, simplifying the prompt application process.
  - Adjusted the `runEmbeddedAttempt` function to directly use the system prompt override without invoking it as a function.
2026-05-07 03:11:35 +00:00 · 2026-02-02 01:22:41 -08:00
parent 85cd55e22b
commit 9ef24fd400
14 changed files with 377 additions and 73 deletions
--- a/src/auto-reply/reply/agent-runner-execution.ts
+++ b/src/auto-reply/reply/agent-runner-execution.ts
@@ -63,6 +63,7 @@ export async function runAgentTurnWithFallback(params: {
    minChars: number;
    maxChars: number;
    breakPreference: "paragraph" | "newline" | "sentence";
+    flushOnParagraph?: boolean;
  };
  resolvedBlockStreamingBreak: "text_end" | "message_end";
  applyReplyToMode: (payload: ReplyPayload) => ReplyPayload;
--- a/src/auto-reply/reply/agent-runner.ts
+++ b/src/auto-reply/reply/agent-runner.ts
@@ -68,6 +68,7 @@ export async function runReplyAgent(params: {
    minChars: number;
    maxChars: number;
    breakPreference: "paragraph" | "newline" | "sentence";
+    flushOnParagraph?: boolean;
  };
  resolvedBlockStreamingBreak: "text_end" | "message_end";
  sessionCtx: TemplateContext;
--- a/src/auto-reply/reply/block-reply-coalescer.ts
+++ b/src/auto-reply/reply/block-reply-coalescer.ts
@@ -18,6 +18,7 @@ export function createBlockReplyCoalescer(params: {
  const maxChars = Math.max(minChars, Math.floor(config.maxChars));
  const idleMs = Math.max(0, Math.floor(config.idleMs));
  const joiner = config.joiner ?? "";
+  const flushOnEnqueue = config.flushOnEnqueue === true;

  let bufferText = "";
  let bufferReplyToId: ReplyPayload["replyToId"];
@@ -57,7 +58,7 @@ export function createBlockReplyCoalescer(params: {
    if (!bufferText) {
      return;
    }
-    if (!options?.force && bufferText.length < minChars) {
+    if (!options?.force && !flushOnEnqueue && bufferText.length < minChars) {
      scheduleIdleFlush();
      return;
    }
@@ -86,6 +87,19 @@ export function createBlockReplyCoalescer(params: {
      return;
    }

+    // When flushOnEnqueue is set (chunkMode="newline"), each enqueued payload is treated
+    // as a separate paragraph and flushed immediately so delivery matches streaming boundaries.
+    if (flushOnEnqueue) {
+      if (bufferText) {
+        void flush({ force: true });
+      }
+      bufferReplyToId = payload.replyToId;
+      bufferAudioAsVoice = payload.audioAsVoice;
+      bufferText = text;
+      void flush({ force: true });
+      return;
+    }
+
    if (
      bufferText &&
      (bufferReplyToId !== payload.replyToId || bufferAudioAsVoice !== payload.audioAsVoice)
--- a/src/auto-reply/reply/block-streaming.ts
+++ b/src/auto-reply/reply/block-streaming.ts
@@ -7,7 +7,7 @@ import {
  INTERNAL_MESSAGE_CHANNEL,
  listDeliverableMessageChannels,
 } from "../../utils/message-channel.js";
-import { resolveTextChunkLimit, type TextChunkProvider } from "../chunk.js";
+import { resolveChunkMode, resolveTextChunkLimit, type TextChunkProvider } from "../chunk.js";

 const DEFAULT_BLOCK_STREAM_MIN = 800;
 const DEFAULT_BLOCK_STREAM_MAX = 1200;
@@ -54,6 +54,8 @@ export type BlockStreamingCoalescing = {
  maxChars: number;
  idleMs: number;
  joiner: string;
+  /** When true, the coalescer flushes the buffer on each enqueue (paragraph-boundary flush). */
+  flushOnEnqueue?: boolean;
 };

 export function resolveBlockStreamingChunking(
@@ -64,22 +66,23 @@ export function resolveBlockStreamingChunking(
  minChars: number;
  maxChars: number;
  breakPreference: "paragraph" | "newline" | "sentence";
+  flushOnParagraph?: boolean;
 } {
  const providerKey = normalizeChunkProvider(provider);
+  const providerConfigKey = providerKey;
  const providerId = providerKey ? normalizeChannelId(providerKey) : null;
  const providerChunkLimit = providerId
    ? getChannelDock(providerId)?.outbound?.textChunkLimit
    : undefined;
-  const textLimit = resolveTextChunkLimit(cfg, providerKey, accountId, {
+  const textLimit = resolveTextChunkLimit(cfg, providerConfigKey, accountId, {
    fallbackLimit: providerChunkLimit,
  });
  const chunkCfg = cfg?.agents?.defaults?.blockStreamingChunk;

-  // Note: chunkMode="newline" used to imply splitting on each newline, but outbound
-  // delivery now treats it as paragraph-aware chunking (only split on blank lines).
-  // Block streaming should follow the same rule, so we do NOT special-case newline
-  // mode here.
-  // (chunkMode no longer alters block streaming behavior)
+  // When chunkMode="newline", the outbound delivery splits on paragraph boundaries.
+  // The block chunker should flush eagerly on \n\n boundaries during streaming,
+  // regardless of minChars, so each paragraph is sent as its own message.
+  const chunkMode = resolveChunkMode(cfg, providerConfigKey, accountId);

  const maxRequested = Math.max(1, Math.floor(chunkCfg?.maxChars ?? DEFAULT_BLOCK_STREAM_MAX));
  const maxChars = Math.max(1, Math.min(maxRequested, textLimit));
@@ -90,7 +93,12 @@ export function resolveBlockStreamingChunking(
    chunkCfg?.breakPreference === "newline" || chunkCfg?.breakPreference === "sentence"
      ? chunkCfg.breakPreference
      : "paragraph";
-  return { minChars, maxChars, breakPreference };
+  return {
+    minChars,
+    maxChars,
+    breakPreference,
+    flushOnParagraph: chunkMode === "newline",
+  };
 }

 export function resolveBlockStreamingCoalescing(
@@ -102,17 +110,20 @@ export function resolveBlockStreamingCoalescing(
    maxChars: number;
    breakPreference: "paragraph" | "newline" | "sentence";
  },
+  opts?: { chunkMode?: "length" | "newline" },
 ): BlockStreamingCoalescing | undefined {
  const providerKey = normalizeChunkProvider(provider);
+  const providerConfigKey = providerKey;

-  // Note: chunkMode="newline" is paragraph-aware in outbound delivery (blank-line splits),
-  // so block streaming should not disable coalescing or flush per single newline.
+  // Resolve the outbound chunkMode so the coalescer can flush on paragraph boundaries
+  // when chunkMode="newline", matching the delivery-time splitting behavior.
+  const chunkMode = opts?.chunkMode ?? resolveChunkMode(cfg, providerConfigKey, accountId);

  const providerId = providerKey ? normalizeChannelId(providerKey) : null;
  const providerChunkLimit = providerId
    ? getChannelDock(providerId)?.outbound?.textChunkLimit
    : undefined;
-  const textLimit = resolveTextChunkLimit(cfg, providerKey, accountId, {
+  const textLimit = resolveTextChunkLimit(cfg, providerConfigKey, accountId, {
    fallbackLimit: providerChunkLimit,
  });
  const providerDefaults = providerId
@@ -149,5 +160,6 @@ export function resolveBlockStreamingCoalescing(
    maxChars,
    idleMs,
    joiner,
+    flushOnEnqueue: chunkMode === "newline",
  };
 }
--- a/src/auto-reply/reply/formatting.test.ts
+++ b/src/auto-reply/reply/formatting.test.ts
@@ -71,6 +71,81 @@ describe("block reply coalescer", () => {
    coalescer.stop();
  });

+  it("flushes each enqueued payload separately when flushOnEnqueue is set", async () => {
+    const flushes: string[] = [];
+    const coalescer = createBlockReplyCoalescer({
+      config: { minChars: 1, maxChars: 200, idleMs: 100, joiner: "\n\n", flushOnEnqueue: true },
+      shouldAbort: () => false,
+      onFlush: (payload) => {
+        flushes.push(payload.text ?? "");
+      },
+    });
+
+    coalescer.enqueue({ text: "First paragraph" });
+    coalescer.enqueue({ text: "Second paragraph" });
+    coalescer.enqueue({ text: "Third paragraph" });
+
+    await Promise.resolve();
+    expect(flushes).toEqual(["First paragraph", "Second paragraph", "Third paragraph"]);
+    coalescer.stop();
+  });
+
+  it("still accumulates when flushOnEnqueue is not set (default)", async () => {
+    vi.useFakeTimers();
+    const flushes: string[] = [];
+    const coalescer = createBlockReplyCoalescer({
+      config: { minChars: 1, maxChars: 2000, idleMs: 100, joiner: "\n\n" },
+      shouldAbort: () => false,
+      onFlush: (payload) => {
+        flushes.push(payload.text ?? "");
+      },
+    });
+
+    coalescer.enqueue({ text: "First paragraph" });
+    coalescer.enqueue({ text: "Second paragraph" });
+
+    await vi.advanceTimersByTimeAsync(100);
+    expect(flushes).toEqual(["First paragraph\n\nSecond paragraph"]);
+    coalescer.stop();
+  });
+
+  it("flushes short payloads immediately when flushOnEnqueue is set", async () => {
+    const flushes: string[] = [];
+    const coalescer = createBlockReplyCoalescer({
+      config: { minChars: 10, maxChars: 200, idleMs: 50, joiner: "\n\n", flushOnEnqueue: true },
+      shouldAbort: () => false,
+      onFlush: (payload) => {
+        flushes.push(payload.text ?? "");
+      },
+    });
+
+    coalescer.enqueue({ text: "Hi" });
+    await Promise.resolve();
+    expect(flushes).toEqual(["Hi"]);
+    coalescer.stop();
+  });
+
+  it("resets char budget per paragraph with flushOnEnqueue", async () => {
+    const flushes: string[] = [];
+    const coalescer = createBlockReplyCoalescer({
+      config: { minChars: 1, maxChars: 30, idleMs: 100, joiner: "\n\n", flushOnEnqueue: true },
+      shouldAbort: () => false,
+      onFlush: (payload) => {
+        flushes.push(payload.text ?? "");
+      },
+    });
+
+    // Each 20-char payload fits within maxChars=30 individually
+    coalescer.enqueue({ text: "12345678901234567890" });
+    coalescer.enqueue({ text: "abcdefghijklmnopqrst" });
+
+    await Promise.resolve();
+    // Without flushOnEnqueue, these would be joined to 40+ chars and trigger maxChars split.
+    // With flushOnEnqueue, each is sent independently within budget.
+    expect(flushes).toEqual(["12345678901234567890", "abcdefghijklmnopqrst"]);
+    coalescer.stop();
+  });
+
  it("flushes buffered text before media payloads", () => {
    const flushes: Array<{ text?: string; mediaUrls?: string[] }> = [];
    const coalescer = createBlockReplyCoalescer({
--- a/src/auto-reply/reply/get-reply-directives.ts
+++ b/src/auto-reply/reply/get-reply-directives.ts
@@ -46,6 +46,7 @@ export type ReplyDirectiveContinuation = {
    minChars: number;
    maxChars: number;
    breakPreference: "paragraph" | "newline" | "sentence";
+    flushOnParagraph?: boolean;
  };
  resolvedBlockStreamingBreak: "text_end" | "message_end";
  provider: string;
--- a/src/auto-reply/reply/get-reply-run.ts
+++ b/src/auto-reply/reply/get-reply-run.ts
@@ -76,6 +76,7 @@ type RunPreparedReplyParams = {
    minChars: number;
    maxChars: number;
    breakPreference: "paragraph" | "newline" | "sentence";
+    flushOnParagraph?: boolean;
  };
  resolvedBlockStreamingBreak: "text_end" | "message_end";
  modelState: Awaited<ReturnType<typeof createModelSelectionState>>;