mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-19 07:37:27 +00:00
refactor(shared): reuse outbound text chunking core
This commit is contained in:
@@ -6,6 +6,7 @@ import type { ChannelId } from "../channels/plugins/types.js";
|
|||||||
import type { OpenClawConfig } from "../config/config.js";
|
import type { OpenClawConfig } from "../config/config.js";
|
||||||
import { findFenceSpanAt, isSafeFenceBreak, parseFenceSpans } from "../markdown/fences.js";
|
import { findFenceSpanAt, isSafeFenceBreak, parseFenceSpans } from "../markdown/fences.js";
|
||||||
import { normalizeAccountId } from "../routing/session-key.js";
|
import { normalizeAccountId } from "../routing/session-key.js";
|
||||||
|
import { chunkTextByBreakResolver } from "../shared/text-chunking.js";
|
||||||
import { INTERNAL_MESSAGE_CHANNEL } from "../utils/message-channel.js";
|
import { INTERNAL_MESSAGE_CHANNEL } from "../utils/message-channel.js";
|
||||||
|
|
||||||
export type TextChunkProvider = ChannelId | typeof INTERNAL_MESSAGE_CHANNEL;
|
export type TextChunkProvider = ChannelId | typeof INTERNAL_MESSAGE_CHANNEL;
|
||||||
@@ -316,41 +317,12 @@ export function chunkText(text: string, limit: number): string[] {
|
|||||||
if (early) {
|
if (early) {
|
||||||
return early;
|
return early;
|
||||||
}
|
}
|
||||||
|
return chunkTextByBreakResolver(text, limit, (window) => {
|
||||||
const chunks: string[] = [];
|
|
||||||
let remaining = text;
|
|
||||||
|
|
||||||
while (remaining.length > limit) {
|
|
||||||
const window = remaining.slice(0, limit);
|
|
||||||
|
|
||||||
// 1) Prefer a newline break inside the window (outside parentheses).
|
// 1) Prefer a newline break inside the window (outside parentheses).
|
||||||
const { lastNewline, lastWhitespace } = scanParenAwareBreakpoints(window);
|
const { lastNewline, lastWhitespace } = scanParenAwareBreakpoints(window);
|
||||||
|
|
||||||
// 2) Otherwise prefer the last whitespace (word boundary) inside the window.
|
// 2) Otherwise prefer the last whitespace (word boundary) inside the window.
|
||||||
let breakIdx = lastNewline > 0 ? lastNewline : lastWhitespace;
|
return lastNewline > 0 ? lastNewline : lastWhitespace;
|
||||||
|
});
|
||||||
// 3) Fallback: hard break exactly at the limit.
|
|
||||||
if (breakIdx <= 0) {
|
|
||||||
breakIdx = limit;
|
|
||||||
}
|
|
||||||
|
|
||||||
const rawChunk = remaining.slice(0, breakIdx);
|
|
||||||
const chunk = rawChunk.trimEnd();
|
|
||||||
if (chunk.length > 0) {
|
|
||||||
chunks.push(chunk);
|
|
||||||
}
|
|
||||||
|
|
||||||
// If we broke on whitespace/newline, skip that separator; for hard breaks keep it.
|
|
||||||
const brokeOnSeparator = breakIdx < remaining.length && /\s/.test(remaining[breakIdx]);
|
|
||||||
const nextStart = Math.min(remaining.length, breakIdx + (brokeOnSeparator ? 1 : 0));
|
|
||||||
remaining = remaining.slice(nextStart).trimStart();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (remaining.length) {
|
|
||||||
chunks.push(remaining);
|
|
||||||
}
|
|
||||||
|
|
||||||
return chunks;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export function chunkMarkdownText(text: string, limit: number): string[] {
|
export function chunkMarkdownText(text: string, limit: number): string[] {
|
||||||
|
|||||||
16
src/plugin-sdk/text-chunking.test.ts
Normal file
16
src/plugin-sdk/text-chunking.test.ts
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
import { describe, expect, it } from "vitest";
|
||||||
|
import { chunkTextForOutbound } from "./text-chunking.js";
|
||||||
|
|
||||||
|
// Behavioural checks for chunkTextForOutbound: empty input, separator-based
// splitting, and the hard-limit fallback.
describe("chunkTextForOutbound", () => {
  it("returns empty for empty input", () => {
    const chunks = chunkTextForOutbound("", 10);

    expect(chunks).toEqual([]);
  });

  it("splits on newline or whitespace boundaries", () => {
    // The newline is preferred inside the first window; the space is used in the next one.
    const chunks = chunkTextForOutbound("alpha\nbeta gamma", 8);

    expect(chunks).toEqual(["alpha", "beta", "gamma"]);
  });

  it("falls back to hard limit when no separator exists", () => {
    // No whitespace anywhere, so every cut lands exactly at the limit.
    const chunks = chunkTextForOutbound("abcdefghij", 4);

    expect(chunks).toEqual(["abcd", "efgh", "ij"]);
  });
});
|
||||||
@@ -1,31 +1,9 @@
|
|||||||
|
import { chunkTextByBreakResolver } from "../shared/text-chunking.js";
|
||||||
|
|
||||||
export function chunkTextForOutbound(text: string, limit: number): string[] {
|
export function chunkTextForOutbound(text: string, limit: number): string[] {
|
||||||
if (!text) {
|
return chunkTextByBreakResolver(text, limit, (window) => {
|
||||||
return [];
|
|
||||||
}
|
|
||||||
if (limit <= 0 || text.length <= limit) {
|
|
||||||
return [text];
|
|
||||||
}
|
|
||||||
const chunks: string[] = [];
|
|
||||||
let remaining = text;
|
|
||||||
while (remaining.length > limit) {
|
|
||||||
const window = remaining.slice(0, limit);
|
|
||||||
const lastNewline = window.lastIndexOf("\n");
|
const lastNewline = window.lastIndexOf("\n");
|
||||||
const lastSpace = window.lastIndexOf(" ");
|
const lastSpace = window.lastIndexOf(" ");
|
||||||
let breakIdx = lastNewline > 0 ? lastNewline : lastSpace;
|
return lastNewline > 0 ? lastNewline : lastSpace;
|
||||||
if (breakIdx <= 0) {
|
});
|
||||||
breakIdx = limit;
|
|
||||||
}
|
|
||||||
const rawChunk = remaining.slice(0, breakIdx);
|
|
||||||
const chunk = rawChunk.trimEnd();
|
|
||||||
if (chunk.length > 0) {
|
|
||||||
chunks.push(chunk);
|
|
||||||
}
|
|
||||||
const brokeOnSeparator = breakIdx < remaining.length && /\s/.test(remaining[breakIdx]);
|
|
||||||
const nextStart = Math.min(remaining.length, breakIdx + (brokeOnSeparator ? 1 : 0));
|
|
||||||
remaining = remaining.slice(nextStart).trimStart();
|
|
||||||
}
|
|
||||||
if (remaining.length) {
|
|
||||||
chunks.push(remaining);
|
|
||||||
}
|
|
||||||
return chunks;
|
|
||||||
}
|
}
|
||||||
|
|||||||
34
src/shared/text-chunking.ts
Normal file
34
src/shared/text-chunking.ts
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
/**
 * Reports whether cutting `value` at `index` would separate a UTF-16 high
 * surrogate from the low surrogate that immediately follows it.
 */
function splitsSurrogatePair(value: string, index: number): boolean {
  const before = value.charCodeAt(index - 1);
  const after = value.charCodeAt(index);
  return before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
}

/**
 * Splits `text` into chunks of at most `limit` UTF-16 code units, letting the
 * caller decide where each chunk should break.
 *
 * For every window of `limit` code units taken from the start of the remaining
 * text, `resolveBreakIndex` is asked for a break position inside that window.
 * The text before the break becomes a chunk (trailing whitespace trimmed, empty
 * chunks dropped) and leading whitespace of the remainder is discarded.
 *
 * @param text - Text to split. An empty string yields an empty array.
 * @param limit - Maximum chunk length. Fractional values are floored; values
 *   below 1 (or NaN) disable chunking and return `[text]`.
 * @param resolveBreakIndex - Receives the current window and returns the index
 *   to break at. Any result that is not a finite value in `[1, limit]` after
 *   truncation falls back to a hard break at `limit`.
 * @returns The chunks in order.
 */
export function chunkTextByBreakResolver(
  text: string,
  limit: number,
  resolveBreakIndex: (window: string) => number,
): string[] {
  if (!text) {
    return [];
  }

  // Work with a whole number of code units. A fractional limit below 1 would
  // otherwise produce an empty window and an empty slice on every iteration,
  // so the loop below would never make progress.
  const maxLength = Math.floor(limit);
  if (!(maxLength >= 1) || text.length <= maxLength) {
    return [text];
  }

  const chunks: string[] = [];
  let remaining = text;

  while (remaining.length > maxLength) {
    const window = remaining.slice(0, maxLength);

    // Truncate so that a fractional result such as 0.5 cannot pass validation
    // and then slice off nothing; NaN and +/-Infinity fail the range check.
    const candidateBreak = Math.trunc(resolveBreakIndex(window));
    let breakIdx =
      candidateBreak >= 1 && candidateBreak <= maxLength ? candidateBreak : maxLength;

    // Never cut between the two halves of a surrogate pair when there is room
    // to step back; with breakIdx === 1 stepping back would stall the loop, so
    // the pair is split in that case.
    if (breakIdx > 1 && splitsSurrogatePair(remaining, breakIdx)) {
      breakIdx -= 1;
    }

    const chunk = remaining.slice(0, breakIdx).trimEnd();
    if (chunk.length > 0) {
      chunks.push(chunk);
    }

    // trimStart() removes the separator at the break position (if any) together
    // with any whitespace that follows it, so no explicit separator skip is needed.
    remaining = remaining.slice(breakIdx).trimStart();
  }

  if (remaining.length > 0) {
    chunks.push(remaining);
  }

  return chunks;
}
|
||||||
Reference in New Issue
Block a user