mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-18 11:37:26 +00:00
fix: preserve whitespace in telegram html retry chunking
This commit is contained in:
@@ -258,11 +258,69 @@ function splitTelegramChunkByHtmlLimit(
|
||||
Number.isFinite(candidateLimit) && candidateLimit > 0
|
||||
? candidateLimit
|
||||
: Math.max(1, Math.floor(currentTextLength / 2));
|
||||
const split = chunkMarkdownIR(chunk, splitLimit);
|
||||
const split = splitMarkdownIRPreserveWhitespace(chunk, splitLimit);
|
||||
if (split.length > 1) {
|
||||
return split;
|
||||
}
|
||||
return chunkMarkdownIR(chunk, Math.max(1, Math.floor(currentTextLength / 2)));
|
||||
return splitMarkdownIRPreserveWhitespace(chunk, Math.max(1, Math.floor(currentTextLength / 2)));
|
||||
}
|
||||
|
||||
/**
 * Clips style spans to the window `[start, end)` and rebases the surviving
 * spans so their offsets are relative to `start`.
 *
 * Spans that do not overlap the window are dropped; spans that straddle a
 * window edge are trimmed to it. Every other property of a span is copied
 * through unchanged.
 *
 * @param styles - Spans whose `start`/`end` are offsets into the unsliced text.
 * @param start - Inclusive start offset of the window.
 * @param end - Exclusive end offset of the window.
 * @returns Newly created span objects for the window; the input is not mutated.
 */
function sliceStyleSpans(
  styles: MarkdownIR["styles"],
  start: number,
  end: number,
): MarkdownIR["styles"] {
  return styles.flatMap((span) => {
    // Entirely before or entirely after the window: nothing to keep.
    if (span.end <= start || span.start >= end) {
      return [];
    }
    const nextStart = Math.max(span.start, start) - start;
    const nextEnd = Math.min(span.end, end) - start;
    // Catches malformed spans (end not after start) that slip past the
    // overlap test above.
    if (nextEnd <= nextStart) {
      return [];
    }
    return [{ ...span, start: nextStart, end: nextEnd }];
  });
}
|
||||
|
||||
/**
 * Clips link spans to the window `[start, end)` and rebases the surviving
 * links so their offsets are relative to `start`.
 *
 * Links that do not overlap the window are dropped; links that straddle a
 * window edge are trimmed to it, so a link cut by a chunk boundary appears in
 * both neighbouring windows. Every other property of a link is copied through
 * unchanged.
 *
 * @param links - Links whose `start`/`end` are offsets into the unsliced text.
 * @param start - Inclusive start offset of the window.
 * @param end - Exclusive end offset of the window.
 * @returns Newly created link objects for the window; the input is not mutated.
 */
function sliceLinkSpans(
  links: MarkdownIR["links"],
  start: number,
  end: number,
): MarkdownIR["links"] {
  return links.flatMap((link) => {
    // Entirely before or entirely after the window: nothing to keep.
    if (link.end <= start || link.start >= end) {
      return [];
    }
    const nextStart = Math.max(link.start, start) - start;
    const nextEnd = Math.min(link.end, end) - start;
    // Catches malformed links (end not after start) that slip past the
    // overlap test above.
    if (nextEnd <= nextStart) {
      return [];
    }
    return [{ ...link, start: nextStart, end: nextEnd }];
  });
}
|
||||
|
||||
/**
 * Splits a Markdown IR into consecutive chunks of at most `limit` UTF-16 code
 * units without discarding or trimming any character, so concatenating the
 * chunk texts reproduces `ir.text` exactly.
 *
 * Style and link spans are clipped to each chunk and rebased to chunk-local
 * offsets via `sliceStyleSpans` / `sliceLinkSpans`.
 *
 * A chunk boundary is never placed between the two halves of a surrogate
 * pair. The boundary is moved one unit earlier, or one unit later when moving
 * earlier would leave the chunk empty. A chunk can therefore be one unit
 * shorter than `limit`, or two units long when `limit` is 1.
 *
 * @param ir - The IR to split.
 * @param limit - Maximum chunk length in UTF-16 code units. Values below 1 and
 *   fractional values are normalized to an integer of at least 1.
 * @returns `[]` when the text is empty; `[ir]` (same object) when the text
 *   already fits or `limit` is `NaN`; otherwise the ordered list of chunks.
 */
function splitMarkdownIRPreserveWhitespace(ir: MarkdownIR, limit: number): MarkdownIR[] {
  if (!ir.text) {
    return [];
  }
  // A NaN limit would poison the cursor arithmetic below and produce a single
  // chunk with empty text, silently dropping the content. Without a usable
  // limit the safest result is the unsplit input.
  if (Number.isNaN(limit)) {
    return [ir];
  }
  const normalizedLimit = Math.max(1, Math.floor(limit));
  const { text } = ir;
  if (text.length <= normalizedLimit) {
    return [ir];
  }
  const chunks: MarkdownIR[] = [];
  let cursor = 0;
  while (cursor < text.length) {
    let end = Math.min(text.length, cursor + normalizedLimit);
    if (end < text.length) {
      const lastUnit = text.charCodeAt(end - 1);
      const nextUnit = text.charCodeAt(end);
      const splitsSurrogatePair =
        lastUnit >= 0xd800 && lastUnit <= 0xdbff && nextUnit >= 0xdc00 && nextUnit <= 0xdfff;
      if (splitsSurrogatePair) {
        // Prefer ending before the pair; if that would make no progress, take
        // the whole pair so the loop always advances.
        end = end - 1 > cursor ? end - 1 : end + 1;
      }
    }
    chunks.push({
      text: text.slice(cursor, end),
      styles: sliceStyleSpans(ir.styles, cursor, end),
      links: sliceLinkSpans(ir.links, cursor, end),
    });
    cursor = end;
  }
  return chunks;
}
|
||||
|
||||
function renderTelegramChunksWithinHtmlLimit(
|
||||
|
||||
@@ -166,6 +166,14 @@ describe("markdownToTelegramChunks - file reference wrapping", () => {
|
||||
expect(chunks.map((chunk) => chunk.text).join("")).toBe(input);
|
||||
expect(chunks.every((chunk) => chunk.html.length <= 512)).toBe(true);
|
||||
});
|
||||
|
||||
// Regression test for the HTML-limit retry path. The 5-character input fits
// the limit of 5 as plain text, yet more than one chunk is expected, so the
// split must come from the rendered HTML being longer than the text
// (presumably because `<` is HTML-escaped; confirm against the renderer).
it("preserves whitespace when html-limit retry splitting runs", () => {
  const input = "a < b";
  const chunks = markdownToTelegramChunks(input, 5);
  expect(chunks.length).toBeGreaterThan(1);
  // No character, including the spaces around `<`, may be lost at a boundary.
  expect(chunks.map((chunk) => chunk.text).join("")).toBe(input);
  expect(chunks.every((chunk) => chunk.html.length <= 5)).toBe(true);
});
|
||||
});
|
||||
|
||||
describe("edge cases", () => {
|
||||
|
||||
Reference in New Issue
Block a user