mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-09 16:54:31 +00:00
fix(signal): outbound formatting and markdown IR rendering improvements (#9781)
* fix: Signal and markdown formatting improvements

  Markdown IR fixes:
  - Fix list-paragraph spacing (extra newline between list items and following paragraphs)
  - Fix nested list indentation and newline handling
  - Fix blockquote_close emitting redundant newline (inner content handles spacing)
  - Render horizontal rules as visible ─── separator instead of silent drop
  - Strip inner cell styles in code-mode tables to prevent overlapping with code_block span

  Signal formatting fixes:
  - Normalize URLs for dedup comparison (strip protocol, www., trailing slash)
  - Render headings as bold text (headingStyle: 'bold')
  - Add '> ' prefix to blockquotes for visual distinction
  - Re-chunk after link expansion to respect chunk size limits

  Tests:
  - 51 new tests for markdown IR (spacing, lists, blockquotes, tables, HR)
  - 18 new tests for Signal formatting (URL dedup, headings, blockquotes, HR, chunking)
  - Update Slack nested list test expectation to match corrected IR output

* refactor: style-aware Signal text chunker

  Replace indexOf-based chunk position tracking with deterministic cursor tracking. The new splitSignalFormattedText:
  - Splits at whitespace/newline boundaries within the limit
  - Avoids breaking inside parentheses (preserves expanded link URLs)
  - Slices style ranges at chunk boundaries with correct local offsets
  - Tracks position via offset arithmetic instead of fragile indexOf

  Removes dependency on chunkText from auto-reply/chunk.

  Tests: 19 new tests covering style preservation across chunk boundaries, edge cases (empty text, under limit, exact split points), and integration with link expansion.

* fix: correct Signal style offsets with multiple link expansions

  applyInsertionsToStyles() was using original coordinates for each insertion without tracking cumulative shift from prior insertions. This caused bold/italic/etc styles to drift to wrong text positions when multiple markdown links expanded in a single message. Added cumulative shift tracking and a regression test.
* test: clean up test noise and fix ineffective assertions

  - Remove console.log from ir.list-spacing and ir.hr-spacing tests
  - Fix ir.nested-lists.test.ts: remove ineffective regex assertion
  - Fix ir.hr-spacing.test.ts: add actual assertions to edge case test

* refactor: split Signal formatting tests (#9781) (thanks @heyhudson)

---------

Co-authored-by: Hudson <258693705+hudson-rivera@users.noreply.github.com>
Co-authored-by: Peter Steinberger <steipete@gmail.com>
This commit is contained in:
@@ -34,6 +34,17 @@ type Insertion = {
|
||||
length: number;
|
||||
};
|
||||
|
||||
/**
 * Reduce a URL (or a URL-like link label) to a canonical form used only for
 * deciding whether a label and its href are redundant with each other.
 *
 * Normalization steps:
 * - drop a leading `http://` / `https://` (matched case-insensitively)
 * - drop a leading `www.` (matched case-insensitively)
 * - lower-case the authority (everything before the first `/`, `?` or `#`)
 * - drop trailing slashes
 *
 * Only the authority is lower-cased: path, query and fragment are
 * case-sensitive, so `example.com/AbC` and `example.com/abc` may be different
 * resources and must not compare equal (otherwise the real target of a link
 * could be hidden as "redundant").
 *
 * @param url - URL or label text to normalize; expected to be already trimmed.
 * @returns The normalized comparison key (not a usable URL).
 */
function normalizeUrlForComparison(url: string): string {
  const withoutPrefix = url.replace(/^https?:\/\//i, "").replace(/^www\./i, "");

  // Split at the first delimiter that ends the authority component.
  const authorityEnd = withoutPrefix.search(/[/?#]/);
  const normalized =
    authorityEnd === -1
      ? withoutPrefix.toLowerCase()
      : withoutPrefix.slice(0, authorityEnd).toLowerCase() + withoutPrefix.slice(authorityEnd);

  // Strip trailing slashes so "example.com/" and "example.com" match.
  return normalized.replace(/\/+$/, "");
}
|
||||
|
||||
function mapStyle(style: MarkdownStyle): SignalTextStyle | null {
|
||||
switch (style) {
|
||||
case "bold":
|
||||
@@ -100,15 +111,17 @@ function applyInsertionsToStyles(
|
||||
}
|
||||
const sortedInsertions = [...insertions].toSorted((a, b) => a.pos - b.pos);
|
||||
let updated = spans;
|
||||
let cumulativeShift = 0;
|
||||
|
||||
for (const insertion of sortedInsertions) {
|
||||
const insertionPos = insertion.pos + cumulativeShift;
|
||||
const next: SignalStyleSpan[] = [];
|
||||
for (const span of updated) {
|
||||
if (span.end <= insertion.pos) {
|
||||
if (span.end <= insertionPos) {
|
||||
next.push(span);
|
||||
continue;
|
||||
}
|
||||
if (span.start >= insertion.pos) {
|
||||
if (span.start >= insertionPos) {
|
||||
next.push({
|
||||
start: span.start + insertion.length,
|
||||
end: span.end + insertion.length,
|
||||
@@ -116,15 +129,15 @@ function applyInsertionsToStyles(
|
||||
});
|
||||
continue;
|
||||
}
|
||||
if (span.start < insertion.pos && span.end > insertion.pos) {
|
||||
if (insertion.pos > span.start) {
|
||||
if (span.start < insertionPos && span.end > insertionPos) {
|
||||
if (insertionPos > span.start) {
|
||||
next.push({
|
||||
start: span.start,
|
||||
end: insertion.pos,
|
||||
end: insertionPos,
|
||||
style: span.style,
|
||||
});
|
||||
}
|
||||
const shiftedStart = insertion.pos + insertion.length;
|
||||
const shiftedStart = insertionPos + insertion.length;
|
||||
const shiftedEnd = span.end + insertion.length;
|
||||
if (shiftedEnd > shiftedStart) {
|
||||
next.push({
|
||||
@@ -136,6 +149,7 @@ function applyInsertionsToStyles(
|
||||
}
|
||||
}
|
||||
updated = next;
|
||||
cumulativeShift += insertion.length;
|
||||
}
|
||||
|
||||
return updated;
|
||||
@@ -161,16 +175,26 @@ function renderSignalText(ir: MarkdownIR): SignalFormattedText {
|
||||
const href = link.href.trim();
|
||||
const label = text.slice(link.start, link.end);
|
||||
const trimmedLabel = label.trim();
|
||||
const comparableHref = href.startsWith("mailto:") ? href.slice("mailto:".length) : href;
|
||||
|
||||
if (href) {
|
||||
if (!trimmedLabel) {
|
||||
out += href;
|
||||
insertions.push({ pos: link.end, length: href.length });
|
||||
} else if (trimmedLabel !== href && trimmedLabel !== comparableHref) {
|
||||
const addition = ` (${href})`;
|
||||
out += addition;
|
||||
insertions.push({ pos: link.end, length: addition.length });
|
||||
} else {
|
||||
// Check if label is similar enough to URL that showing both would be redundant
|
||||
const normalizedLabel = normalizeUrlForComparison(trimmedLabel);
|
||||
let comparableHref = href;
|
||||
if (href.startsWith("mailto:")) {
|
||||
comparableHref = href.slice("mailto:".length);
|
||||
}
|
||||
const normalizedHref = normalizeUrlForComparison(comparableHref);
|
||||
|
||||
// Only show URL if label is meaningfully different from it
|
||||
if (normalizedLabel !== normalizedHref) {
|
||||
const addition = ` (${href})`;
|
||||
out += addition;
|
||||
insertions.push({ pos: link.end, length: addition.length });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -214,13 +238,136 @@ export function markdownToSignalText(
|
||||
const ir = markdownToIR(markdown ?? "", {
|
||||
linkify: true,
|
||||
enableSpoilers: true,
|
||||
headingStyle: "none",
|
||||
blockquotePrefix: "",
|
||||
headingStyle: "bold",
|
||||
blockquotePrefix: "> ",
|
||||
tableMode: options.tableMode,
|
||||
});
|
||||
return renderSignalText(ir);
|
||||
}
|
||||
|
||||
/**
 * Clip style ranges to the half-open window `[start, end)` and re-base them
 * so that offset 0 corresponds to `start`.
 *
 * Ranges that do not overlap the window are dropped; ranges that straddle a
 * window edge are shortened to the overlapping part. The input array and its
 * elements are not modified.
 *
 * @param styles - Style ranges expressed in coordinates of the full text.
 * @param start - Inclusive start of the window in full-text coordinates.
 * @param end - Exclusive end of the window in full-text coordinates.
 * @returns New ranges, in input order, expressed relative to `start`.
 */
function sliceSignalStyles(
  styles: SignalTextStyleRange[],
  start: number,
  end: number,
): SignalTextStyleRange[] {
  return styles.flatMap((range) => {
    const from = Math.max(range.start, start);
    const to = Math.min(range.start + range.length, end);
    // Keep only ranges with a non-empty overlap.
    return to > from ? [{ start: from - start, length: to - from, style: range.style }] : [];
  });
}
|
||||
|
||||
/**
 * Split Signal formatted text into chunks of at most `limit` UTF-16 code units
 * while preserving styles.
 *
 * The cursor position is tracked by offset arithmetic rather than indexOf,
 * which is fragile when chunks are trimmed or when duplicate substrings exist.
 * Styles spanning chunk boundaries are split into separate ranges per chunk,
 * each re-based to the start of its chunk.
 *
 * Break points are chosen by `findBreakIndex`; when it finds none, the text is
 * hard-broken at the limit (backing off by one code unit if that would cut a
 * surrogate pair in half). Whitespace at chunk boundaries is dropped because
 * chunks are sent as separate messages.
 *
 * @param formatted - Rendered text plus style ranges in full-text coordinates.
 * @param limit - Maximum chunk length. Values below 1 (or NaN) cannot be
 *   satisfied; in that case the input is returned unsplit instead of looping
 *   forever.
 * @returns Non-empty chunks in original order.
 */
function splitSignalFormattedText(
  formatted: SignalFormattedText,
  limit: number,
): SignalFormattedText[] {
  const { text, styles } = formatted;

  if (text.length <= limit) {
    return [formatted];
  }

  // A limit below one code unit can never make progress; bail out rather than
  // spin in the loop below with an empty window.
  const maxLength = Math.floor(limit);
  if (!(maxLength >= 1)) {
    return [formatted];
  }

  const results: SignalFormattedText[] = [];
  let remaining = text;
  let offset = 0; // Position of `remaining` within the original text, for style slicing

  while (remaining.length > 0) {
    if (remaining.length <= maxLength) {
      // Last chunk - take everything remaining
      const trimmed = remaining.trimEnd();
      if (trimmed.length > 0) {
        results.push({
          text: trimmed,
          styles: mergeStyles(sliceSignalStyles(styles, offset, offset + trimmed.length)),
        });
      }
      break;
    }

    // Find a good break point within the limit
    let breakIdx = findBreakIndex(remaining.slice(0, maxLength));

    // If no good break point found, hard break at limit
    if (breakIdx <= 0) {
      breakIdx = maxLength;
      // Never separate a high surrogate from its low surrogate when there is
      // room to back off; a lone surrogate renders as a replacement character.
      const before = remaining.charCodeAt(breakIdx - 1);
      const after = remaining.charCodeAt(breakIdx);
      const splitsSurrogatePair =
        before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
      if (splitsSurrogatePair && breakIdx > 1) {
        breakIdx -= 1;
      }
    }

    // Extract chunk and trim trailing whitespace
    const chunk = remaining.slice(0, breakIdx).trimEnd();

    if (chunk.length > 0) {
      results.push({
        text: chunk,
        styles: mergeStyles(sliceSignalStyles(styles, offset, offset + chunk.length)),
      });
    }

    // Chunks are sent as separate messages, so we intentionally drop boundary whitespace
    // (trimStart also consumes the whitespace character we broke on, if any).
    // Keep `offset` in sync with the dropped characters so style slicing stays correct.
    remaining = remaining.slice(breakIdx).trimStart();
    offset = text.length - remaining.length;
  }

  return results;
}
|
||||
|
||||
/**
 * Find the best break index within a text window.
 *
 * Preference order (only indices greater than 0 qualify):
 * 1. last newline outside parentheses
 * 2. last other whitespace outside parentheses
 * 3. last newline inside parentheses
 * 4. last other whitespace inside parentheses
 *
 * Breaking inside parentheses is avoided so that expanded link URLs such as
 * `label (https://…)` stay intact. The inside-parentheses candidates are only
 * a fallback: without them a single unmatched `(` (for example a `:(`
 * emoticon) would disable every later break point and force the caller into a
 * mid-word hard break. URLs contain no whitespace, so the fallback never
 * splits one.
 *
 * @param window - The slice of text that must contain the break.
 * @returns Index of the whitespace character to break at, or a value `<= 0`
 *   when no usable break point exists.
 */
function findBreakIndex(window: string): number {
  const whitespace = /\s/; // hoisted so the loop does not rebuild it per character
  let lastNewline = -1;
  let lastWhitespace = -1;
  let lastNestedNewline = -1;
  let lastNestedWhitespace = -1;
  let parenDepth = 0;

  for (let i = 0; i < window.length; i++) {
    const char = window.charAt(i);

    if (char === "(") {
      parenDepth++;
      continue;
    }
    if (char === ")" && parenDepth > 0) {
      parenDepth--;
      continue;
    }

    const nested = parenDepth > 0;
    if (char === "\n") {
      if (nested) {
        lastNestedNewline = i;
      } else {
        lastNewline = i;
      }
    } else if (whitespace.test(char)) {
      if (nested) {
        lastNestedWhitespace = i;
      } else {
        lastWhitespace = i;
      }
    }
  }

  // Prefer break points outside parentheses: newline first, then whitespace.
  if (lastNewline > 0) {
    return lastNewline;
  }
  if (lastWhitespace > 0) {
    return lastWhitespace;
  }
  // Nothing usable outside parentheses: accept a break inside them rather
  // than forcing a hard break in the middle of a word.
  if (lastNestedNewline > 0) {
    return lastNestedNewline;
  }
  if (lastNestedWhitespace > 0) {
    return lastNestedWhitespace;
  }
  return lastWhitespace;
}
|
||||
|
||||
export function markdownToSignalTextChunks(
|
||||
markdown: string,
|
||||
limit: number,
|
||||
@@ -229,10 +376,22 @@ export function markdownToSignalTextChunks(
|
||||
const ir = markdownToIR(markdown ?? "", {
|
||||
linkify: true,
|
||||
enableSpoilers: true,
|
||||
headingStyle: "none",
|
||||
blockquotePrefix: "",
|
||||
headingStyle: "bold",
|
||||
blockquotePrefix: "> ",
|
||||
tableMode: options.tableMode,
|
||||
});
|
||||
const chunks = chunkMarkdownIR(ir, limit);
|
||||
return chunks.map((chunk) => renderSignalText(chunk));
|
||||
const results: SignalFormattedText[] = [];
|
||||
|
||||
for (const chunk of chunks) {
|
||||
const rendered = renderSignalText(chunk);
|
||||
// If link expansion caused the chunk to exceed the limit, re-chunk it
|
||||
if (rendered.text.length > limit) {
|
||||
results.push(...splitSignalFormattedText(rendered, limit));
|
||||
} else {
|
||||
results.push(rendered);
|
||||
}
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user