fix(signal): outbound formatting and markdown IR rendering improvements (#9781)

* fix: Signal and markdown formatting improvements

Markdown IR fixes:
- Fix list-paragraph spacing (extra newline between list items and following paragraphs)
- Fix nested list indentation and newline handling
- Fix blockquote_close emitting redundant newline (inner content handles spacing)
- Render horizontal rules as visible ─── separator instead of silent drop
- Strip inner cell styles in code-mode tables to prevent overlapping with code_block span

Signal formatting fixes:
- Normalize URLs for dedup comparison (strip protocol, www., trailing slash)
- Render headings as bold text (headingStyle: 'bold')
- Add '> ' prefix to blockquotes for visual distinction
- Re-chunk after link expansion to respect chunk size limits

Tests:
- 51 new tests for markdown IR (spacing, lists, blockquotes, tables, HR)
- 18 new tests for Signal formatting (URL dedup, headings, blockquotes, HR, chunking)
- Update Slack nested list test expectation to match corrected IR output

* refactor: style-aware Signal text chunker

Replace indexOf-based chunk position tracking with deterministic
cursor tracking. The new splitSignalFormattedText:

- Splits at whitespace/newline boundaries within the limit
- Avoids breaking inside parentheses (preserves expanded link URLs)
- Slices style ranges at chunk boundaries with correct local offsets
- Tracks position via offset arithmetic instead of fragile indexOf

Removes dependency on chunkText from auto-reply/chunk.

Tests: 19 new tests covering style preservation across chunk boundaries,
edge cases (empty text, under limit, exact split points), and integration
with link expansion.

* fix: correct Signal style offsets with multiple link expansions

applyInsertionsToStyles() was using original coordinates for each
insertion without tracking cumulative shift from prior insertions.
This caused bold/italic/etc styles to drift to wrong text positions
when multiple markdown links expanded in a single message.

Added cumulative shift tracking and a regression test.

* test: clean up test noise and fix ineffective assertions

- Remove console.log from ir.list-spacing and ir.hr-spacing tests
- Fix ir.nested-lists.test.ts: remove ineffective regex assertion
- Fix ir.hr-spacing.test.ts: add actual assertions to edge case test

* refactor: split Signal formatting tests (#9781) (thanks @heyhudson)

---------

Co-authored-by: Hudson <258693705+hudson-rivera@users.noreply.github.com>
Co-authored-by: Peter Steinberger <steipete@gmail.com>
This commit is contained in:
Hudson
2026-02-14 10:57:20 -05:00
committed by GitHub
parent 226bf74634
commit 1d6abddb9f
12 changed files with 1530 additions and 24 deletions

View File

@@ -34,6 +34,17 @@ type Insertion = {
length: number;
};
function normalizeUrlForComparison(url: string): string {
let normalized = url.toLowerCase();
// Strip protocol
normalized = normalized.replace(/^https?:\/\//, "");
// Strip www. prefix
normalized = normalized.replace(/^www\./, "");
// Strip trailing slashes
normalized = normalized.replace(/\/+$/, "");
return normalized;
}
function mapStyle(style: MarkdownStyle): SignalTextStyle | null {
switch (style) {
case "bold":
@@ -100,15 +111,17 @@ function applyInsertionsToStyles(
}
const sortedInsertions = [...insertions].toSorted((a, b) => a.pos - b.pos);
let updated = spans;
let cumulativeShift = 0;
for (const insertion of sortedInsertions) {
const insertionPos = insertion.pos + cumulativeShift;
const next: SignalStyleSpan[] = [];
for (const span of updated) {
if (span.end <= insertion.pos) {
if (span.end <= insertionPos) {
next.push(span);
continue;
}
if (span.start >= insertion.pos) {
if (span.start >= insertionPos) {
next.push({
start: span.start + insertion.length,
end: span.end + insertion.length,
@@ -116,15 +129,15 @@ function applyInsertionsToStyles(
});
continue;
}
if (span.start < insertion.pos && span.end > insertion.pos) {
if (insertion.pos > span.start) {
if (span.start < insertionPos && span.end > insertionPos) {
if (insertionPos > span.start) {
next.push({
start: span.start,
end: insertion.pos,
end: insertionPos,
style: span.style,
});
}
const shiftedStart = insertion.pos + insertion.length;
const shiftedStart = insertionPos + insertion.length;
const shiftedEnd = span.end + insertion.length;
if (shiftedEnd > shiftedStart) {
next.push({
@@ -136,6 +149,7 @@ function applyInsertionsToStyles(
}
}
updated = next;
cumulativeShift += insertion.length;
}
return updated;
@@ -161,16 +175,26 @@ function renderSignalText(ir: MarkdownIR): SignalFormattedText {
const href = link.href.trim();
const label = text.slice(link.start, link.end);
const trimmedLabel = label.trim();
const comparableHref = href.startsWith("mailto:") ? href.slice("mailto:".length) : href;
if (href) {
if (!trimmedLabel) {
out += href;
insertions.push({ pos: link.end, length: href.length });
} else if (trimmedLabel !== href && trimmedLabel !== comparableHref) {
const addition = ` (${href})`;
out += addition;
insertions.push({ pos: link.end, length: addition.length });
} else {
// Check if label is similar enough to URL that showing both would be redundant
const normalizedLabel = normalizeUrlForComparison(trimmedLabel);
let comparableHref = href;
if (href.startsWith("mailto:")) {
comparableHref = href.slice("mailto:".length);
}
const normalizedHref = normalizeUrlForComparison(comparableHref);
// Only show URL if label is meaningfully different from it
if (normalizedLabel !== normalizedHref) {
const addition = ` (${href})`;
out += addition;
insertions.push({ pos: link.end, length: addition.length });
}
}
}
@@ -214,13 +238,136 @@ export function markdownToSignalText(
const ir = markdownToIR(markdown ?? "", {
linkify: true,
enableSpoilers: true,
headingStyle: "none",
blockquotePrefix: "",
headingStyle: "bold",
blockquotePrefix: "> ",
tableMode: options.tableMode,
});
return renderSignalText(ir);
}
function sliceSignalStyles(
styles: SignalTextStyleRange[],
start: number,
end: number,
): SignalTextStyleRange[] {
const sliced: SignalTextStyleRange[] = [];
for (const style of styles) {
const styleEnd = style.start + style.length;
const sliceStart = Math.max(style.start, start);
const sliceEnd = Math.min(styleEnd, end);
if (sliceEnd > sliceStart) {
sliced.push({
start: sliceStart - start,
length: sliceEnd - sliceStart,
style: style.style,
});
}
}
return sliced;
}
/**
* Split Signal formatted text into chunks under the limit while preserving styles.
*
* This implementation deterministically tracks cursor position without using indexOf,
* which is fragile when chunks are trimmed or when duplicate substrings exist.
* Styles spanning chunk boundaries are split into separate ranges for each chunk.
*/
function splitSignalFormattedText(
formatted: SignalFormattedText,
limit: number,
): SignalFormattedText[] {
const { text, styles } = formatted;
if (text.length <= limit) {
return [formatted];
}
const results: SignalFormattedText[] = [];
let remaining = text;
let offset = 0; // Track position in original text for style slicing
while (remaining.length > 0) {
if (remaining.length <= limit) {
// Last chunk - take everything remaining
const trimmed = remaining.trimEnd();
if (trimmed.length > 0) {
results.push({
text: trimmed,
styles: mergeStyles(sliceSignalStyles(styles, offset, offset + trimmed.length)),
});
}
break;
}
// Find a good break point within the limit
const window = remaining.slice(0, limit);
let breakIdx = findBreakIndex(window);
// If no good break point found, hard break at limit
if (breakIdx <= 0) {
breakIdx = limit;
}
// Extract chunk and trim trailing whitespace
const rawChunk = remaining.slice(0, breakIdx);
const chunk = rawChunk.trimEnd();
if (chunk.length > 0) {
results.push({
text: chunk,
styles: mergeStyles(sliceSignalStyles(styles, offset, offset + chunk.length)),
});
}
// Advance past the chunk and any whitespace separator
const brokeOnWhitespace = breakIdx < remaining.length && /\s/.test(remaining[breakIdx]);
const nextStart = Math.min(remaining.length, breakIdx + (brokeOnWhitespace ? 1 : 0));
// Chunks are sent as separate messages, so we intentionally drop boundary whitespace.
// Keep `offset` in sync with the dropped characters so style slicing stays correct.
remaining = remaining.slice(nextStart).trimStart();
offset = text.length - remaining.length;
}
return results;
}
/**
* Find the best break index within a text window.
* Prefers newlines over whitespace, avoids breaking inside parentheses.
*/
function findBreakIndex(window: string): number {
let lastNewline = -1;
let lastWhitespace = -1;
let parenDepth = 0;
for (let i = 0; i < window.length; i++) {
const char = window[i];
if (char === "(") {
parenDepth++;
continue;
}
if (char === ")" && parenDepth > 0) {
parenDepth--;
continue;
}
// Only consider break points outside parentheses
if (parenDepth === 0) {
if (char === "\n") {
lastNewline = i;
} else if (/\s/.test(char)) {
lastWhitespace = i;
}
}
}
// Prefer newline break, fall back to whitespace
return lastNewline > 0 ? lastNewline : lastWhitespace;
}
export function markdownToSignalTextChunks(
markdown: string,
limit: number,
@@ -229,10 +376,22 @@ export function markdownToSignalTextChunks(
const ir = markdownToIR(markdown ?? "", {
linkify: true,
enableSpoilers: true,
headingStyle: "none",
blockquotePrefix: "",
headingStyle: "bold",
blockquotePrefix: "> ",
tableMode: options.tableMode,
});
const chunks = chunkMarkdownIR(ir, limit);
return chunks.map((chunk) => renderSignalText(chunk));
const results: SignalFormattedText[] = [];
for (const chunk of chunks) {
const rendered = renderSignalText(chunk);
// If link expansion caused the chunk to exceed the limit, re-chunk it
if (rendered.text.length > limit) {
results.push(...splitSignalFormattedText(rendered, limit));
} else {
results.push(rendered);
}
}
return results;
}