fix(telegram): prevent orphaned TLD wrapping inside HTML tags

Code review fixes:

1. Orphaned TLD pass now checks whether the match is inside an HTML tag
   - Compares lastIndexOf('<') with lastIndexOf('>'), both searched backwards
     from the match offset, to detect tag context
   - Skips wrapping when the match falls between < and > (e.g. inside
     attribute values)
   - Prevents invalid HTML like <a href="...&<code>D.md</code>">

2. textMode: 'html' now trusts caller markup
   - Returns text unchanged instead of passing it through
     wrapFileReferencesInHtml (no file references are wrapped)
   - Caller owns the HTML structure in this mode

Tests added:
- 'does not wrap orphaned TLD inside href attributes'
- 'does not wrap orphaned TLD inside any HTML attribute'
- 'does not wrap in HTML mode (trusts caller markup)'
This commit is contained in:
divanoli
2026-02-05 12:23:24 +03:00
parent 94851de4f8
commit 352398b9a5
2 changed files with 29 additions and 6 deletions

View File

@@ -222,11 +222,17 @@ export function wrapFileReferencesInHtml(html: string): string {
`([^a-zA-Z0-9]|^)([A-Za-z]\\.(?:${extensionsPattern}))(?=[^a-zA-Z0-9/]|$)`,
"g",
);
result = result.replace(orphanedTldPattern, (m, prefix, tld) => {
// Skip if already wrapped in a tag (check for < before or > after in context)
result = result.replace(orphanedTldPattern, (m, prefix, tld, offset) => {
// Skip if prefix is > (right after a tag close)
if (prefix === ">") {
return m;
}
// Skip if we're inside an HTML tag (between < and >)
const lastOpen = result.lastIndexOf("<", offset);
const lastClose = result.lastIndexOf(">", offset);
if (lastOpen > lastClose) {
return m; // Inside a tag
}
return `${prefix}<code>${escapeHtml(tld)}</code>`;
});
@@ -239,8 +245,8 @@ export function renderTelegramHtmlText(
): string {
const textMode = options.textMode ?? "markdown";
if (textMode === "html") {
// For HTML mode, still wrap file references in the HTML
return wrapFileReferencesInHtml(text);
// For HTML mode, trust caller markup - don't modify
return text;
}
// markdownToTelegramHtml already wraps file references by default
return markdownToTelegramHtml(text, { tableMode: options.tableMode });

View File

@@ -101,9 +101,11 @@ describe("renderTelegramHtmlText - file reference wrapping", () => {
expect(result).toContain("<code>README.md</code>");
});
it("wraps file references in HTML mode", () => {
it("does not wrap in HTML mode (trusts caller markup)", () => {
// textMode: "html" should pass through unchanged - caller owns the markup
const result = renderTelegramHtmlText("Check README.md", { textMode: "html" });
expect(result).toContain("<code>README.md</code>");
expect(result).toBe("Check README.md");
expect(result).not.toContain("<code>");
});
it("does not double-wrap already code-formatted content", () => {
@@ -324,4 +326,19 @@ describe("edge cases", () => {
// Only x.md gets wrapped, the rest passes through
expect(result).toBe("<code>x.md</code></code><b>bold</b>");
});
it("does not wrap orphaned TLD inside href attributes", () => {
// Regression test: "D.md" (the tail of "R&D.md", preceded by the non-alphanumeric "&")
// has the single-letter + extension shape the orphaned-TLD pass looks for, but here it
// sits inside the <a ...> tag's href value, so it should NOT be wrapped.
const input = '<a href="http://example.com/R&D.md">link</a>';
const result = wrapFileReferencesInHtml(input);
// href should be untouched: the whole string must come back unchanged
expect(result).toBe(input);
// Implied by the equality check above; kept to name the specific bad output being guarded against
expect(result).not.toContain("<code>D.md</code>");
});
it("does not wrap orphaned TLD inside any HTML attribute", () => {
// "R&D.md" appears in both the src and alt attribute values of a single <img> tag;
// both occurrences lie between "<" and ">", so neither should be wrapped.
const input = '<img src="logo/R&D.md" alt="R&D.md">';
const result = wrapFileReferencesInHtml(input);
// Output must be identical to the input (no <code> inserted anywhere inside the tag)
expect(result).toBe(input);
});
});