fix(telegram): clamp depth counters and add anchor tracking to orphaned TLD pass

- Clamp depth counters at 0 for malformed HTML with stray closing tags
- Add anchor depth tracking to orphaned TLD pass to prevent wrapping
  inside link text (e.g., <a href="...">R&D.md</a>)
- 57 tests covering all edge cases

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
divanoli
2026-02-05 13:11:18 +03:00
parent e32dcb566d
commit 6bf2ced2f5
2 changed files with 30 additions and 7 deletions

View File

@@ -187,13 +187,13 @@ export function wrapFileReferencesInHtml(html: string): string {
return `${prefix}<code>${escapeHtml(filename)}</code>`;
});
// Update tag depth
// Update tag depth (clamp at 0 for malformed HTML with stray closing tags)
if (tagName === "code") {
codeDepth += isClosing ? -1 : 1;
codeDepth = isClosing ? Math.max(0, codeDepth - 1) : codeDepth + 1;
} else if (tagName === "pre") {
preDepth += isClosing ? -1 : 1;
preDepth = isClosing ? Math.max(0, preDepth - 1) : preDepth + 1;
} else if (tagName === "a") {
anchorDepth += isClosing ? -1 : 1;
anchorDepth = isClosing ? Math.max(0, anchorDepth - 1) : anchorDepth + 1;
}
// Add the tag itself
@@ -236,14 +236,16 @@ export function wrapFileReferencesInHtml(html: string): string {
if (lastOpen > lastClose) {
return m; // Inside a tag attribute
}
// Skip if inside code/pre tags (count opens vs closes before offset)
// Skip if inside code/pre/anchor tags (count opens vs closes before offset)
const textBefore = snapshot.slice(0, offset);
const codeOpens = (textBefore.match(/<code/gi) || []).length;
const codeCloses = (textBefore.match(/<\/code/gi) || []).length;
const preOpens = (textBefore.match(/<pre/gi) || []).length;
const preCloses = (textBefore.match(/<\/pre/gi) || []).length;
if (codeOpens > codeCloses || preOpens > preCloses) {
return m; // Inside code/pre content
const anchorOpens = (textBefore.match(/<a[\s>]/gi) || []).length;
const anchorCloses = (textBefore.match(/<\/a/gi) || []).length;
if (codeOpens > codeCloses || preOpens > preCloses || anchorOpens > anchorCloses) {
return m; // Inside code/pre/anchor content
}
return `${prefix}<code>${escapeHtml(tld)}</code>`;
});

View File

@@ -336,6 +336,27 @@ describe("edge cases", () => {
expect(result).not.toContain("</code></code>");
});
it("does not wrap orphaned TLD inside anchor link text", () => {
  // Link text "R&D.md" contains the orphaned fragment "D.md"; because it sits
  // between <a ...> and </a>, the wrapper is expected to leave it alone.
  const anchorHtml = '<a href="https://example.com">R&D.md</a>';
  const output = wrapFileReferencesInHtml(anchorHtml);

  // The whole string must come back unchanged...
  expect(output).toBe(anchorHtml);
  // ...and in particular no <code> wrapper may appear around the fragment.
  expect(output).not.toContain("<code>D.md</code>");
});
it("handles malformed HTML with stray closing tags (negative depth)", () => {
  // The input opens with an unmatched </code>. The depth counter is expected
  // to clamp at 0 rather than go negative, so later text is still treated as
  // being outside any <code> element.
  const malformedHtml = "</code>README.md<code>inside</code> after.md";
  const output = wrapFileReferencesInHtml(malformedHtml);

  // Filename right after the stray close: wrapped, since depth is back at 0.
  expect(output).toContain("<code>README.md</code>");
  // Filename after the well-formed <code>...</code> pair: wrapped as well.
  expect(output).toContain("<code>after.md</code>");
  // Wrapping must never produce a <code> nested directly inside another.
  expect(output).not.toContain("<code><code>");
});
it("does not wrap orphaned TLD inside href attributes", () => {
// D.md inside href should NOT be wrapped
const input = '<a href="http://example.com/R&D.md">link</a>';