fix(telegram): use regex literal and depth counters for tag tracking

Code review fixes:
1. Replace RegExp constructor with regex literal for autoLinkedAnchor
   - Avoids double-escaping issues with \s
   - Uses backreference \1 to match href=label pattern directly
     (the regex carries the `i` flag, so \1 compares case-insensitively)

2. Replace boolean toggles with depth counters for tag nesting
   - codeDepth, preDepth, anchorDepth track nesting levels
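   - Correctly handles same-tag nesting (e.g. <code><code>...</code>...</code>),
     where a boolean would be cleared by the inner closing tag; mixed nesting
     like <pre><code>...</code></pre> keeps working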
   - Prevents wrapping inside any level of protected tags

Add 4 tests for edge cases:
- Nested code tags (depth tracking)
- Multiple anchor tags in sequence
- Auto-linked anchor with backreference match
- Anchor with different href/label (no match)
This commit is contained in:
divanoli
2026-02-05 11:36:58 +03:00
parent 58c69ee8bd
commit 8a5453e3e7
2 changed files with 40 additions and 16 deletions

View File

@@ -137,12 +137,9 @@ export function wrapFileReferencesInHtml(html: string): string {
const extensionsPattern = Array.from(FILE_EXTENSIONS_WITH_TLD).join("|");
// Safety-net: de-linkify auto-generated anchors where href="http://<label>" (defense in depth for textMode: "html")
const autoLinkedAnchor = new RegExp(`<a\\s+href="https?://([^"]+)"\\s*>([^<]+)</a>`, "gi");
html = html.replace(autoLinkedAnchor, (_match, href: string, label: string) => {
if (href !== label) {
return _match;
}
if (!isAutoLinkedFileRef(`http://${href}`, label)) {
const autoLinkedAnchor = /<a\s+href="https?:\/\/([^"]+)"[^>]*>\1<\/a>/gi;
html = html.replace(autoLinkedAnchor, (_match, label: string) => {
if (!isAutoLinkedFileRef(`http://${label}`, label)) {
return _match;
}
return `<code>${label}</code>`;
@@ -152,10 +149,10 @@ export function wrapFileReferencesInHtml(html: string): string {
"gi",
);
// Track if we're inside tags that should not be modified
let inCode = false;
let inPre = false;
let inAnchor = false;
// Track nesting depth for tags that should not be modified
let codeDepth = 0;
let preDepth = 0;
let anchorDepth = 0;
let result = "";
let lastIndex = 0;
@@ -173,7 +170,7 @@ export function wrapFileReferencesInHtml(html: string): string {
const textBefore = html.slice(lastIndex, tagStart);
result += textBefore.replace(filePattern, (m, prefix, filename) => {
// Skip if inside protected tags or if it's a URL
if (inCode || inPre || inAnchor) {
if (codeDepth > 0 || preDepth > 0 || anchorDepth > 0) {
return m;
}
if (filename.startsWith("//")) {
@@ -185,13 +182,13 @@ export function wrapFileReferencesInHtml(html: string): string {
return `${prefix}<code>${filename}</code>`;
});
// Update tag state
// Update tag depth
if (tagName === "code") {
inCode = !isClosing;
codeDepth += isClosing ? -1 : 1;
} else if (tagName === "pre") {
inPre = !isClosing;
preDepth += isClosing ? -1 : 1;
} else if (tagName === "a") {
inAnchor = !isClosing;
anchorDepth += isClosing ? -1 : 1;
}
// Add the tag itself
@@ -202,7 +199,7 @@ export function wrapFileReferencesInHtml(html: string): string {
// Process remaining text
const remainingText = html.slice(lastIndex);
result += remainingText.replace(filePattern, (m, prefix, filename) => {
if (inCode || inPre || inAnchor) {
if (codeDepth > 0 || preDepth > 0 || anchorDepth > 0) {
return m;
}
if (filename.startsWith("//")) {

View File

@@ -250,4 +250,31 @@ describe("edge cases", () => {
expect(result).toContain("<code>README.MD</code>");
expect(result).toContain("<code>SCRIPT.PY</code>");
});
it("handles nested code tags (depth tracking)", () => {
  // Mixed nesting: <code> inside <pre> - inner content must not be wrapped,
  // while the file name after the closing tags still is.
  const input = "<pre><code>README.md</code></pre> then script.py";
  const result = wrapFileReferencesInHtml(input);
  expect(result).toBe("<pre><code>README.md</code></pre> then <code>script.py</code>");

  // Same-tag nesting: this is the case that actually needs a depth counter.
  // With a boolean flag the inner </code> would clear the "inside code" state,
  // and README.md (still inside the outer <code>) would get wrapped again.
  const sameTagNesting = "<code><code>x</code> README.md</code> then script.py";
  expect(wrapFileReferencesInHtml(sameTagNesting)).toBe(
    "<code><code>x</code> README.md</code> then <code>script.py</code>",
  );
});
it("handles multiple anchor tags in sequence", () => {
  // Two unrelated links, each followed by a bare file name that sits outside any anchor.
  const html =
    '<a href="https://a.com">link1</a> README.md <a href="https://b.com">link2</a> script.py';
  const wrapped = wrapFileReferencesInHtml(html);

  // Each file name after a closing </a> is expected to be wrapped in <code>.
  const expectedFragments = ["</a> <code>README.md</code> <a", "</a> <code>script.py</code>"];
  for (const fragment of expectedFragments) {
    expect(wrapped).toContain(fragment);
  }
});
it("handles auto-linked anchor with backreference match", () => {
  // The href (minus its scheme) repeats the link text, which is the shape the
  // \1 backreference is meant to catch; the anchor should collapse to <code>.
  const autoLinked = '<a href="http://README.md">README.md</a>';
  const output = wrapFileReferencesInHtml(autoLinked);
  expect(output).toBe("<code>README.md</code>");
});
it("preserves anchor when href and label differ (no backreference match)", () => {
  // Link text is not the href target, so the markup must come back untouched.
  const original = '<a href="http://other.md">README.md</a>';
  const output = wrapFileReferencesInHtml(original);
  expect(output).toBe(original);
});
});