fix(telegram): prevent URL previews for file refs with TLD extensions

Two layers were causing spurious link previews for file references like
`README.md`, `backup.sh`, `main.go`:

1. **markdown-it linkify** converts `README.md` to
   `<a href="http://README.md">README.md</a>` (.md = Moldova TLD)
2. **Telegram auto-linker** treats remaining bare text as URLs

## Changes

### Primary fix: suppress auto-linkified file refs in buildTelegramLink
- Added `isAutoLinkedFileRef()` helper that detects when linkify auto-
  generated a link from a bare filename (href = "http://" + label)
- Rejects paths with domain-like segments (dots in non-final path parts)
- Modified `buildTelegramLink()` to return null for these, so file refs
  stay as plain text and get wrapped in `<code>` by the wrapper

### Safety-net: de-linkify in wrapFileReferencesInHtml
- Added pre-pass that catches auto-linkified anchors in pre-rendered HTML
- Handles edge cases where HTML is passed directly (textMode: "html")
- Reuses `isAutoLinkedFileRef()` logic — no duplication

### Bug fixes discovered during review
- **Fixed `isClosing` bug (line 169)**: the check `match[1] === "/"`
  was wrong — the regex group `(<\/?)` captures `<` or `</`, never a
  bare `/`, so closing tags were never detected. Changed to
  `match[1] === "</"`. This was causing `inCode/inPre/inAnchor` to stay
  stuck at true after any opening tag, breaking file ref wrapping after
  closing tags.
- **Removed double `wrapFileReferencesInHtml` call**: `renderTelegramHtmlText`
  was calling `markdownToTelegramHtml` (which wraps) then wrapping again.

### Test coverage (+12 tests, 26 total)
- `.sh` filenames (original issue #6932 mentioned backup.sh)
- Auto-linkified anchor replacement
- Auto-linkified path anchor replacement
- Explicit link preservation (different label)
- File ref after closing anchor tag (exercises isClosing fix)
- Multiple file types in single message
- Real URL preservation
- Explicit markdown link preservation
- File ref after real URL in same message
- Chunked output file ref wrapping

Closes #6932
This commit is contained in:
divanoli
2026-02-05 10:47:39 +03:00
parent 70f73e6f8d
commit 99311daaed
2 changed files with 132 additions and 34 deletions

View File

@@ -20,7 +20,57 @@ function escapeHtmlAttr(text: string): string {
return escapeHtml(text).replace(/"/g, "&quot;");
}
function buildTelegramLink(link: MarkdownLinkSpan, _text: string) {
/**
 * File extensions that are also top-level domains and that commonly show up
 * in code or documentation. A bare filename ending in one of these can be
 * mistaken for a URL, so such references are wrapped in <code> tags to keep
 * Telegram from generating spurious domain registrar previews.
 */
const FILE_EXTENSIONS_WITH_TLD = new Set([
  // High priority - commonly referenced in messages
  "md", // Markdown (Moldova)
  "go", // Go language
  "py", // Python (Paraguay)
  "pl", // Perl (Poland)
  "ai", // Adobe Illustrator (Anguilla)
  "sh", // Shell (Saint Helena)
  // Medium priority - sometimes referenced
  "io", // British Indian Ocean Territory (often used for tech projects)
  "tv", // Tuvalu (video files)
  "fm", // Federated States of Micronesia (audio)
  "am", // Armenia
  "at", // Austria
  "be", // Belgium
  "cc", // Cocos Islands
  "co", // Colombia
]);

/**
 * Tells whether a link was auto-generated by markdown-it linkify from a bare
 * filename (e.g. README.md → http://README.md) rather than written explicitly.
 *
 * @param href - Link target, with or without a leading http(s):// scheme.
 * @param label - Visible link text.
 * @returns true only when the href minus its scheme equals the label, the
 *   label ends in a tracked extension, and no directory segment in front of
 *   the filename contains a dot.
 */
function isAutoLinkedFileRef(href: string, label: string): boolean {
  // An auto-generated link is just the scheme glued onto the visible text;
  // any other href/label pairing is treated as an explicit link.
  const withoutScheme = href.replace(/^https?:\/\//i, "");
  if (withoutScheme !== label) return false;

  // Need a dot that is not the very first character of the label.
  const lastDot = label.lastIndexOf(".");
  if (lastDot < 1) return false;

  const extension = label.slice(lastDot + 1).toLowerCase();
  // Every segment except the final one (the filename itself); a dot in any
  // of them makes the label look like a domain, so it is left alone.
  const leadingSegments = label.split("/").slice(0, -1);
  return (
    FILE_EXTENSIONS_WITH_TLD.has(extension) &&
    !leadingSegments.some((segment) => segment.includes("."))
  );
}
function buildTelegramLink(link: MarkdownLinkSpan, text: string) {
const href = link.href.trim();
if (!href) {
return null;
@@ -28,6 +78,11 @@ function buildTelegramLink(link: MarkdownLinkSpan, _text: string) {
if (link.start === link.end) {
return null;
}
// Suppress auto-linkified file references (e.g. README.md → http://README.md)
const label = text.slice(link.start, link.end);
if (isAutoLinkedFileRef(href, label)) {
return null;
}
const safeHref = escapeHtmlAttr(href);
return {
start: link.start,
@@ -69,30 +124,6 @@ export function markdownToTelegramHtml(
return html;
}
/**
 * File extensions that share TLDs and commonly appear in code/documentation.
 * These are wrapped in <code> tags to prevent Telegram from generating
 * spurious domain registrar previews.
 *
 * Entries are lowercase and carry no leading dot; the parenthesised note on
 * each line names the country or territory that owns the matching TLD.
 */
const FILE_EXTENSIONS_WITH_TLD = new Set([
// High priority - commonly referenced in messages
"md", // Markdown (Moldova)
"go", // Go language
"py", // Python (Paraguay)
"pl", // Perl (Poland)
"ai", // Adobe Illustrator (Anguilla)
"sh", // Shell (Saint Helena)
// Medium priority - sometimes referenced
"io", // British Indian Ocean Territory (often used for tech projects)
"tv", // Tuvalu (video files)
"fm", // Federated States of Micronesia (audio)
"am", // Armenia
"at", // Austria
"be", // Belgium
"cc", // Cocos Islands
"co", // Colombia
]);
/**
* Wraps standalone file references (with TLD extensions) in <code> tags.
* This prevents Telegram from treating them as URLs and generating
@@ -104,6 +135,18 @@ const FILE_EXTENSIONS_WITH_TLD = new Set([
export function wrapFileReferencesInHtml(html: string): string {
// Build regex pattern for all tracked extensions
const extensionsPattern = Array.from(FILE_EXTENSIONS_WITH_TLD).join("|");
// Safety-net: de-linkify auto-generated anchors where href="http://<label>" (defense in depth for textMode: "html")
const autoLinkedAnchor = new RegExp(`<a\\s+href="https?://([^"]+)"\\s*>([^<]+)</a>`, "gi");
html = html.replace(autoLinkedAnchor, (_match, href: string, label: string) => {
if (href !== label) {
return _match;
}
if (!isAutoLinkedFileRef(`http://${href}`, label)) {
return _match;
}
return `<code>${label}</code>`;
});
const filePattern = new RegExp(
`(^|>|[\\s])([a-zA-Z0-9_.\\-./]+\\.(?:${extensionsPattern}))(?=$|[\\s<])`,
"gi",
@@ -123,7 +166,7 @@ export function wrapFileReferencesInHtml(html: string): string {
while ((match = tagPattern.exec(html)) !== null) {
const tagStart = match.index;
const tagEnd = tagPattern.lastIndex;
const isClosing = match[1] === "/";
const isClosing = match[1] === "</";
const tagName = match[2].toLowerCase();
// Process text before this tag
@@ -183,10 +226,8 @@ export function renderTelegramHtmlText(
// For HTML mode, still wrap file references in the HTML
return wrapFileReferencesInHtml(text);
}
const html = markdownToTelegramHtml(text, { tableMode: options.tableMode });
// Wrap file references after markdown→HTML conversion
// This ensures we only transform text nodes, not HTML attributes
return wrapFileReferencesInHtml(html);
// markdownToTelegramHtml already wraps file references by default
return markdownToTelegramHtml(text, { tableMode: options.tableMode });
}
export function markdownToTelegramChunks(

View File

@@ -1,5 +1,6 @@
import { describe, expect, it } from "vitest";
import {
markdownToTelegramChunks,
markdownToTelegramHtml,
renderTelegramHtmlText,
wrapFileReferencesInHtml,
@@ -25,6 +26,10 @@ describe("wrapFileReferencesInHtml", () => {
expect(wrapFileReferencesInHtml("Check backup.pl")).toContain("Check <code>backup.pl</code>");
});
it("wraps .sh filenames", () => {
expect(wrapFileReferencesInHtml("Run backup.sh")).toContain("Run <code>backup.sh</code>");
});
it("wraps file paths", () => {
expect(wrapFileReferencesInHtml("Look at squad/friday/HEARTBEAT.md")).toContain(
"Look at <code>squad/friday/HEARTBEAT.md</code>",
@@ -50,10 +55,10 @@ describe("wrapFileReferencesInHtml", () => {
expect(result).toBe(input);
});
it("does not wrap in URLs", () => {
const result = wrapFileReferencesInHtml("Visit https://example.com/README.md");
expect(result).toContain('href="https://example.com/README.md"');
expect(result).not.toContain("<code>README.md</code>");
it("does not wrap file refs inside real URL anchor tags", () => {
const input = 'Visit <a href="https://example.com/README.md">example.com/README.md</a>';
const result = wrapFileReferencesInHtml(input);
expect(result).toBe(input);
});
it("handles mixed content correctly", () => {
@@ -67,6 +72,27 @@ describe("wrapFileReferencesInHtml", () => {
expect(wrapFileReferencesInHtml("File.md at start")).toContain("<code>File.md</code>");
expect(wrapFileReferencesInHtml("Ends with file.md")).toContain("<code>file.md</code>");
});
it("de-linkifies auto-linkified file ref anchors", () => {
const input = '<a href="http://README.md">README.md</a>';
expect(wrapFileReferencesInHtml(input)).toBe("<code>README.md</code>");
});
it("de-linkifies auto-linkified path anchors", () => {
const input = '<a href="http://squad/friday/HEARTBEAT.md">squad/friday/HEARTBEAT.md</a>';
expect(wrapFileReferencesInHtml(input)).toBe("<code>squad/friday/HEARTBEAT.md</code>");
});
it("preserves explicit links where label differs from href", () => {
const input = '<a href="http://README.md">click here</a>';
expect(wrapFileReferencesInHtml(input)).toBe(input);
});
it("wraps file ref after closing anchor tag", () => {
const input = '<a href="https://example.com">link</a> then README.md';
const result = wrapFileReferencesInHtml(input);
expect(result).toContain("</a> then <code>README.md</code>");
});
});
describe("renderTelegramHtmlText - file reference wrapping", () => {
@@ -98,4 +124,35 @@ describe("markdownToTelegramHtml - file reference wrapping", () => {
const result = markdownToTelegramHtml("Check README.md", { wrapFileRefs: false });
expect(result).not.toContain("<code>README.md</code>");
});
it("wraps multiple file types in a single message", () => {
const result = markdownToTelegramHtml("Edit main.go and script.py");
expect(result).toContain("<code>main.go</code>");
expect(result).toContain("<code>script.py</code>");
});
it("preserves real URLs as anchor tags", () => {
const result = markdownToTelegramHtml("Visit https://example.com");
expect(result).toContain('<a href="https://example.com">');
});
it("preserves explicit markdown links even when href looks like a file ref", () => {
const result = markdownToTelegramHtml("[docs](http://README.md)");
expect(result).toContain('<a href="http://README.md">docs</a>');
});
it("wraps file ref after real URL in same message", () => {
const result = markdownToTelegramHtml("Visit https://example.com and README.md");
expect(result).toContain('<a href="https://example.com">');
expect(result).toContain("<code>README.md</code>");
});
});
describe("markdownToTelegramChunks - file reference wrapping", () => {
  // The chunked rendering path must apply the same <code> wrapping to bare
  // file references as the single-string rendering path.
  it("wraps file references in chunked output", () => {
    const rendered = markdownToTelegramChunks("Check README.md and backup.sh", 4096);
    expect(rendered.length).toBeGreaterThan(0);

    const firstHtml = rendered[0].html;
    for (const wrapped of ["<code>README.md</code>", "<code>backup.sh</code>"]) {
      expect(firstHtml).toContain(wrapped);
    }
  });
});