mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-24 10:14:26 +00:00
fix(telegram): prevent URL previews for file refs with TLD extensions
Two layers were causing spurious link previews for file references like `README.md`, `backup.sh`, `main.go`: 1. **markdown-it linkify** converts `README.md` to `<a href="http://README.md">README.md</a>` (.md = Moldova TLD) 2. **Telegram auto-linker** treats remaining bare text as URLs ## Changes ### Primary fix: suppress auto-linkified file refs in buildTelegramLink - Added `isAutoLinkedFileRef()` helper that detects when linkify auto-generated a link from a bare filename (href = "http://" + label) - Rejects paths with domain-like segments (dots in non-final path parts) - Modified `buildTelegramLink()` to return null for these, so file refs stay as plain text and get wrapped in `<code>` by the wrapper ### Safety-net: de-linkify in wrapFileReferencesInHtml - Added pre-pass that catches auto-linkified anchors in pre-rendered HTML - Handles edge cases where HTML is passed directly (textMode: "html") - Reuses `isAutoLinkedFileRef()` logic — no duplication ### Bug fixes discovered during review - **Fixed `isClosing` bug (line 169)**: the check `match[1] === "/"` was wrong — the regex `(<\/?)` captures `<` or `</`, so closing tags were never detected. Changed to `match[1] === "</"`. This was causing `inCode/inPre/inAnchor` to stay stuck at true after any opening tag, breaking file ref wrapping after closing tags. - **Removed double `wrapFileReferencesInHtml` call**: `renderTelegramHtmlText` was calling `markdownToTelegramHtml` (which wraps) then wrapping again. ### Test coverage (+12 tests, 26 total) - `.sh` filenames (original issue #6932 mentioned backup.sh) - Auto-linkified anchor replacement - Auto-linkified path anchor replacement - Explicit link preservation (different label) - File ref after closing anchor tag (exercises isClosing fix) - Multiple file types in single message - Real URL preservation - Explicit markdown link preservation - File ref after real URL in same message - Chunked output file ref wrapping Closes #6932
This commit is contained in:
@@ -20,7 +20,57 @@ function escapeHtmlAttr(text: string): string {
|
||||
return escapeHtml(text).replace(/"/g, "&quot;");
|
||||
}
|
||||
|
||||
function buildTelegramLink(link: MarkdownLinkSpan, _text: string) {
|
||||
/**
 * File extensions that share TLDs and commonly appear in code/documentation.
 * These are wrapped in <code> tags to prevent Telegram from generating
 * spurious domain registrar previews.
 *
 * Entries are lowercase; callers lowercase the candidate extension before
 * looking it up. `Set` keeps insertion order, which is also the order used
 * when the entries are joined into a regex alternation.
 */
const FILE_EXTENSIONS_WITH_TLD = new Set([
  // High priority - commonly referenced in messages
  "md", // Markdown (Moldova)
  "go", // Go language
  "py", // Python (Paraguay)
  "pl", // Perl (Poland)
  "ai", // Adobe Illustrator (Anguilla)
  "sh", // Shell (Saint Helena)
  // Medium priority - sometimes referenced
  "io", // British Indian Ocean Territory (often used for tech projects)
  "tv", // Tuvalu (video files)
  "fm", // Federated States of Micronesia (audio)
  "am", // Armenia
  "at", // Austria
  "be", // Belgium
  "cc", // Cocos Islands
  "co", // Colombia
]);
|
||||
|
||||
/**
 * Detects when markdown-it linkify auto-generated a link from a bare filename
 * (e.g. `README.md` → `http://README.md`).
 *
 * @param href - Link target to inspect.
 * @param label - Visible text of the link.
 * @returns `true` only when all of the following hold: the href equals the
 *   label once a leading `http://` / `https://` is removed, the label's final
 *   extension is listed in `FILE_EXTENSIONS_WITH_TLD`, and no path segment
 *   before the filename contains a dot.
 */
function isAutoLinkedFileRef(href: string, label: string): boolean {
  // An auto-generated link points at exactly the visible text plus a scheme;
  // an explicit link with a different label must be left alone.
  if (href.replace(/^https?:\/\//i, "") !== label) {
    return false;
  }

  // Need a dot that is not the first character, so "md" and ".md" are ignored.
  const dotIndex = label.lastIndexOf(".");
  if (dotIndex < 1) {
    return false;
  }

  const ext = label.slice(dotIndex + 1).toLowerCase();
  if (!FILE_EXTENSIONS_WITH_TLD.has(ext)) {
    return false;
  }

  // Reject if any path segment before the filename contains a dot: that looks
  // like a domain (example.com/README.md) rather than a directory.
  const leadingSegments = label.split("/").slice(0, -1);
  return !leadingSegments.some((segment) => segment.includes("."));
}
|
||||
|
||||
function buildTelegramLink(link: MarkdownLinkSpan, text: string) {
|
||||
const href = link.href.trim();
|
||||
if (!href) {
|
||||
return null;
|
||||
@@ -28,6 +78,11 @@ function buildTelegramLink(link: MarkdownLinkSpan, _text: string) {
|
||||
if (link.start === link.end) {
|
||||
return null;
|
||||
}
|
||||
// Suppress auto-linkified file references (e.g. README.md → http://README.md)
|
||||
const label = text.slice(link.start, link.end);
|
||||
if (isAutoLinkedFileRef(href, label)) {
|
||||
return null;
|
||||
}
|
||||
const safeHref = escapeHtmlAttr(href);
|
||||
return {
|
||||
start: link.start,
|
||||
@@ -69,30 +124,6 @@ export function markdownToTelegramHtml(
|
||||
return html;
|
||||
}
|
||||
|
||||
/**
 * File extensions that share TLDs and commonly appear in code/documentation.
 * These are wrapped in <code> tags to prevent Telegram from generating
 * spurious domain registrar previews.
 *
 * Entries are lowercase. `Set` keeps insertion order, which is also the order
 * used when the entries are joined into a regex alternation.
 */
const FILE_EXTENSIONS_WITH_TLD = new Set([
  // High priority - commonly referenced in messages
  "md", // Markdown (Moldova)
  "go", // Go language
  "py", // Python (Paraguay)
  "pl", // Perl (Poland)
  "ai", // Adobe Illustrator (Anguilla)
  "sh", // Shell (Saint Helena)
  // Medium priority - sometimes referenced
  "io", // British Indian Ocean Territory (often used for tech projects)
  "tv", // Tuvalu (video files)
  "fm", // Federated States of Micronesia (audio)
  "am", // Armenia
  "at", // Austria
  "be", // Belgium
  "cc", // Cocos Islands
  "co", // Colombia
]);
|
||||
|
||||
/**
|
||||
* Wraps standalone file references (with TLD extensions) in <code> tags.
|
||||
* This prevents Telegram from treating them as URLs and generating
|
||||
@@ -104,6 +135,18 @@ const FILE_EXTENSIONS_WITH_TLD = new Set([
|
||||
export function wrapFileReferencesInHtml(html: string): string {
|
||||
// Build regex pattern for all tracked extensions
|
||||
const extensionsPattern = Array.from(FILE_EXTENSIONS_WITH_TLD).join("|");
|
||||
|
||||
// Safety-net: de-linkify auto-generated anchors where href="http://<label>" (defense in depth for textMode: "html")
|
||||
const autoLinkedAnchor = new RegExp(`<a\\s+href="https?://([^"]+)"\\s*>([^<]+)</a>`, "gi");
|
||||
html = html.replace(autoLinkedAnchor, (_match, href: string, label: string) => {
|
||||
if (href !== label) {
|
||||
return _match;
|
||||
}
|
||||
if (!isAutoLinkedFileRef(`http://${href}`, label)) {
|
||||
return _match;
|
||||
}
|
||||
return `<code>${label}</code>`;
|
||||
});
|
||||
const filePattern = new RegExp(
|
||||
`(^|>|[\\s])([a-zA-Z0-9_.\\-./]+\\.(?:${extensionsPattern}))(?=$|[\\s<])`,
|
||||
"gi",
|
||||
@@ -123,7 +166,7 @@ export function wrapFileReferencesInHtml(html: string): string {
|
||||
while ((match = tagPattern.exec(html)) !== null) {
|
||||
const tagStart = match.index;
|
||||
const tagEnd = tagPattern.lastIndex;
|
||||
const isClosing = match[1] === "/";
|
||||
const isClosing = match[1] === "</";
|
||||
const tagName = match[2].toLowerCase();
|
||||
|
||||
// Process text before this tag
|
||||
@@ -183,10 +226,8 @@ export function renderTelegramHtmlText(
|
||||
// For HTML mode, still wrap file references in the HTML
|
||||
return wrapFileReferencesInHtml(text);
|
||||
}
|
||||
const html = markdownToTelegramHtml(text, { tableMode: options.tableMode });
|
||||
// Wrap file references after markdown→HTML conversion
|
||||
// This ensures we only transform text nodes, not HTML attributes
|
||||
return wrapFileReferencesInHtml(html);
|
||||
// markdownToTelegramHtml already wraps file references by default
|
||||
return markdownToTelegramHtml(text, { tableMode: options.tableMode });
|
||||
}
|
||||
|
||||
export function markdownToTelegramChunks(
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import {
|
||||
markdownToTelegramChunks,
|
||||
markdownToTelegramHtml,
|
||||
renderTelegramHtmlText,
|
||||
wrapFileReferencesInHtml,
|
||||
@@ -25,6 +26,10 @@ describe("wrapFileReferencesInHtml", () => {
|
||||
expect(wrapFileReferencesInHtml("Check backup.pl")).toContain("Check <code>backup.pl</code>");
|
||||
});
|
||||
|
||||
it("wraps .sh filenames", () => {
|
||||
expect(wrapFileReferencesInHtml("Run backup.sh")).toContain("Run <code>backup.sh</code>");
|
||||
});
|
||||
|
||||
it("wraps file paths", () => {
|
||||
expect(wrapFileReferencesInHtml("Look at squad/friday/HEARTBEAT.md")).toContain(
|
||||
"Look at <code>squad/friday/HEARTBEAT.md</code>",
|
||||
@@ -50,10 +55,10 @@ describe("wrapFileReferencesInHtml", () => {
|
||||
expect(result).toBe(input);
|
||||
});
|
||||
|
||||
it("does not wrap in URLs", () => {
|
||||
const result = wrapFileReferencesInHtml("Visit https://example.com/README.md");
|
||||
expect(result).toContain('href="https://example.com/README.md"');
|
||||
expect(result).not.toContain("<code>README.md</code>");
|
||||
it("does not wrap file refs inside real URL anchor tags", () => {
|
||||
const input = 'Visit <a href="https://example.com/README.md">example.com/README.md</a>';
|
||||
const result = wrapFileReferencesInHtml(input);
|
||||
expect(result).toBe(input);
|
||||
});
|
||||
|
||||
it("handles mixed content correctly", () => {
|
||||
@@ -67,6 +72,27 @@ describe("wrapFileReferencesInHtml", () => {
|
||||
expect(wrapFileReferencesInHtml("File.md at start")).toContain("<code>File.md</code>");
|
||||
expect(wrapFileReferencesInHtml("Ends with file.md")).toContain("<code>file.md</code>");
|
||||
});
|
||||
|
||||
it("de-linkifies auto-linkified file ref anchors", () => {
|
||||
const input = '<a href="http://README.md">README.md</a>';
|
||||
expect(wrapFileReferencesInHtml(input)).toBe("<code>README.md</code>");
|
||||
});
|
||||
|
||||
it("de-linkifies auto-linkified path anchors", () => {
|
||||
const input = '<a href="http://squad/friday/HEARTBEAT.md">squad/friday/HEARTBEAT.md</a>';
|
||||
expect(wrapFileReferencesInHtml(input)).toBe("<code>squad/friday/HEARTBEAT.md</code>");
|
||||
});
|
||||
|
||||
it("preserves explicit links where label differs from href", () => {
|
||||
const input = '<a href="http://README.md">click here</a>';
|
||||
expect(wrapFileReferencesInHtml(input)).toBe(input);
|
||||
});
|
||||
|
||||
it("wraps file ref after closing anchor tag", () => {
|
||||
const input = '<a href="https://example.com">link</a> then README.md';
|
||||
const result = wrapFileReferencesInHtml(input);
|
||||
expect(result).toContain("</a> then <code>README.md</code>");
|
||||
});
|
||||
});
|
||||
|
||||
describe("renderTelegramHtmlText - file reference wrapping", () => {
|
||||
@@ -98,4 +124,35 @@ describe("markdownToTelegramHtml - file reference wrapping", () => {
|
||||
const result = markdownToTelegramHtml("Check README.md", { wrapFileRefs: false });
|
||||
expect(result).not.toContain("<code>README.md</code>");
|
||||
});
|
||||
|
||||
it("wraps multiple file types in a single message", () => {
|
||||
const result = markdownToTelegramHtml("Edit main.go and script.py");
|
||||
expect(result).toContain("<code>main.go</code>");
|
||||
expect(result).toContain("<code>script.py</code>");
|
||||
});
|
||||
|
||||
it("preserves real URLs as anchor tags", () => {
|
||||
const result = markdownToTelegramHtml("Visit https://example.com");
|
||||
expect(result).toContain('<a href="https://example.com">');
|
||||
});
|
||||
|
||||
it("preserves explicit markdown links even when href looks like a file ref", () => {
|
||||
const result = markdownToTelegramHtml("[docs](http://README.md)");
|
||||
expect(result).toContain('<a href="http://README.md">docs</a>');
|
||||
});
|
||||
|
||||
it("wraps file ref after real URL in same message", () => {
|
||||
const result = markdownToTelegramHtml("Visit https://example.com and README.md");
|
||||
expect(result).toContain('<a href="https://example.com">');
|
||||
expect(result).toContain("<code>README.md</code>");
|
||||
});
|
||||
});
|
||||
|
||||
describe("markdownToTelegramChunks - file reference wrapping", () => {
  // Both bare filenames in the message should come out <code>-wrapped in the
  // first chunk's HTML.
  it("wraps file references in chunked output", () => {
    const chunks = markdownToTelegramChunks("Check README.md and backup.sh", 4096);
    expect(chunks.length).toBeGreaterThan(0);

    const firstHtml = chunks[0].html;
    for (const wrapped of ["<code>README.md</code>", "<code>backup.sh</code>"]) {
      expect(firstHtml).toContain(wrapped);
    }
  });
});
|
||||
|
||||
Reference in New Issue
Block a user