fix(telegram): keep .co domains linked and wrap punctuated file refs

This commit is contained in:
Peter Steinberger
2026-02-14 00:26:57 +01:00
parent 6bf2ced2f5
commit 49d7055d12
2 changed files with 33 additions and 9 deletions

View File

@@ -41,7 +41,6 @@ const FILE_EXTENSIONS_WITH_TLD = new Set([
"at", // Assembly (Austria)
"be", // Backend files (Belgium)
"cc", // C++ source (Cocos Islands)
"co", // Configuration (Colombia)
]);
/** Detects when markdown-it linkify auto-generated a link from a bare filename (e.g. README.md → http://README.md) */
@@ -150,7 +149,7 @@ export function wrapFileReferencesInHtml(html: string): string {
return `<code>${escapeHtml(label)}</code>`;
});
const filePattern = new RegExp(
`(^|>|[\\s])([a-zA-Z0-9_.\\-./]+\\.(?:${extensionsPattern}))(?=$|[\\s<])`,
`(^|[^a-zA-Z0-9_\\-/])([a-zA-Z0-9_.\\-./]+\\.(?:${extensionsPattern}))(?=$|[^a-zA-Z0-9_\\-/])`,
"gi",
);
@@ -173,11 +172,18 @@ export function wrapFileReferencesInHtml(html: string): string {
// Process text before this tag
const textBefore = html.slice(lastIndex, tagStart);
result += textBefore.replace(filePattern, (m, prefix, filename) => {
result += textBefore.replace(filePattern, (m, prefix, filename, offset, source) => {
// Skip if inside protected tags or if it's a URL
if (codeDepth > 0 || preDepth > 0 || anchorDepth > 0) {
return m;
}
// Skip if we're inside any HTML tag (e.g., attributes on tags other than code/pre/a)
const filenameOffset = Number(offset) + String(prefix).length;
const lastOpen = String(source).lastIndexOf("<", filenameOffset);
const lastClose = String(source).lastIndexOf(">", filenameOffset);
if (lastOpen > lastClose) {
return m;
}
if (filename.startsWith("//")) {
return m;
}
@@ -203,10 +209,16 @@ export function wrapFileReferencesInHtml(html: string): string {
// Process remaining text
const remainingText = html.slice(lastIndex);
result += remainingText.replace(filePattern, (m, prefix, filename) => {
result += remainingText.replace(filePattern, (m, prefix, filename, offset, source) => {
if (codeDepth > 0 || preDepth > 0 || anchorDepth > 0) {
return m;
}
const filenameOffset = Number(offset) + String(prefix).length;
const lastOpen = String(source).lastIndexOf("<", filenameOffset);
const lastClose = String(source).lastIndexOf(">", filenameOffset);
if (lastOpen > lastClose) {
return m;
}
if (filename.startsWith("//")) {
return m;
}

View File

@@ -73,6 +73,13 @@ describe("wrapFileReferencesInHtml", () => {
expect(wrapFileReferencesInHtml("Ends with file.md")).toContain("<code>file.md</code>");
});
// A file reference must still be wrapped when punctuation sits directly next to it.
it("wraps file refs with punctuation boundaries", () => {
  // Each pair is [raw input, fragment the wrapped output must contain].
  const cases: ReadonlyArray<readonly [string, string]> = [
    ["See README.md.", "<code>README.md</code>."],
    ["See README.md,", "<code>README.md</code>,"],
    ["(README.md)", "(<code>README.md</code>)"],
    ["README.md:", "<code>README.md</code>:"],
  ];
  for (const [input, expected] of cases) {
    expect(wrapFileReferencesInHtml(input)).toContain(expected);
  }
});
it("de-linkifies auto-linkified file ref anchors", () => {
const input = '<a href="http://README.md">README.md</a>';
expect(wrapFileReferencesInHtml(input)).toBe("<code>README.md</code>");
@@ -193,15 +200,12 @@ describe("edge cases", () => {
expect(result).toBe("README.md");
});
it("wraps supported TLD extensions (.am, .at, .be, .cc, .co)", () => {
const result = markdownToTelegramHtml(
"Makefile.am and code.at and app.be and main.cc and config.co",
);
it("wraps supported TLD extensions (.am, .at, .be, .cc)", () => {
const result = markdownToTelegramHtml("Makefile.am and code.at and app.be and main.cc");
expect(result).toContain("<code>Makefile.am</code>");
expect(result).toContain("<code>code.at</code>");
expect(result).toContain("<code>app.be</code>");
expect(result).toContain("<code>main.cc</code>");
expect(result).toContain("<code>config.co</code>");
});
it("does not wrap popular domain TLDs (.ai, .io, .tv, .fm)", () => {
@@ -214,6 +218,14 @@ describe("edge cases", () => {
expect(result).toContain('<a href="http://radio.fm">');
});
// ".co" hosts must stay as anchors and must never be rewritten into <code> spans.
it("keeps .co domains as links", () => {
  const html = markdownToTelegramHtml("Visit t.co and openclaw.co");
  const domains = ["t.co", "openclaw.co"];
  // First confirm every domain is still linkified...
  for (const domain of domains) {
    expect(html).toContain(`<a href="http://${domain}">`);
  }
  // ...then confirm none of them was wrapped as a file reference.
  for (const domain of domains) {
    expect(html).not.toContain(`<code>${domain}</code>`);
  }
});
it("does not wrap non-TLD extensions", () => {
const result = markdownToTelegramHtml("image.png and style.css and script.js");
expect(result).not.toContain("<code>image.png</code>");