refactor(telegram): remove popular domain TLDs from file extension list

Remove .ai, .io, .tv, .fm from FILE_EXTENSIONS_WITH_TLD because:
- These are commonly used as real domains (x.ai, vercel.io, github.io)
- Rarely used as actual file extensions
- Users are more likely referring to websites than files

Keep: md, sh, py, go, pl (common file extensions, rarely intentional domains)
Keep: am, at, be, cc, co (less common as intentional domain references)

Update tests to reflect the change:
- Add test for supported extensions (.am, .at, .be, .cc, .co)
- Add test verifying popular TLDs stay as links
This commit is contained in:
divanoli
2026-02-05 12:03:43 +03:00
parent 38a94ec5fe
commit 94851de4f8
2 changed files with 40 additions and 26 deletions

View File

@@ -24,24 +24,24 @@ function escapeHtmlAttr(text: string): string {
* File extensions that share TLDs and commonly appear in code/documentation.
* These are wrapped in <code> tags to prevent Telegram from generating
* spurious domain registrar previews.
*
* Only includes extensions that are:
* 1. Commonly used as file extensions in code/docs
* 2. Rarely used as intentional domain references
*
* Excluded: .ai, .io, .tv, .fm (popular domain TLDs like x.ai, vercel.io, github.io)
*/
const FILE_EXTENSIONS_WITH_TLD = new Set([
  // Each entry is listed exactly once. The popular domain TLDs ai, io, tv and fm
  // are intentionally absent so that names like x.ai or github.io stay links.

  // Common file extensions, rarely intentional domains
  "md", // Markdown (Moldova) - very common in repos
  "go", // Go language - common in Go projects
  "py", // Python (Paraguay) - common in Python projects
  "pl", // Perl (Poland) - common in Perl projects
  "sh", // Shell (Saint Helena) - common for scripts

  // Less common as intentional domain references
  "am", // Automake files (Armenia)
  "at", // Assembly (Austria)
  "be", // Backend files (Belgium)
  "cc", // C++ source (Cocos Islands)
  "co", // Configuration (Colombia)
]);
/** Detects when markdown-it linkify auto-generated a link from a bare filename (e.g. README.md → http://README.md) */

View File

@@ -191,12 +191,25 @@ describe("edge cases", () => {
expect(result).toBe("README.md");
});
it("wraps supported TLD extensions (.am, .at, .be, .cc, .co)", () => {
  // Bare filenames ending in an extension that is still in
  // FILE_EXTENSIONS_WITH_TLD must be wrapped in <code>.
  const result = markdownToTelegramHtml(
    "Makefile.am and code.at and app.be and main.cc and config.co",
  );
  expect(result).toContain("<code>Makefile.am</code>");
  expect(result).toContain("<code>code.at</code>");
  expect(result).toContain("<code>app.be</code>");
  expect(result).toContain("<code>main.cc</code>");
  expect(result).toContain("<code>config.co</code>");
});
it("does not wrap popular domain TLDs (.ai, .io, .tv, .fm)", () => {
  // Names on popular domain TLDs (x.ai, vercel.io, github.io) are far more
  // likely to be websites than files, so they must remain auto-generated links.
  const html = markdownToTelegramHtml("Check x.ai and vercel.io and app.tv and radio.fm");
  const expectedDomains = ["x.ai", "vercel.io", "app.tv", "radio.fm"];
  for (const domain of expectedDomains) {
    // Each domain should appear as an anchor, not as <code>.
    expect(html).toContain(`<a href="http://${domain}">`);
  }
});
it("does not wrap non-TLD extensions", () => {
@@ -287,11 +300,12 @@ describe("edge cases", () => {
});
it("wraps orphaned single-letter TLD patterns", () => {
  // Use extensions still in the set (md, sh, py, go); .ai and .io are no
  // longer treated as file extensions, so they are not exercised here.
  const result1 = wrapFileReferencesInHtml("X.md is cool");
  expect(result1).toContain("<code>X.md</code>");

  const result2 = wrapFileReferencesInHtml("Check R.sh");
  expect(result2).toContain("<code>R.sh</code>");
});
it("does not match filenames containing angle brackets", () => {