fix(telegram): catch orphaned single-letter TLD patterns

When text like 'R&D.md' doesn't match the main file pattern (because &
breaks the character class), the 'D.md' part can still be auto-linked
by Telegram as a domain (https://d.md/).

Add a second pass that catches orphaned single-letter TLD patterns like 'D.md', 'R.io', 'X.ai'
at the start of the text or right after a non-alphanumeric character, and wraps them in <code> tags.

Pattern: ([^a-zA-Z0-9]|^)([A-Za-z]\.(?:extensions))(?=[^a-zA-Z0-9/]|$)

Tests added:
- 'wraps orphaned TLD pattern after special character' (R&D.md → R&<code>D.md</code>)
- 'wraps orphaned single-letter TLD patterns' (X.ai, R.io)
This commit is contained in:
divanoli
2026-02-05 11:54:09 +03:00
parent 5431591cfa
commit 38a94ec5fe
2 changed files with 26 additions and 4 deletions

View File

@@ -216,6 +216,20 @@ export function wrapFileReferencesInHtml(html: string): string {
return `${prefix}<code>${escapeHtml(filename)}</code>`;
});
// Second pass: catch orphaned single-letter TLD patterns (e.g., 'D.md' in 'R&D.md')
// These can be auto-linked by Telegram as domains
const orphanedTldPattern = new RegExp(
`([^a-zA-Z0-9]|^)([A-Za-z]\\.(?:${extensionsPattern}))(?=[^a-zA-Z0-9/]|$)`,
"g",
);
result = result.replace(orphanedTldPattern, (m, prefix, tld) => {
// Skip if already wrapped in a tag (check for < before or > after in context)
if (prefix === ">") {
return m;
}
return `${prefix}<code>${escapeHtml(tld)}</code>`;
});
return result;
}

View File

@@ -278,12 +278,20 @@ describe("edge cases", () => {
expect(wrapFileReferencesInHtml(input)).toBe(input);
});
it("does not match filenames with special characters", () => {
// The regex only matches [a-zA-Z0-9_.\\-./] so & breaks the pattern
it("wraps orphaned TLD pattern after special character", () => {
// R&D.md - the & breaks the main pattern, but D.md could be auto-linked
// So we wrap the orphaned D.md part to prevent Telegram linking it
const input = "R&D.md";
const result = wrapFileReferencesInHtml(input);
// Not wrapped because & is not in the allowed character class
expect(result).toBe(input);
expect(result).toBe("R&<code>D.md</code>");
});
it("wraps orphaned single-letter TLD patterns", () => {
const result1 = wrapFileReferencesInHtml("X.ai is cool");
expect(result1).toContain("<code>X.ai</code>");
const result2 = wrapFileReferencesInHtml("Check R.io");
expect(result2).toContain("<code>R.io</code>");
});
it("does not match filenames containing angle brackets", () => {