fix: improve section extraction robustness (case-insensitive, H3, code blocks)

This commit is contained in:
康熙
2026-02-16 20:48:24 +08:00
committed by Peter Steinberger
parent 90476d465d
commit d0b33f23eb
2 changed files with 116 additions and 15 deletions

View File

@@ -96,4 +96,74 @@ Ignore this.
expect(result).not.toBeNull(); expect(result).not.toBeNull();
expect(result).toContain("[truncated]"); expect(result).toContain("[truncated]");
}); });
// Verifies case-insensitive heading matching: AGENTS.md is written with an
// all-lowercase "## session startup" heading, and the context read back from
// the workspace must still include that section's body.
// NOTE(review): presumably the extractor looks for "Session Startup"; the
// list of target section names is not visible here — confirm.
it("matches section names case-insensitively", async () => {
const content = `# Rules
## session startup
Read WORKFLOW_AUTO.md
## Other
`;
fs.writeFileSync(path.join(tmpDir, "AGENTS.md"), content);
const result = await readPostCompactionContext(tmpDir);
expect(result).not.toBeNull();
// The body line under the lowercase heading must appear in the result.
expect(result).toContain("WORKFLOW_AUTO.md");
});
// Verifies that a section introduced by an H3 heading ("### Session Startup")
// is extracted: the fixture contains no H2 heading with that name, only the
// H3 one followed by a second H3 ("### Other").
it("matches H3 headings", async () => {
const content = `# Rules
### Session Startup
Read these files.
### Other
`;
fs.writeFileSync(path.join(tmpDir, "AGENTS.md"), content);
const result = await readPostCompactionContext(tmpDir);
expect(result).not.toBeNull();
// The body line under the H3 heading must appear in the result.
expect(result).toContain("Read these files");
});
// Verifies that headings inside a fenced code block are ignored: the fixture
// places "## Session Startup" inside a ```markdown fence and a real
// "## Red Lines" section after the fence closes.
it("skips sections inside code blocks", async () => {
const content = `# Rules
\`\`\`markdown
## Session Startup
This is inside a code block and should NOT be extracted.
\`\`\`
## Red Lines
Real red lines here.
## Other
`;
fs.writeFileSync(path.join(tmpDir, "AGENTS.md"), content);
const result = await readPostCompactionContext(tmpDir);
expect(result).not.toBeNull();
// The real section that follows the fence is extracted...
expect(result).toContain("Real red lines here");
// ...while the text that only appears inside the fence is not.
expect(result).not.toContain("inside a code block");
});
// Verifies section boundaries: the "### Rule 1" / "### Rule 2" sub-headings
// nested under "## Red Lines" stay part of the extracted section, and the
// following "## Other Section" heading is not included in the result.
it("includes sub-headings within a section", async () => {
const content = `## Red Lines
### Rule 1
Never do X.
### Rule 2
Never do Y.
## Other Section
`;
fs.writeFileSync(path.join(tmpDir, "AGENTS.md"), content);
const result = await readPostCompactionContext(tmpDir);
expect(result).not.toBeNull();
// Both nested H3 sub-headings are kept inside the H2 section.
expect(result).toContain("Rule 1");
expect(result).toContain("Rule 2");
// The next H2 heading must not be part of the result.
expect(result).not.toContain("Other Section");
});
}); });

View File

@@ -44,8 +44,10 @@ export async function readPostCompactionContext(workspaceDir: string): Promise<s
} }
/** /**
* Extract named H2 sections from markdown content. * Extract named sections from markdown content.
* Matches "## SectionName" and captures until the next "## " or end of string. * Matches H2 (##) or H3 (###) headings case-insensitively.
* Skips content inside fenced code blocks.
* Captures until the next heading of same or higher level, or end of string.
*/ */
function extractSections(content: string, sectionNames: string[]): string[] { function extractSections(content: string, sectionNames: string[]): string[] {
const results: string[] = []; const results: string[] = [];
@@ -54,21 +56,54 @@ function extractSections(content: string, sectionNames: string[]): string[] {
for (const name of sectionNames) { for (const name of sectionNames) {
let sectionLines: string[] = []; let sectionLines: string[] = [];
let inSection = false; let inSection = false;
let sectionLevel = 0;
let inCodeBlock = false;
for (const line of lines) { for (const line of lines) {
// Check if this is the start of our target section // Track fenced code blocks
if (line.match(new RegExp(`^##\\s+${escapeRegExp(name)}\\s*$`))) { if (line.trimStart().startsWith("```")) {
inSection = true; inCodeBlock = !inCodeBlock;
sectionLines = [line]; if (inSection) {
sectionLines.push(line);
}
continue; continue;
} }
// If we're in the section, check if we've hit another H2 heading // Skip heading detection inside code blocks
if (inCodeBlock) {
if (inSection) { if (inSection) {
if (line.match(/^##\s+/)) { sectionLines.push(line);
// Hit another H2 heading, stop collecting }
continue;
}
// Check if this line is a heading
const headingMatch = line.match(/^(#{2,3})\s+(.+?)\s*$/);
if (headingMatch) {
const level = headingMatch[1].length; // 2 or 3
const headingText = headingMatch[2];
if (!inSection) {
// Check if this is our target section (case-insensitive)
if (headingText.toLowerCase() === name.toLowerCase()) {
inSection = true;
sectionLevel = level;
sectionLines = [line];
continue;
}
} else {
// We're in section — stop if we hit a heading of same or higher level
if (level <= sectionLevel) {
break; break;
} }
// Lower-level heading (e.g., ### inside ##) — include it
sectionLines.push(line);
continue;
}
}
if (inSection) {
sectionLines.push(line); sectionLines.push(line);
} }
} }
@@ -80,7 +115,3 @@ function extractSections(content: string, sectionNames: string[]): string[] {
return results; return results;
} }
/**
 * Backslash-escapes the regular-expression metacharacters in `str`
 * (`. * + ? ^ $ { } ( ) | [ ] \`) so the result can be embedded in a
 * `RegExp` source and match the input text literally.
 *
 * @param str - Text to be matched literally.
 * @returns `str` with each metacharacter preceded by a backslash.
 */
function escapeRegExp(str: string): string {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g;
  // "$&" re-inserts the matched character after the added backslash.
  const escaped = str.replace(metaCharacters, "\\$&");
  return escaped;
}