mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-07 01:41:36 +00:00
fix: improve section extraction robustness (case-insensitive, H3, code blocks)
This commit is contained in:
@@ -96,4 +96,74 @@ Ignore this.
|
|||||||
expect(result).not.toBeNull();
|
expect(result).not.toBeNull();
|
||||||
expect(result).toContain("[truncated]");
|
expect(result).toContain("[truncated]");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("matches section names case-insensitively", async () => {
|
||||||
|
const content = `# Rules
|
||||||
|
|
||||||
|
## session startup
|
||||||
|
|
||||||
|
Read WORKFLOW_AUTO.md
|
||||||
|
|
||||||
|
## Other
|
||||||
|
`;
|
||||||
|
fs.writeFileSync(path.join(tmpDir, "AGENTS.md"), content);
|
||||||
|
const result = await readPostCompactionContext(tmpDir);
|
||||||
|
expect(result).not.toBeNull();
|
||||||
|
expect(result).toContain("WORKFLOW_AUTO.md");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("matches H3 headings", async () => {
|
||||||
|
const content = `# Rules
|
||||||
|
|
||||||
|
### Session Startup
|
||||||
|
|
||||||
|
Read these files.
|
||||||
|
|
||||||
|
### Other
|
||||||
|
`;
|
||||||
|
fs.writeFileSync(path.join(tmpDir, "AGENTS.md"), content);
|
||||||
|
const result = await readPostCompactionContext(tmpDir);
|
||||||
|
expect(result).not.toBeNull();
|
||||||
|
expect(result).toContain("Read these files");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("skips sections inside code blocks", async () => {
|
||||||
|
const content = `# Rules
|
||||||
|
|
||||||
|
\`\`\`markdown
|
||||||
|
## Session Startup
|
||||||
|
This is inside a code block and should NOT be extracted.
|
||||||
|
\`\`\`
|
||||||
|
|
||||||
|
## Red Lines
|
||||||
|
|
||||||
|
Real red lines here.
|
||||||
|
|
||||||
|
## Other
|
||||||
|
`;
|
||||||
|
fs.writeFileSync(path.join(tmpDir, "AGENTS.md"), content);
|
||||||
|
const result = await readPostCompactionContext(tmpDir);
|
||||||
|
expect(result).not.toBeNull();
|
||||||
|
expect(result).toContain("Real red lines here");
|
||||||
|
expect(result).not.toContain("inside a code block");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("includes sub-headings within a section", async () => {
|
||||||
|
const content = `## Red Lines
|
||||||
|
|
||||||
|
### Rule 1
|
||||||
|
Never do X.
|
||||||
|
|
||||||
|
### Rule 2
|
||||||
|
Never do Y.
|
||||||
|
|
||||||
|
## Other Section
|
||||||
|
`;
|
||||||
|
fs.writeFileSync(path.join(tmpDir, "AGENTS.md"), content);
|
||||||
|
const result = await readPostCompactionContext(tmpDir);
|
||||||
|
expect(result).not.toBeNull();
|
||||||
|
expect(result).toContain("Rule 1");
|
||||||
|
expect(result).toContain("Rule 2");
|
||||||
|
expect(result).not.toContain("Other Section");
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -44,8 +44,10 @@ export async function readPostCompactionContext(workspaceDir: string): Promise<s
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Extract named H2 sections from markdown content.
|
* Extract named sections from markdown content.
|
||||||
* Matches "## SectionName" and captures until the next "## " or end of string.
|
* Matches H2 (##) or H3 (###) headings case-insensitively.
|
||||||
|
* Skips content inside fenced code blocks.
|
||||||
|
* Captures until the next heading of same or higher level, or end of string.
|
||||||
*/
|
*/
|
||||||
function extractSections(content: string, sectionNames: string[]): string[] {
|
function extractSections(content: string, sectionNames: string[]): string[] {
|
||||||
const results: string[] = [];
|
const results: string[] = [];
|
||||||
@@ -54,21 +56,54 @@ function extractSections(content: string, sectionNames: string[]): string[] {
|
|||||||
for (const name of sectionNames) {
|
for (const name of sectionNames) {
|
||||||
let sectionLines: string[] = [];
|
let sectionLines: string[] = [];
|
||||||
let inSection = false;
|
let inSection = false;
|
||||||
|
let sectionLevel = 0;
|
||||||
|
let inCodeBlock = false;
|
||||||
|
|
||||||
for (const line of lines) {
|
for (const line of lines) {
|
||||||
// Check if this is the start of our target section
|
// Track fenced code blocks
|
||||||
if (line.match(new RegExp(`^##\\s+${escapeRegExp(name)}\\s*$`))) {
|
if (line.trimStart().startsWith("```")) {
|
||||||
inSection = true;
|
inCodeBlock = !inCodeBlock;
|
||||||
sectionLines = [line];
|
if (inSection) {
|
||||||
|
sectionLines.push(line);
|
||||||
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// If we're in the section, check if we've hit another H2 heading
|
// Skip heading detection inside code blocks
|
||||||
if (inSection) {
|
if (inCodeBlock) {
|
||||||
if (line.match(/^##\s+/)) {
|
if (inSection) {
|
||||||
// Hit another H2 heading, stop collecting
|
sectionLines.push(line);
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if this line is a heading
|
||||||
|
const headingMatch = line.match(/^(#{2,3})\s+(.+?)\s*$/);
|
||||||
|
|
||||||
|
if (headingMatch) {
|
||||||
|
const level = headingMatch[1].length; // 2 or 3
|
||||||
|
const headingText = headingMatch[2];
|
||||||
|
|
||||||
|
if (!inSection) {
|
||||||
|
// Check if this is our target section (case-insensitive)
|
||||||
|
if (headingText.toLowerCase() === name.toLowerCase()) {
|
||||||
|
inSection = true;
|
||||||
|
sectionLevel = level;
|
||||||
|
sectionLines = [line];
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// We're in section — stop if we hit a heading of same or higher level
|
||||||
|
if (level <= sectionLevel) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// Lower-level heading (e.g., ### inside ##) — include it
|
||||||
|
sectionLines.push(line);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (inSection) {
|
||||||
sectionLines.push(line);
|
sectionLines.push(line);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -80,7 +115,3 @@ function extractSections(content: string, sectionNames: string[]): string[] {
|
|||||||
|
|
||||||
return results;
|
return results;
|
||||||
}
|
}
|
||||||
|
|
||||||
function escapeRegExp(str: string): string {
|
|
||||||
return str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
||||||
}
|
|
||||||
|
|||||||
Reference in New Issue
Block a user