mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-09 01:38:26 +00:00
Media: preserve PDF MIME classification in file extraction
This commit is contained in:
@@ -632,6 +632,38 @@ describe("applyMediaUnderstanding", () => {
|
||||
expect(ctx.Body).not.toContain("<file");
|
||||
});
|
||||
|
||||
// Regression test: an attachment declared as application/pdf must not be
// extracted into the message body as a text file, even though its bytes are
// printable ASCII.
it("does not reclassify PDF attachments as text/plain", async () => {
|
||||
// Minimal PDF-looking payload: starts with the "%PDF-1.7" header and is made
// up solely of printable ASCII characters and newlines.
const pseudoPdf = Buffer.from("%PDF-1.7\n1 0 obj\n<< /Type /Catalog >>\nendobj\n", "utf8");
|
||||
const filePath = await createTempMediaFile({
|
||||
fileName: "report.pdf",
|
||||
content: pseudoPdf,
|
||||
});
|
||||
|
||||
// Start from the media-disabled config and restrict the allowed file MIME
// list to text/plain only.
// NOTE(review): presumably this means the file could only be applied if it
// were re-labelled as text/plain — confirm against the extraction code.
const cfg: OpenClawConfig = {
|
||||
...createMediaDisabledConfig(),
|
||||
gateway: {
|
||||
http: {
|
||||
endpoints: {
|
||||
responses: {
|
||||
files: { allowedMimes: ["text/plain"] },
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
// The attachment is passed with an explicit application/pdf media type.
const { ctx, result } = await applyWithDisabledMedia({
|
||||
body: "<media:file>",
|
||||
mediaPath: filePath,
|
||||
mediaType: "application/pdf",
|
||||
cfg,
|
||||
});
|
||||
|
||||
// No file was applied, the body is exactly the original placeholder, and no
// "<file" markup was added to it.
expect(result.appliedFile).toBe(false);
|
||||
expect(ctx.Body).toBe("<media:file>");
|
||||
expect(ctx.Body).not.toContain("<file");
|
||||
});
|
||||
|
||||
it("respects configured allowedMimes for text-like attachments", async () => {
|
||||
const tsvText = "a\tb\tc\n1\t2\t3";
|
||||
const tsvPath = await createTempMediaFile({
|
||||
|
||||
@@ -382,7 +382,11 @@ async function extractFileBlocks(params: {
|
||||
}
|
||||
const utf16Charset = resolveUtf16Charset(bufferResult?.buffer);
|
||||
const textSample = decodeTextSample(bufferResult?.buffer);
|
||||
-const textLike = Boolean(utf16Charset) || looksLikeUtf8Text(bufferResult?.buffer);
|
||||
// Do not coerce real PDFs into text/plain via printable-byte heuristics.
|
||||
// PDFs have a dedicated extraction path in extractFileContentFromSource.
|
||||
const allowTextHeuristic = normalizedRawMime !== "application/pdf";
|
||||
const textLike =
|
||||
allowTextHeuristic && (Boolean(utf16Charset) || looksLikeUtf8Text(bufferResult?.buffer));
|
||||
const guessedDelimited = textLike ? guessDelimitedMime(textSample) : undefined;
|
||||
const textHint =
|
||||
forcedTextMimeResolved ?? guessedDelimited ?? (textLike ? "text/plain" : undefined);
|
||||
|
||||
Reference in New Issue
Block a user