Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion packages/core/src/tools/read-many-files.ts
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,7 @@ Use this tool when the user's query implies needing the content of several files
.relative(toolBaseDir, filePath)
.replace(/\\/g, '/');

const fileType = detectFileType(filePath);
const fileType = await detectFileType(filePath);

if (fileType === 'image' || fileType === 'pdf') {
const fileExtension = path.extname(filePath).toLowerCase();
Expand Down
68 changes: 34 additions & 34 deletions packages/core/src/utils/fileUtils.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -142,41 +142,41 @@ describe('fileUtils', () => {
}
});

it('should return false for an empty file', () => {
it('should return false for an empty file', async () => {
actualNodeFs.writeFileSync(filePathForBinaryTest, '');
expect(isBinaryFile(filePathForBinaryTest)).toBe(false);
expect(await isBinaryFile(filePathForBinaryTest)).toBe(false);
});

it('should return false for a typical text file', () => {
it('should return false for a typical text file', async () => {
actualNodeFs.writeFileSync(
filePathForBinaryTest,
'Hello, world!\nThis is a test file with normal text content.',
);
expect(isBinaryFile(filePathForBinaryTest)).toBe(false);
expect(await isBinaryFile(filePathForBinaryTest)).toBe(false);
});

it('should return true for a file with many null bytes', () => {
it('should return true for a file with many null bytes', async () => {
const binaryContent = Buffer.from([
0x48, 0x65, 0x00, 0x6c, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00,
]); // "He\0llo\0\0\0\0\0"
actualNodeFs.writeFileSync(filePathForBinaryTest, binaryContent);
expect(isBinaryFile(filePathForBinaryTest)).toBe(true);
expect(await isBinaryFile(filePathForBinaryTest)).toBe(true);
});

it('should return true for a file with high percentage of non-printable ASCII', () => {
it('should return true for a file with high percentage of non-printable ASCII', async () => {
const binaryContent = Buffer.from([
0x41, 0x42, 0x01, 0x02, 0x03, 0x04, 0x05, 0x43, 0x44, 0x06,
]); // AB\x01\x02\x03\x04\x05CD\x06
actualNodeFs.writeFileSync(filePathForBinaryTest, binaryContent);
expect(isBinaryFile(filePathForBinaryTest)).toBe(true);
expect(await isBinaryFile(filePathForBinaryTest)).toBe(true);
});

it('should return false if file access fails (e.g., ENOENT)', () => {
it('should return false if file access fails (e.g., ENOENT)', async () => {
// Ensure the file does not exist
if (actualNodeFs.existsSync(filePathForBinaryTest)) {
actualNodeFs.unlinkSync(filePathForBinaryTest);
}
expect(isBinaryFile(filePathForBinaryTest)).toBe(false);
expect(await isBinaryFile(filePathForBinaryTest)).toBe(false);
});
});

Expand All @@ -196,64 +196,64 @@ describe('fileUtils', () => {
vi.restoreAllMocks(); // Restore spies on actualNodeFs
});

it('should detect typescript type by extension (ts)', () => {
expect(detectFileType('file.ts')).toBe('text');
expect(detectFileType('file.test.ts')).toBe('text');
it('should detect typescript type by extension (ts)', async () => {
expect(await detectFileType('file.ts')).toBe('text');
expect(await detectFileType('file.test.ts')).toBe('text');
});

it('should detect image type by extension (png)', () => {
it('should detect image type by extension (png)', async () => {
mockMimeLookup.mockReturnValueOnce('image/png');
expect(detectFileType('file.png')).toBe('image');
expect(await detectFileType('file.png')).toBe('image');
});

it('should detect image type by extension (jpeg)', () => {
it('should detect image type by extension (jpeg)', async () => {
mockMimeLookup.mockReturnValueOnce('image/jpeg');
expect(detectFileType('file.jpg')).toBe('image');
expect(await detectFileType('file.jpg')).toBe('image');
});

it('should detect svg type by extension', () => {
expect(detectFileType('image.svg')).toBe('svg');
expect(detectFileType('image.icon.svg')).toBe('svg');
it('should detect svg type by extension', async () => {
expect(await detectFileType('image.svg')).toBe('svg');
expect(await detectFileType('image.icon.svg')).toBe('svg');
});

it('should detect pdf type by extension', () => {
it('should detect pdf type by extension', async () => {
mockMimeLookup.mockReturnValueOnce('application/pdf');
expect(detectFileType('file.pdf')).toBe('pdf');
expect(await detectFileType('file.pdf')).toBe('pdf');
});

it('should detect audio type by extension', () => {
it('should detect audio type by extension', async () => {
mockMimeLookup.mockReturnValueOnce('audio/mpeg');
expect(detectFileType('song.mp3')).toBe('audio');
expect(await detectFileType('song.mp3')).toBe('audio');
});

it('should detect video type by extension', () => {
it('should detect video type by extension', async () => {
mockMimeLookup.mockReturnValueOnce('video/mp4');
expect(detectFileType('movie.mp4')).toBe('video');
expect(await detectFileType('movie.mp4')).toBe('video');
});

it('should detect known binary extensions as binary (e.g. .zip)', () => {
it('should detect known binary extensions as binary (e.g. .zip)', async () => {
mockMimeLookup.mockReturnValueOnce('application/zip');
expect(detectFileType('archive.zip')).toBe('binary');
expect(await detectFileType('archive.zip')).toBe('binary');
});
it('should detect known binary extensions as binary (e.g. .exe)', () => {
it('should detect known binary extensions as binary (e.g. .exe)', async () => {
mockMimeLookup.mockReturnValueOnce('application/octet-stream'); // Common for .exe
expect(detectFileType('app.exe')).toBe('binary');
expect(await detectFileType('app.exe')).toBe('binary');
});

it('should use isBinaryFile for unknown extensions and detect as binary', () => {
it('should use isBinaryFile for unknown extensions and detect as binary', async () => {
mockMimeLookup.mockReturnValueOnce(false); // Unknown mime type
// Create a file that isBinaryFile will identify as binary
const binaryContent = Buffer.from([
0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
]);
actualNodeFs.writeFileSync(filePathForDetectTest, binaryContent);
expect(detectFileType(filePathForDetectTest)).toBe('binary');
expect(await detectFileType(filePathForDetectTest)).toBe('binary');
});

it('should default to text if mime type is unknown and content is not binary', () => {
it('should default to text if mime type is unknown and content is not binary', async () => {
mockMimeLookup.mockReturnValueOnce(false); // Unknown mime type
// filePathForDetectTest is already a text file by default from beforeEach
expect(detectFileType(filePathForDetectTest)).toBe('text');
expect(await detectFileType(filePathForDetectTest)).toBe('text');
});
});

Expand Down
51 changes: 36 additions & 15 deletions packages/core/src/utils/fileUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
* SPDX-License-Identifier: Apache-2.0
*/

import fs from 'fs';
import path from 'path';
import fs from 'node:fs';
import path from 'node:path';
import { PartUnion } from '@google/genai';
import mime from 'mime-types';

Expand Down Expand Up @@ -56,22 +56,24 @@ export function isWithinRoot(
/**
* Determines if a file is likely binary based on content sampling.
* @param filePath Path to the file.
* @returns True if the file appears to be binary.
* @returns Promise that resolves to true if the file appears to be binary.
*/
export function isBinaryFile(filePath: string): boolean {
export async function isBinaryFile(filePath: string): Promise<boolean> {
let fileHandle: fs.promises.FileHandle | undefined;
try {
const fd = fs.openSync(filePath, 'r');
fileHandle = await fs.promises.open(filePath, 'r');

// Read up to 4KB or file size, whichever is smaller
const fileSize = fs.fstatSync(fd).size;
const stats = await fileHandle.stat();
const fileSize = stats.size;
if (fileSize === 0) {
// Empty file is not considered binary for content checking
fs.closeSync(fd);
return false;
}
const bufferSize = Math.min(4096, fileSize);
const buffer = Buffer.alloc(bufferSize);
const bytesRead = fs.readSync(fd, buffer, 0, buffer.length, 0);
fs.closeSync(fd);
const result = await fileHandle.read(buffer, 0, buffer.length, 0);
const bytesRead = result.bytesRead;

if (bytesRead === 0) return false;

Expand All @@ -84,21 +86,40 @@ export function isBinaryFile(filePath: string): boolean {
}
// If >30% non-printable characters, consider it binary
return nonPrintableCount / bytesRead > 0.3;
} catch {
} catch (error) {
// Log error for debugging while maintaining existing behavior
console.warn(
`Failed to check if file is binary: ${filePath}`,
error instanceof Error ? error.message : String(error),
);
// If any error occurs (e.g. file not found, permissions),
// treat as not binary here; let higher-level functions handle existence/access errors.
return false;
} finally {
// Safely close the file handle if it was successfully opened
if (fileHandle) {
try {
await fileHandle.close();
} catch (closeError) {
// Log close errors for debugging while continuing with cleanup
console.warn(
`Failed to close file handle for: ${filePath}`,
closeError instanceof Error ? closeError.message : String(closeError),
);
// The important thing is that we attempted to clean up
}
}
}
}

/**
* Detects the type of file based on extension and content.
* @param filePath Path to the file.
* @returns 'text', 'image', 'pdf', 'audio', 'video', or 'binary'.
* @returns Promise that resolves to 'text', 'image', 'pdf', 'audio', 'video', 'binary' or 'svg'.
*/
export function detectFileType(
export async function detectFileType(
filePath: string,
): 'text' | 'image' | 'pdf' | 'audio' | 'video' | 'binary' | 'svg' {
): Promise<'text' | 'image' | 'pdf' | 'audio' | 'video' | 'binary' | 'svg'> {
const ext = path.extname(filePath).toLowerCase();

// The mimetype for "ts" is MPEG transport stream (a video format) but we want
Expand Down Expand Up @@ -166,7 +187,7 @@ export function detectFileType(

// Fallback to content-based check if mime type wasn't conclusive for image/pdf
// and it's not a known binary extension.
if (isBinaryFile(filePath)) {
if (await isBinaryFile(filePath)) {
return 'binary';
}

Expand Down Expand Up @@ -227,7 +248,7 @@ export async function processSingleFileContent(
);
}

const fileType = detectFileType(filePath);
const fileType = await detectFileType(filePath);
const relativePathForDisplay = path
.relative(rootDirectory, filePath)
.replace(/\\/g, '/');
Expand Down