Skip to content

Commit a1f1378

Browse files
Han5991jacob314SandyTao520
authored andcommitted
perf(core): implement parallel file processing for 74% performance improvement (google-gemini#4763)
Co-authored-by: Jacob Richman <[email protected]> Co-authored-by: Sandy Tao <[email protected]>
1 parent 1dd1406 commit a1f1378

File tree

2 files changed

+266
-52
lines changed

2 files changed

+266
-52
lines changed

packages/core/src/tools/read-many-files.test.ts

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -477,4 +477,139 @@ describe('ReadManyFilesTool', () => {
477477
fs.rmSync(tempDir2, { recursive: true, force: true });
478478
});
479479
});
480+
481+
describe('Batch Processing', () => {
482+
const createMultipleFiles = (count: number, contentPrefix = 'Content') => {
483+
const files: string[] = [];
484+
for (let i = 0; i < count; i++) {
485+
const fileName = `file${i}.txt`;
486+
createFile(fileName, `${contentPrefix} ${i}`);
487+
files.push(fileName);
488+
}
489+
return files;
490+
};
491+
492+
const createFile = (filePath: string, content = '') => {
493+
const fullPath = path.join(tempRootDir, filePath);
494+
fs.mkdirSync(path.dirname(fullPath), { recursive: true });
495+
fs.writeFileSync(fullPath, content);
496+
};
497+
498+
it('should process files in parallel for performance', async () => {
499+
// Mock detectFileType to add artificial delay to simulate I/O
500+
const detectFileTypeSpy = vi.spyOn(
501+
await import('../utils/fileUtils.js'),
502+
'detectFileType',
503+
);
504+
505+
// Create files
506+
const fileCount = 4;
507+
const files = createMultipleFiles(fileCount, 'Batch test');
508+
509+
// Mock with 100ms delay per file to simulate I/O operations
510+
detectFileTypeSpy.mockImplementation(async (_filePath: string) => {
511+
await new Promise((resolve) => setTimeout(resolve, 100));
512+
return 'text';
513+
});
514+
515+
const startTime = Date.now();
516+
const params = { paths: files };
517+
const result = await tool.execute(params, new AbortController().signal);
518+
const endTime = Date.now();
519+
520+
const processingTime = endTime - startTime;
521+
522+
console.log(
523+
`Processing time: ${processingTime}ms for ${fileCount} files`,
524+
);
525+
526+
// Verify parallel processing performance improvement
527+
// Parallel processing should complete in ~100ms (single file time)
528+
// Sequential would take ~400ms (4 files × 100ms each)
529+
expect(processingTime).toBeLessThan(200); // Should PASS with parallel implementation
530+
531+
// Verify all files were processed
532+
const content = result.llmContent as string[];
533+
expect(content).toHaveLength(fileCount);
534+
535+
// Cleanup mock
536+
detectFileTypeSpy.mockRestore();
537+
});
538+
539+
it('should handle batch processing errors gracefully', async () => {
540+
// Create mix of valid and problematic files
541+
createFile('valid1.txt', 'Valid content 1');
542+
createFile('valid2.txt', 'Valid content 2');
543+
createFile('valid3.txt', 'Valid content 3');
544+
545+
const params = {
546+
paths: [
547+
'valid1.txt',
548+
'valid2.txt',
549+
'nonexistent-file.txt', // This will fail
550+
'valid3.txt',
551+
],
552+
};
553+
554+
const result = await tool.execute(params, new AbortController().signal);
555+
const content = result.llmContent as string[];
556+
557+
// Should successfully process valid files despite one failure
558+
expect(content.length).toBeGreaterThanOrEqual(3);
559+
expect(result.returnDisplay).toContain('Successfully read');
560+
561+
// Verify valid files were processed
562+
const expectedPath1 = path.join(tempRootDir, 'valid1.txt');
563+
const expectedPath3 = path.join(tempRootDir, 'valid3.txt');
564+
expect(content.some((c) => c.includes(expectedPath1))).toBe(true);
565+
expect(content.some((c) => c.includes(expectedPath3))).toBe(true);
566+
});
567+
568+
it('should execute file operations concurrently', async () => {
569+
// Track execution order to verify concurrency
570+
const executionOrder: string[] = [];
571+
const detectFileTypeSpy = vi.spyOn(
572+
await import('../utils/fileUtils.js'),
573+
'detectFileType',
574+
);
575+
576+
const files = ['file1.txt', 'file2.txt', 'file3.txt'];
577+
files.forEach((file) => createFile(file, 'test content'));
578+
579+
// Mock to track concurrent vs sequential execution
580+
detectFileTypeSpy.mockImplementation(async (filePath: string) => {
581+
const fileName = filePath.split('/').pop() || '';
582+
executionOrder.push(`start:${fileName}`);
583+
584+
// Add delay to make timing differences visible
585+
await new Promise((resolve) => setTimeout(resolve, 50));
586+
587+
executionOrder.push(`end:${fileName}`);
588+
return 'text';
589+
});
590+
591+
await tool.execute({ paths: files }, new AbortController().signal);
592+
593+
console.log('Execution order:', executionOrder);
594+
595+
// Verify concurrent execution pattern
596+
// In parallel execution: all "start:" events should come before all "end:" events
597+
// In sequential execution: "start:file1", "end:file1", "start:file2", "end:file2", etc.
598+
599+
const startEvents = executionOrder.filter((e) =>
600+
e.startsWith('start:'),
601+
).length;
602+
const firstEndIndex = executionOrder.findIndex((e) =>
603+
e.startsWith('end:'),
604+
);
605+
const startsBeforeFirstEnd = executionOrder
606+
.slice(0, firstEndIndex)
607+
.filter((e) => e.startsWith('start:')).length;
608+
609+
// For parallel processing, ALL start events should happen before the first end event
610+
expect(startsBeforeFirstEnd).toBe(startEvents); // Should PASS with parallel implementation
611+
612+
detectFileTypeSpy.mockRestore();
613+
});
614+
});
480615
});

packages/core/src/tools/read-many-files.ts

Lines changed: 131 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,27 @@ export interface ReadManyFilesParams {
7070
};
7171
}
7272

73+
/**
74+
* Result type for file processing operations
75+
*/
76+
type FileProcessingResult =
77+
| {
78+
success: true;
79+
filePath: string;
80+
relativePathForDisplay: string;
81+
fileReadResult: NonNullable<
82+
Awaited<ReturnType<typeof processSingleFileContent>>
83+
>;
84+
reason?: undefined;
85+
}
86+
| {
87+
success: false;
88+
filePath: string;
89+
relativePathForDisplay: string;
90+
fileReadResult?: undefined;
91+
reason: string;
92+
};
93+
7394
/**
7495
* Default exclusion patterns for commonly ignored directories and binary file types.
7596
* These are compatible with glob ignore patterns.
@@ -413,66 +434,124 @@ Use this tool when the user's query implies needing the content of several files
413434

414435
const sortedFiles = Array.from(filesToConsider).sort();
415436

416-
for (const filePath of sortedFiles) {
417-
const relativePathForDisplay = path
418-
.relative(this.config.getTargetDir(), filePath)
419-
.replace(/\\/g, '/');
437+
const fileProcessingPromises = sortedFiles.map(
438+
async (filePath): Promise<FileProcessingResult> => {
439+
try {
440+
const relativePathForDisplay = path
441+
.relative(this.config.getTargetDir(), filePath)
442+
.replace(/\\/g, '/');
443+
444+
const fileType = await detectFileType(filePath);
445+
446+
if (fileType === 'image' || fileType === 'pdf') {
447+
const fileExtension = path.extname(filePath).toLowerCase();
448+
const fileNameWithoutExtension = path.basename(
449+
filePath,
450+
fileExtension,
451+
);
452+
const requestedExplicitly = inputPatterns.some(
453+
(pattern: string) =>
454+
pattern.toLowerCase().includes(fileExtension) ||
455+
pattern.includes(fileNameWithoutExtension),
456+
);
457+
458+
if (!requestedExplicitly) {
459+
return {
460+
success: false,
461+
filePath,
462+
relativePathForDisplay,
463+
reason:
464+
'asset file (image/pdf) was not explicitly requested by name or extension',
465+
};
466+
}
467+
}
468+
469+
// Use processSingleFileContent for all file types now
470+
const fileReadResult = await processSingleFileContent(
471+
filePath,
472+
this.config.getTargetDir(),
473+
);
474+
475+
if (fileReadResult.error) {
476+
return {
477+
success: false,
478+
filePath,
479+
relativePathForDisplay,
480+
reason: `Read error: ${fileReadResult.error}`,
481+
};
482+
}
483+
484+
return {
485+
success: true,
486+
filePath,
487+
relativePathForDisplay,
488+
fileReadResult,
489+
};
490+
} catch (error) {
491+
const relativePathForDisplay = path
492+
.relative(this.config.getTargetDir(), filePath)
493+
.replace(/\\/g, '/');
494+
495+
return {
496+
success: false,
497+
filePath,
498+
relativePathForDisplay,
499+
reason: `Unexpected error: ${error instanceof Error ? error.message : String(error)}`,
500+
};
501+
}
502+
},
503+
);
420504

421-
const fileType = await detectFileType(filePath);
505+
const results = await Promise.allSettled(fileProcessingPromises);
422506

423-
if (fileType === 'image' || fileType === 'pdf') {
424-
const fileExtension = path.extname(filePath).toLowerCase();
425-
const fileNameWithoutExtension = path.basename(filePath, fileExtension);
426-
const requestedExplicitly = inputPatterns.some(
427-
(pattern: string) =>
428-
pattern.toLowerCase().includes(fileExtension) ||
429-
pattern.includes(fileNameWithoutExtension),
430-
);
507+
for (const result of results) {
508+
if (result.status === 'fulfilled') {
509+
const fileResult = result.value;
431510

432-
if (!requestedExplicitly) {
511+
if (!fileResult.success) {
512+
// Handle skipped files (images/PDFs not requested or read errors)
433513
skippedFiles.push({
434-
path: relativePathForDisplay,
435-
reason:
436-
'asset file (image/pdf) was not explicitly requested by name or extension',
514+
path: fileResult.relativePathForDisplay,
515+
reason: fileResult.reason,
437516
});
438-
continue;
517+
} else {
518+
// Handle successfully processed files
519+
const { filePath, relativePathForDisplay, fileReadResult } =
520+
fileResult;
521+
522+
if (typeof fileReadResult.llmContent === 'string') {
523+
const separator = DEFAULT_OUTPUT_SEPARATOR_FORMAT.replace(
524+
'{filePath}',
525+
filePath,
526+
);
527+
contentParts.push(
528+
`${separator}\n\n${fileReadResult.llmContent}\n\n`,
529+
);
530+
} else {
531+
contentParts.push(fileReadResult.llmContent); // This is a Part for image/pdf
532+
}
533+
534+
processedFilesRelativePaths.push(relativePathForDisplay);
535+
536+
const lines =
537+
typeof fileReadResult.llmContent === 'string'
538+
? fileReadResult.llmContent.split('\n').length
539+
: undefined;
540+
const mimetype = getSpecificMimeType(filePath);
541+
recordFileOperationMetric(
542+
this.config,
543+
FileOperation.READ,
544+
lines,
545+
mimetype,
546+
path.extname(filePath),
547+
);
439548
}
440-
}
441-
442-
// Use processSingleFileContent for all file types now
443-
const fileReadResult = await processSingleFileContent(
444-
filePath,
445-
this.config.getTargetDir(),
446-
);
447-
448-
if (fileReadResult.error) {
549+
} else {
550+
// Handle Promise rejection (unexpected errors)
449551
skippedFiles.push({
450-
path: relativePathForDisplay,
451-
reason: `Read error: ${fileReadResult.error}`,
552+
path: 'unknown',
553+
reason: `Unexpected error: ${result.reason}`,
452554
});
453-
} else {
454-
if (typeof fileReadResult.llmContent === 'string') {
455-
const separator = DEFAULT_OUTPUT_SEPARATOR_FORMAT.replace(
456-
'{filePath}',
457-
filePath,
458-
);
459-
contentParts.push(`${separator}\n\n${fileReadResult.llmContent}\n\n`);
460-
} else {
461-
contentParts.push(fileReadResult.llmContent); // This is a Part for image/pdf
462-
}
463-
processedFilesRelativePaths.push(relativePathForDisplay);
464-
const lines =
465-
typeof fileReadResult.llmContent === 'string'
466-
? fileReadResult.llmContent.split('\n').length
467-
: undefined;
468-
const mimetype = getSpecificMimeType(filePath);
469-
recordFileOperationMetric(
470-
this.config,
471-
FileOperation.READ,
472-
lines,
473-
mimetype,
474-
path.extname(filePath),
475-
);
476555
}
477556
}
478557

0 commit comments

Comments
 (0)