@@ -27,14 +27,19 @@ def process_page_file(self, *input_files: Optional[OcrdFileType]) -> None:
2727 metrics = self .parameter ["metrics" ]
2828 textequiv_level = self .parameter ["textequiv_level" ]
2929
30- try :
31- gt_file , ocr_file = input_files
32- assert gt_file , 'missing GT file'
33- assert ocr_file , 'missing OCR file'
34- assert gt_file .local_filename
35- assert ocr_file .local_filename
36- except (ValueError , AssertionError ) as err :
37- self .logger .warning (f'Missing either GT file, OCR file or both: { err } ' ) # TODO how to log which page?
30+ # wrong number of inputs: let fail
31+ gt_file , ocr_file = input_files
32+ # missing on either side: skip (zip_input_files already warned)
33+ if not gt_file or not ocr_file :
34+ return
35+ # missing download (i.e. OCRD_DOWNLOAD_INPUT=false):
36+ if not gt_file .local_filename :
37+ if config .OCRD_MISSING_INPUT == 'ABORT' :
38+ raise MissingInputFile (gt_file .fileGrp , gt_file .pageId , gt_file .mimetype )
39+ return
40+ if not ocr_file .local_filename :
41+ if config .OCRD_MISSING_INPUT == 'ABORT' :
42+ raise MissingInputFile (ocr_file .fileGrp , ocr_file .pageId , ocr_file .mimetype )
3843 return
3944
4045 page_id = gt_file .pageId
0 commit comments