diff --git a/text_extract_api/extract/tasks.py b/text_extract_api/extract/tasks.py index bdd8c81..1d1a77a 100644 --- a/text_extract_api/extract/tasks.py +++ b/text_extract_api/extract/tasks.py @@ -23,10 +23,10 @@ def ocr_task( filename: str, file_hash: str, ocr_cache: bool, - prompt: str, - model: str, - language: str, - storage_profile: str, + prompt: Optional[str] = None, + model: Optional[str] = None, + language: Optional[str] = None, + storage_profile: Optional[str] = None, storage_filename: Optional[str] = None, ): """ @@ -85,7 +85,7 @@ def ocr_task( if storage_profile: if not storage_filename: - storage_filename = filename.replace('.pdf', '.md') + storage_filename = filename.replace('.', '_') + '.pdf' storage_manager = StorageManager(storage_profile) storage_manager.save(filename, storage_filename, extracted_text) diff --git a/text_extract_api/main.py b/text_extract_api/main.py index 6b978fd..7636757 100644 --- a/text_extract_api/main.py +++ b/text_extract_api/main.py @@ -78,7 +78,7 @@ async def ocr_endpoint( async def ocr_upload_endpoint( strategy: str = Form(...), prompt: str = Form(None), - model: str = Form(...), + model: str = Form(None), file: UploadFile = File(...), ocr_cache: bool = Form(...), storage_profile: str = Form('default'), @@ -104,7 +104,7 @@ class OllamaPullRequest(BaseModel): class OcrRequest(BaseModel): strategy: str = Field(..., description="OCR strategy to use") prompt: Optional[str] = Field(None, description="Prompt for the Ollama model") - model: str = Field(..., description="Model to use for the Ollama endpoint") + model: Optional[str] = Field(None, description="Model to use for the Ollama endpoint") file: FileField = Field(..., description="Base64 encoded document file") ocr_cache: bool = Field(..., description="Enable OCR result caching") storage_profile: Optional[str] = Field('default', description="Storage profile to use") @@ -126,7 +126,7 @@ def validate_storage_profile(cls, v): class OcrFormRequest(BaseModel): strategy: str = Field(..., description="OCR strategy to use") prompt: Optional[str] = Field(None, description="Prompt for the Ollama model") - model: str = Field(..., description="Model to use for the Ollama endpoint") + model: Optional[str] = Field(None, description="Model to use for the Ollama endpoint") ocr_cache: bool = Field(..., description="Enable OCR result caching") storage_profile: Optional[str] = Field('default', description="Storage profile to use") storage_filename: Optional[str] = Field(None, description="Storage filename to use")