Decorate each predict method with the per-model SMILES LRU cache

aditya0by0 · aditya0by0 · commit ad14325f63ed · 2025-08-01T23:24:25.000+02:00
diff --git a/chebifier/__init__.py b/chebifier/__init__.py
@@ -0,0 +1,6 @@
+# Note: Python executes the top-level package __init__.py only once per process,
+# so every submodule imported later shares the single cache instance created below.
+
+from ._custom_cache import PerSmilesPerModelLRUCache
+
+modelwise_smiles_lru_cache = PerSmilesPerModelLRUCache(max_size=100)
diff --git a/chebifier/_custom_cache.py b/chebifier/_custom_cache.py
@@ -92,6 +92,9 @@ def wrapper(instance, smiles_list: list[str]):
             # Reorder results to match original indices
             results.sort(key=lambda x: x[0])  # sort by index
             ordered = [result for _, result in results]
+            assert len(ordered) == len(
+                smiles_list
+            ), "Result length does not match input length."
             return ordered
 
         return wrapper
diff --git a/chebifier/prediction_models/base_predictor.py b/chebifier/prediction_models/base_predictor.py
@@ -1,7 +1,7 @@
 import json
 from abc import ABC
 
-from functools import lru_cache
+from chebifier import modelwise_smiles_lru_cache
 
 
 class BasePredictor(ABC):
@@ -23,17 +23,13 @@ def __init__(
 
         self._description = kwargs.get("description", None)
 
+    @modelwise_smiles_lru_cache.batch_decorator
     def predict_smiles_list(self, smiles_list: list[str]) -> dict:
-        # list is not hashable, so we convert it to a tuple (useful for caching)
-        return self.predict_smiles_tuple(tuple(smiles_list))
-
-    @lru_cache(maxsize=100)
-    def predict_smiles_tuple(self, smiles_tuple: tuple[str]) -> dict:
         raise NotImplementedError()
 
     def predict_smiles(self, smiles: str) -> dict:
         # by default, use list-based prediction
-        return self.predict_smiles_tuple((smiles,))[0]
+        return self.predict_smiles_list([smiles])[0]
 
     @property
     def info_text(self):
diff --git a/chebifier/prediction_models/c3p_predictor.py b/chebifier/prediction_models/c3p_predictor.py
@@ -1,9 +1,9 @@
-from functools import lru_cache
-from typing import Optional, List
 from pathlib import Path
+from typing import List, Optional
 
 from c3p import classifier as c3p_classifier
 
+from chebifier import modelwise_smiles_lru_cache
 from chebifier.prediction_models import BasePredictor
 
 
@@ -24,8 +24,8 @@ def __init__(
         self.chemical_classes = chemical_classes
         self.chebi_graph = kwargs.get("chebi_graph", None)
 
-    @lru_cache(maxsize=100)
-    def predict_smiles_tuple(self, smiles_list: tuple[str]) -> list:
+    @modelwise_smiles_lru_cache.batch_decorator
+    def predict_smiles_list(self, smiles_list: list[str]) -> list:
         result_list = c3p_classifier.classify(
             list(smiles_list),
             self.program_directory,
diff --git a/chebifier/prediction_models/chebi_lookup.py b/chebifier/prediction_models/chebi_lookup.py
@@ -1,16 +1,16 @@
-from functools import lru_cache
+import json
+import os
 from typing import Optional
 
-from chebifier.prediction_models import BasePredictor
-import os
 import networkx as nx
 from rdkit import Chem
-import json
+
+from chebifier import modelwise_smiles_lru_cache
+from chebifier.prediction_models import BasePredictor
 from chebifier.utils import load_chebi_graph
 
 
 class ChEBILookupPredictor(BasePredictor):
-
     def __init__(
         self,
         model_name: str,
@@ -67,7 +67,6 @@ def build_smiles_lookup(self):
                     )
         return smiles_lookup
 
-    @lru_cache(maxsize=100)
     def predict_smiles(self, smiles: str) -> Optional[dict]:
         if not smiles:
             return None
@@ -94,7 +93,8 @@ def predict_smiles(self, smiles: str) -> Optional[dict]:
         else:
             return None
 
-    def predict_smiles_tuple(self, smiles_list: list[str]) -> list:
+    @modelwise_smiles_lru_cache.batch_decorator
+    def predict_smiles_list(self, smiles_list: list[str]) -> list:
         predictions = []
         for smiles in smiles_list:
             predictions.append(self.predict_smiles(smiles))
@@ -145,7 +145,8 @@ def explain_smiles(self, smiles: str) -> dict:
     # Example usage
     smiles_list = [
         "CCO",
-        "C1=CC=CC=C1" "*C(=O)OC[C@H](COP(=O)([O-])OCC[N+](C)(C)C)OC(*)=O",
+        "C1=CC=CC=C1",
+        "*C(=O)OC[C@H](COP(=O)([O-])OCC[N+](C)(C)C)OC(*)=O",
     ]  # SMILES with 251 matches in ChEBI
     predictions = predictor.predict_smiles_list(smiles_list)
     print(predictions)
diff --git a/chebifier/prediction_models/chemlog_predictor.py b/chebifier/prediction_models/chemlog_predictor.py
@@ -12,10 +12,11 @@
 )
 from chemlog.cli import CLASSIFIERS, _smiles_to_mol, strategy_call
 from chemlog_extra.alg_classification.by_element_classification import (
-    XMolecularEntityClassifier,
     OrganoXCompoundClassifier,
+    XMolecularEntityClassifier,
 )
-from functools import lru_cache
+
+from chebifier import modelwise_smiles_lru_cache
 
 from .base_predictor import BasePredictor
 
@@ -47,7 +48,6 @@
 
 
 class ChemlogExtraPredictor(BasePredictor):
-
     CHEMLOG_CLASSIFIER = None
 
     def __init__(self, model_name: str, **kwargs):
@@ -72,12 +72,10 @@ def predict_smiles_tuple(self, smiles_list: tuple[str]) -> list:
 
 
 class ChemlogXMolecularEntityPredictor(ChemlogExtraPredictor):
-
     CHEMLOG_CLASSIFIER = XMolecularEntityClassifier
 
 
 class ChemlogOrganoXCompoundPredictor(ChemlogExtraPredictor):
-
     CHEMLOG_CLASSIFIER = OrganoXCompoundClassifier
 
 
@@ -97,7 +95,6 @@ def __init__(self, model_name: str, **kwargs):
         # fmt: on
         print(f"Initialised ChemLog model {self.model_name}")
 
-    @lru_cache(maxsize=100)
     def predict_smiles(self, smiles: str) -> Optional[dict]:
         mol = _smiles_to_mol(smiles)
         if mol is None:
@@ -122,7 +119,8 @@ def predict_smiles(self, smiles: str) -> Optional[dict]:
             for label in self.peptide_labels + pos_labels
         }
 
-    def predict_smiles_tuple(self, smiles_list: tuple[str]) -> list:
+    @modelwise_smiles_lru_cache.batch_decorator
+    def predict_smiles_list(self, smiles_list: list[str]) -> list:
         results = []
         for i, smiles in tqdm.tqdm(enumerate(smiles_list)):
             results.append(self.predict_smiles(smiles))
diff --git a/chebifier/prediction_models/nn_predictor.py b/chebifier/prediction_models/nn_predictor.py
@@ -1,10 +1,10 @@
-from functools import lru_cache
-
 import numpy as np
 import torch
 import tqdm
 from rdkit import Chem
 
+from chebifier import modelwise_smiles_lru_cache
+
 from .base_predictor import BasePredictor
 
 
@@ -52,8 +52,8 @@ def read_smiles(self, smiles):
         d = reader.to_data(dict(features=smiles, labels=None))
         return d
 
-    @lru_cache(maxsize=100)
-    def predict_smiles_tuple(self, smiles_list: tuple[str]) -> list:
+    @modelwise_smiles_lru_cache.batch_decorator
+    def predict_smiles_list(self, smiles_list: list[str]) -> list:
         """Returns a list with the length of smiles_list, each element is either None (=failure) or a dictionary
         Of classes and predicted values."""
         token_dicts = []