IBM · esnible · Feb 24, 2025 · Feb 24, 2025 · Feb 24, 2025 · Feb 24, 2025
diff --git a/.gitignore b/.gitignore
@@ -151,6 +151,9 @@ pdl-live/package-lock.json
 *_result.yaml
 *_trace.json
 
+# Demo files
+pdl-rag-demo.db
+
 # Built docs
 _site
 

diff --git a/examples/rag/README.md b/examples/rag/README.md
@@ -1,4 +1,30 @@
-This example requires you to install:
+This example uses [Ollama](../../tutorial/#using-ollama-models).  Fetch the models used in this example with:
+
+```bash
+ollama pull mxbai-embed-large
+ollama pull granite-code:8b
 ```
-pip install scikit-learn
-```
+
+This example requires you to install pypdf, langchain, langchain-community, and pymilvus.
+
+```bash
+pip install pypdf pymilvus langchain langchain-community
+```
+
+To run the demo, first load a PDF document into the vector database:
+
+```bash
+pdl examples/rag/pdf_index.pdl
+```
+
+After the data has loaded, the program prints "Success!"
+
+Next, query the vector database for relevant text and use that text in a query to an LLM:
+
+```bash
+pdl examples/rag/pdf_query.pdl
+```
+
+This PDL program computes a data structure containing all questions and answers.  It is printed at the end.
+
+To clean up, run `rm pdl-rag-demo.db`.
diff --git a/examples/rag/pdf_index.pdl b/examples/rag/pdf_index.pdl
@@ -0,0 +1,21 @@
+# Load PDF document into vector database
+
+description: Load document into vector database
+text:
+- include: rag_library1.pdl
+- call: ${ pdf_parse }
+  args:
+    filename: "docs/assets/pdl_quick_reference.pdf"
+    chunk_size: 400
+    chunk_overlap: 100
+  def: input_data
+  contribute: []
+- call: ${ rag_index }
+  args:
+    inp: ${ input_data }
+    encoder_model: "ollama/mxbai-embed-large"
+    embed_dimension: 1024
+    database_name: "./pdl-rag-demo.db"
+    collection_name: "pdl_rag_collection"  
+  contribute: []
+- "Success!"
diff --git a/examples/rag/pdf_query.pdl b/examples/rag/pdf_query.pdl
@@ -0,0 +1,47 @@
+# Query vector database for relevant passages; use passages to query LLM.
+
+defs:
+  QUESTIONS:
+    data: [
+      "Does PDL have a contribute keyword?",
+      "Is Paris the capital of France?"
+    ]
+lastOf:
+  - include: rag_library1.pdl
+  - defs:
+      CONCLUSIONS:
+        for:
+          question: ${ QUESTIONS }
+        repeat:
+            # Define MATCHING_PASSAGES as the text retrieved from the vector DB
+          defs:
+            MATCHING_PASSAGES:
+              call: ${ rag_retrieve }
+              args:
+                # NOTE: the database client is not passed in explicitly; rag.py obtains it
+                # from a global cache keyed by database_name.  This is not ideal.
+                inp: ${ question }
+                encoder_model: "ollama/mxbai-embed-large"
+                limit: 3
+                collection_name: "pdl_rag_collection"  
+                database_name: "./pdl-rag-demo.db"
+            # debug:
+            #   lang: python
+            #   code: |
+            #      print(f"MATCHING_PASSAGES='{MATCHING_PASSAGES}'")
+            #      result = None
+            CONCLUSION:
+              model: ollama/granite-code:8b
+              input: >
+                Here is some information:
+                ${ MATCHING_PASSAGES }
+                Question: ${ question }
+                Answer:
+              parameters:
+                # Uncomment if you only want Yes or No
+                # stop: [',', ':', '.']
+                temperature: 0
+          data:
+            ${question}: ${CONCLUSION}
+        join:
+          as: array
+    text: "${ CONCLUSIONS | tojson }\n"
diff --git a/examples/rag/rag.py b/examples/rag/rag.py
@@ -0,0 +1,120 @@
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_community.document_loaders import PyPDFLoader
+from litellm import embedding
+from litellm.exceptions import APIConnectionError
+from pymilvus import MilvusClient
+
+
+def parse(filename: str, chunk_size: int, chunk_overlap: int) -> list[str]:
+    loader = PyPDFLoader(filename)
+
+    docs = loader.load()
+    # 'docs' will be a list[langchain_core.documents.base.Document],
+    # one entry per page.  We don't want to return this, because PDL only
+    # wants types that work in JSON schemas.
+
+    text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size=chunk_size,
+        chunk_overlap=chunk_overlap,
+        length_function=len,
+        is_separator_regex=False,
+    )
+
+    split_docs = text_splitter.split_documents(docs)
+
+    # Note that this throws away the metadata.
+    return [doc.page_content for doc in split_docs]
+
+
+def rag_index(
+    inp: list[str],
+    encoder_model: str,
+    embed_dimension: int,
+    database_name: str,
+    collection_name: str,
+):
+    # Have LiteLLM embed the passages
+    response = embedding(
+        model=encoder_model,
+        input=inp,
+    )
+
+    client = MilvusClient(
+        database_name
+    )  # Use URL if talking to remote Milvus (non-Lite)
+
+    if client.has_collection(collection_name=collection_name):
+        client.drop_collection(collection_name=collection_name)
+    client.create_collection(
+        collection_name=collection_name, dimension=embed_dimension, overwrite=True
+    )
+
+    mid = 0  # There is also an auto-id feature in Milvus, which we are not using
+    for text in inp:
+        vector = response.data[mid]["embedding"]  # type: ignore
+        client.insert(
+            collection_name=collection_name,
+            data=[
+                {
+                    "id": mid,
+                    "text": text,
+                    "vector": vector,
+                    # We SHOULD set "source" and "url" based on the metadata we threw away in parse()
+                }
+            ],
+        )
+        mid = mid + 1
+
+    return True
+
+
+# Global cache of database clients.
+# (We do this so the PDL programmer doesn't need to explicitly maintain the client connection)
+DATABASE_CLIENTS: dict[str, MilvusClient] = {}
+
+
+def get_or_create_client(database_name: str):
+    if database_name in DATABASE_CLIENTS:
+        return DATABASE_CLIENTS[database_name]
+
+    client = MilvusClient(
+        database_name
+    )  # Use URL if talking to remote Milvus (non-Lite)
+    DATABASE_CLIENTS[database_name] = client
+    return client
+
+
+# Search vector database collection for input.
+# The output is 'limit' vectors, as strings, concatenated together
+def rag_retrieve(
+    inp: str, encoder_model: str, limit: int, database_name: str, collection_name: str
+) -> str:
+    # Embed the question as a vector
+    try:
+        response = embedding(
+            model=encoder_model,
+            input=[inp],
+        )
+    except APIConnectionError:
+        # Retry because of https://github.com/BerriAI/litellm/issues/7667
+        response = embedding(
+            model=encoder_model,
+            input=[inp],
+        )
+    except BaseException as be:
+        # Typically litellm.exceptions.APIConnectionError
+        return f"Unexpected {type(be)}: be={be}"
+
+    data = response.data[0]["embedding"]
+
+    milvus_client = get_or_create_client(database_name)
+    search_res = milvus_client.search(
+        collection_name=collection_name,
+        data=[data],
+        limit=limit,  # Return top n results
+        search_params={"metric_type": "COSINE", "params": {}},
+        output_fields=["text"],  # Return the text field
+    )
+
+    # Note that this throws away document metadata (if any)
+    return "\n".join([res["entity"]["text"] for res in search_res[0]])
diff --git a/examples/rag/rag_library1.pdl b/examples/rag/rag_library1.pdl
@@ -0,0 +1,38 @@
+# This module can be included from a PDL program to bring in Python functions.
+
+description: RAG library for PDL
+text:
+- def: pdf_parse
+  function:
+    filename: str
+    chunk_size: int
+    chunk_overlap: int
+  return:
+    lang: python
+    code: |
+        from examples.rag.rag import parse
+        result = parse(filename, chunk_size, chunk_overlap)
+- def: rag_index
+  function:
+    inp: list # This is a list[str], but PDL doesn't allow that type
+    encoder_model: str
+    embed_dimension: int
+    database_name: str # optional, could also be URL?
+    collection_name: str
+  return:
+    lang: python
+    code: |
+        from examples.rag.rag import rag_index
+        result = rag_index(inp, encoder_model, embed_dimension, database_name, collection_name)
+- def: rag_retrieve
+  function:
+    inp: str
+    encoder_model: str
+    limit: int
+    collection_name: str
+    database_name: str # optional, could also be URL?
+  return:
+    lang: python
+    code: |
+        from examples.rag.rag import rag_retrieve
+        result = rag_retrieve(inp, encoder_model, limit, database_name, collection_name)
diff --git a/examples/tfidf_rag/README.md b/examples/tfidf_rag/README.md
@@ -0,0 +1,4 @@
+This example requires you to install:
+```
+pip install scikit-learn
+```
diff --git a/examples/rag/rag.pdl → examples/tfidf_rag/rag.pdl b/examples/rag/rag.pdl → examples/tfidf_rag/rag.pdl
diff --git a/pyproject.toml b/pyproject.toml
@@ -39,6 +39,10 @@ dev = [
   "pydantic~=2.9"
 ]
 examples = [
+  "pymilvus~=2.5",
+  "langchain~=0.3",
+  "langchain-community~=0.3",
+  "pypdf~=5.2",
   "wikipedia~=1.0",
   "textdistance~=4.0",
   "datasets>2,<4",

diff --git a/src/pdl/pdl_interpreter.py b/src/pdl/pdl_interpreter.py
@@ -1398,7 +1398,7 @@ def process_call_code(
                 )
             except Exception as exc:
                 raise PDLRuntimeError(
-                    f"Code error: {repr(exc)}",
+                    f"Python Code error: {repr(exc)}",
                     loc=loc,
                     trace=block.model_copy(
                         update={"code": code_s, "defsite": block.id}
@@ -1416,7 +1416,7 @@ def process_call_code(
                 )
             except Exception as exc:
                 raise PDLRuntimeError(
-                    f"Code error: {repr(exc)}",
+                    f"Shell Code error: {repr(exc)}",
                     loc=loc,
                     trace=block.model_copy(update={"code": code_s}),
                 ) from exc
@@ -1436,7 +1436,7 @@ def process_call_code(
                 )
             except Exception as exc:
                 raise PDLRuntimeError(
-                    f"Code error: {repr(exc)}",
+                    f"Jinja Code error: {repr(exc)}",
                     loc=loc,
                     trace=block.model_copy(update={"code": code_s}),
                 ) from exc
@@ -1449,7 +1449,7 @@ def process_call_code(
                 )
             except Exception as exc:
                 raise PDLRuntimeError(
-                    f"Code error: {repr(exc)}",
+                    f"PDL Code error: {repr(exc)}",
                     loc=loc,
                     trace=block.model_copy(update={"code": code_s}),
                 ) from exc