15 changes: 14 additions & 1 deletion .github/workflows/run-examples.yml
@@ -34,13 +34,14 @@ jobs:
shell: bash
run: |
ollama pull granite3.2:2b
ollama pull granite3.2:8b
ollama pull mxbai-embed-large
ollama list

- name: Check that all required models are available
shell: bash
run: |
models=("mxbai-embed-large" "granite3.2:2b")
models=("mxbai-embed-large" "granite3.2:2b" "granite3.2:8b")
missing=0
for model in "${models[@]}"; do
if ! ollama list | awk 'NR>1 {print $1}' | grep -q "$model"; then
@@ -63,6 +64,8 @@ jobs:

# Run tests
- uses: actions/checkout@v4
with:
ref: ${{ github.head_ref }}
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
@@ -91,4 +94,14 @@ jobs:
WATSONX_APIKEY: ${{ secrets.WATSONX_APIKEY }}
WATSONX_URL: ${{ secrets.WATSONX_URL }}
REPLICATE_API_TOKEN: ${{ secrets.REPLICATE_API_TOKEN }}
OLLAMA_GHACTIONS_RESULTS: true
run: py.test -v --capture=tee-sys -rfE -s tests/test_examples_run.py
- name: Update example result files (if any) generated from Ollama running on GH Actions
if: matrix.python-version == '3.11'
run: |
git config --local user.name github-actions[bot]
git config --local user.email "${{ github.actor_id }}+${{ github.actor }}@users.noreply.github.com"
git status
git add tests/results/
git diff --cached --quiet || git commit -S -s -m "github-actions[bot]: Updated results file when running examples on $(date)"
git push
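For local debugging of the CI changes above, the model-availability check can be reproduced outside of GitHub Actions. The following Python sketch mirrors the bash loop in run-examples.yml; it assumes the `ollama` CLI is installed and on the PATH, and that `ollama list` prints a header row followed by one model name per line in the first column.

# Sketch of the workflow's model check, for running locally before the examples.
import subprocess
import sys

REQUIRED_MODELS = ["mxbai-embed-large", "granite3.2:2b", "granite3.2:8b"]

def installed_models() -> set[str]:
    # Skip the header row of `ollama list` and keep the first column (the model name).
    out = subprocess.run(["ollama", "list"], capture_output=True, text=True, check=True)
    return {line.split()[0] for line in out.stdout.splitlines()[1:] if line.strip()}

present = installed_models()
missing = [m for m in REQUIRED_MODELS if not any(name.startswith(m) for name in present)]
if missing:
    sys.exit(f"Missing Ollama models: {', '.join(missing)} -- run `ollama pull <model>` first")
print("All required models are available")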
2 changes: 1 addition & 1 deletion examples/chatbot/chatbot.pdl
@@ -5,7 +5,7 @@ text:
message: "What is your query?\n"
- repeat:
text:
# Send context to Granite model hosted at replicate.com
# Send context to Granite model hosted at ollama
- model: ollama_chat/granite3.2:2b
# Allow the user to type 'yes', 'no', or anything else, storing
# the input into a variable named `eval`. The input is also implicitly
18 changes: 9 additions & 9 deletions examples/cldk/cldk-assistant.pdl
@@ -1,5 +1,5 @@
description: CodeLLM-Devkit Assistant
text:
text:
- read:
def: project
message: "Please enter the path to your Java project:\n"
@@ -34,9 +34,9 @@ text:
contribute: []
- "\n***Generating PDL code for your query:\n"
- if: ${ query != 'quit'}
then:
then:
text:
- model: replicate/ibm-granite/granite-3.1-8b-instruct
- model: ollama_chat/granite3.2:8b
def: PDL
input: |
Question: What are all the classes?
@@ -86,7 +86,7 @@ text:
text:
- lang: python
code: |
graph = PDL_SESSION.cldk_state.get_class_call_graph("org.ibm.App", method_name=None)
graph = PDL_SESSION.cldk_state.get_class_call_graph("org.ibm.App", method_name=None)
result = graph
```

@@ -109,7 +109,7 @@ text:
method = PDL_SESSION.cldk_state.get_method("org.ibm.App", "Foo(string)")
result = method
- "\n\nGenerate a summary of method Foo\n\n"
- model: replicate/ibm-granite/granite-3.1-8b-instruct
- model: ollama_chat/granite3.2:8b
```

Question: Generate a different comment for method Foo(string) in class org.ibm.App?
@@ -121,11 +121,11 @@ text:
method = PDL_SESSION.cldk_state.get_method("org.ibm.App", "Foo(string)")
result = method
- "\n\nGenerate a different comment for method Foo(string)\n\n"
- model: replicate/ibm-granite/granite-3.1-8b-instruct
- model: ollama_chat/granite3.2:8b
```

If the query contains something about a field be sure to call a model.

Question: ${ query }


@@ -135,10 +135,10 @@ text:
- "\n\n***Executing the above PDL code:\n\n"
- lang: python
contribute: [result]
code: |
code: |
from pdl.pdl import exec_str
s = """${ PDL }"""
pdl = s.split("```")[1]
result = exec_str(pdl)

until: ${ query == 'quit' }
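The executor block at the end of cldk-assistant.pdl strips the markdown fences from the model's reply and runs the extracted program with pdl.pdl.exec_str. A minimal, self-contained sketch of that execution step is below; the one-line program is a stand-in for the model-generated PDL, which in the assistant arrives via the ${ PDL } variable.

# Sketch: run a PDL program given as a string, as the assistant does with exec_str
# after splitting the model output on the markdown fences.
from pdl.pdl import exec_str

pdl_program = 'text:\n- "Hello from a generated PDL program"\n'  # stand-in for the extracted code
result = exec_str(pdl_program)
print(result)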
14 changes: 7 additions & 7 deletions examples/demo/10-sdg.pdl
@@ -1,6 +1,6 @@
defs:
teacher_sys_prompt: You are a very knowledgeable AI Assistant that will faithfully assist the user with their task.
teacher_model: replicate/ibm-granite/granite-3.1-8b-instruct
teacher_model: ollama_chat/granite3.2:8b
teacher_template:
function:
sys_prompt: str
@@ -29,13 +29,13 @@ defs:
* The questions should not be template-based or generic, it should be very diverse.
* Simply return the questions, do not return any answers or explanations.
* Strictly adhere to the prompt and generate responses in the same style and format as the example.
Use this format to generate the questions:
### Question 1:
Use this format to generate the questions:
### Question 1:
examples: |
To better assist you with this task, here is an example:
### Question 1: ${icl_question}
generation: |
Now generate ${num_samples} such questions, remember to follow the principles mentioned above and use the same format as the examples. Remember to use the same style and format as the example above.
Now generate ${num_samples} such questions, remember to follow the principles mentioned above and use the same format as the examples. Remember to use the same style and format as the example above.
max_new_tokens: 10000

gen_questions_freeform_inner:
@@ -203,7 +203,7 @@ defs:
spec: {introduction: str, principles: str, examples: str, generation: str, max_new_tokens: int, additional_stop_tokens: [str]}
return:
data:
introduction: Your task is to faithfully follow the user's prompt and generate a response.
introduction: Your task is to faithfully follow the user's prompt and generate a response.
principles: |
Please follow these guiding principles when generating responses:
* Use proper grammar and punctuation.
@@ -299,7 +299,7 @@ defs:
introduction: |
Please act as an impartial judge and evaluate the quality of the answer provided by an AI assistant to the questions displayed below. Evaluate whether or not the answer is a good example of how AI Assistant should respond to the user's instruction. Please assign a score using the following 3-point scale.
principles: |
1: It means the answer is incorrect, irrelevant, unsafe or provides incomplete and garbage information. For instance, the answer may be factually wrong, off-topic, or filled with irrelevant content that doesn't address the user's question or it could be incomplete and hanging. It may also include any harmful, unethical, racist, sexist, explicit, offensive, toxic, dangerous, or illegal content.
1: It means the answer is incorrect, irrelevant, unsafe or provides incomplete and garbage information. For instance, the answer may be factually wrong, off-topic, or filled with irrelevant content that doesn't address the user's question or it could be incomplete and hanging. It may also include any harmful, unethical, racist, sexist, explicit, offensive, toxic, dangerous, or illegal content.

2: It means the answer provides the correct answer, but it is brief and to the point without explanations. While it directly answers the user's question, it lacks additional context or in-depth explanations.

@@ -401,7 +401,7 @@ text:
- def: qa_pairs
call: ${gen_answers}
args:
questions: ${filtered_questions}
questions: ${filtered_questions}
- "\n\n----- Filtering QA pairs -----\n\n"
- call: ${filter_question_answer_pair}
args:
2 changes: 1 addition & 1 deletion examples/demo/8-tools.pdl
@@ -17,7 +17,7 @@ text:
contribute: [context]
- "Out of 1400 participants, 400 passed the test. What percentage is that?\n"
- def: actions
model: replicate/ibm-granite/granite-3.1-8b-instruct
model: ollama_chat/granite3.2:8b
parser: json
spec: [{ name: str, arguments: { expr: str }}]
parameters:
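The updated block in 8-tools.pdl asks granite3.2:8b for a JSON tool call and checks it against the spec [{ name: str, arguments: { expr: str }}]. As a rough illustration of what consuming such an action list could look like, here is a small Python sketch; the hard-coded reply and the calc dispatch are hypothetical stand-ins for the model output and the example's tool.

# Sketch: parse a tool-call reply of the shape described by the spec above and
# evaluate the arithmetic expression it carries.
import json

reply = '[{"name": "calc", "arguments": {"expr": "400 / 1400 * 100"}}]'  # hypothetical model output
actions = json.loads(reply)

for action in actions:
    if action["name"] == "calc":
        # eval() is fine for a toy sketch; a real tool should use a restricted arithmetic parser.
        value = eval(action["arguments"]["expr"], {"__builtins__": {}}, {})
        print(f"{action['arguments']['expr']} = {value:.2f}")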
4 changes: 2 additions & 2 deletions examples/demo/9-react.pdl
@@ -63,12 +63,12 @@ text:
- repeat:
text:
- def: thought
model: replicate/ibm-granite/granite-3.1-8b-instruct
model: ollama_chat/granite3.2:8b
parameters:
stop_sequences: "Action:"
- "Action:\n"
- def: action
model: replicate/ibm-granite/granite-3.1-8b-instruct
model: ollama_chat/granite3.2:8b
parameters:
stop_sequences: "\n"
parser: json
4 changes: 2 additions & 2 deletions examples/react/demo.pdl
@@ -63,12 +63,12 @@ text:
- repeat:
text:
- def: thought
model: replicate/ibm-granite/granite-3.1-8b-instruct
model: ollama_chat/granite3.2:8b
parameters:
stop_sequences: "Action:"
- "Action:\n"
- def: action
model: replicate/ibm-granite/granite-3.1-8b-instruct
model: ollama_chat/granite3.2:8b
parameters:
stop_sequences: "\n"
parser: json
2 changes: 1 addition & 1 deletion examples/react/react_call.pdl
@@ -5,6 +5,6 @@ text:
- call: ${ lib.react }
args:
question: How many years ago was the discoverer of the Hudson River born? Keep in mind we are in 2025.
model: replicate/ibm-granite/granite-3.1-8b-instruct
model: ollama_chat/granite3.2:8b


24 changes: 13 additions & 11 deletions examples/react/react_fun.pdl
@@ -13,13 +13,13 @@ defs:
- name: Calc
description: Calculator function
arguments:
expr:
expr:
type: string
description: Arithmetic expression to calculate
- name: Search
description: Wikipedia search
arguments:
topic:
topic:
type: string
description: Topic to search
- for:
@@ -46,15 +46,17 @@ defs:
- def: thought
model: ${ model }
parameters:
temperature: 0
stop_sequences: "Action:"
- "Action:\n"
- def: action
model: ${ model }
parameters:
temperature: 0
stop_sequences: "\n"
parser: json
- if: ${ action != prev_action}
then:
then:
def: observation
if: ${ action[0].name == "Search" }
then:
@@ -85,39 +87,39 @@ defs:
contribute: []
data: ${ action }
until: ${ action[0].name == "Finish" or exit }

react:
function:
question: str
model: str
return:
defs:
defs:
examples:
array:
- text:
- text:
|
What profession does Nicholas Ray and Elia Kazan have in common?
Thought: I need to search Nicholas Ray and Elia Kazan, find their professions, then find the profession they have in common.
Action:
Action:
<tool_call>[{"name": "Search", "arguments": {"topic": "Nicholas Ray"}}]
Observation: Nicholas Ray (born Raymond Nicholas Kienzle Jr., August 7, 1911 - June 16, 1979) was an American film director, screenwriter, and actor best known for the 1955 film Rebel Without a Cause.
Thought: Professions of Nicholas Ray are director, screenwriter, and actor. I need to search Elia Kazan next and find his professions.
Action:
Action:
<tool_call>[{"name": "Search", "arguments": {"topic": "Elia Kazan"}}]
Observation: Elia Kazan was an American film and theatre director, producer, screenwriter and actor.
Thought: Professions of Elia Kazan are director, producer, screenwriter, and actor. So profession Nicholas Ray and Elia Kazan have in common is director, screenwriter, and actor.
Action:
Action:
<tool_call>[{"name": "Finish", "arguments": {"topic": "director, screenwriter, actor"}}]


What is the elevation range for the area that the eastern sector of the Colorado orogeny extends into?
Thought: I need to search Colorado orogeny, find the area that the eastern sector of the Colorado ...
Action:
Action:
<tool_call>[{"name": "Search", "arguments": {"topic": "Colorado orogeny"}}]
Observation: The Colorado orogeny was an episode of mountain building (an orogeny) ...
Thought: It does not mention the eastern sector. So I need to look up eastern sector.
Thought: High Plains rise in elevation from around 1,800 to 7,000 ft, so the answer is 1,800 to 7,000 ft.
Action:
Action:
<tool_call>[{"name": "Finish", "arguments": {"topic": "1,800 to 7,000 ft"}}]

call: ${ react_inner }
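react_fun.pdl drives its loop by alternating two model calls, one cut off at the "Action:" stop sequence to get the thought and one cut off at a newline to get the JSON action, then dispatching on the parsed action until it is Finish. The Python sketch below reproduces only that control flow with litellm.completion (which PDL's model blocks delegate to); the prompt assembly, few-shot examples, and tool implementations are stubbed, and the bare-JSON action format is an assumption.

# Sketch of the Thought / Action / Observation loop expressed in react_fun.pdl,
# written directly against LiteLLM. Tool dispatch and prompting are simplified.
import json
from litellm import completion

MODEL = "ollama_chat/granite3.2:8b"

def call(messages, stop):
    resp = completion(model=MODEL, messages=messages, stop=stop, temperature=0)
    return resp.choices[0].message.content

def search(topic: str) -> str:
    return f"(stubbed observation about {topic})"  # stand-in for the Wikipedia search tool

def react(question: str, max_turns: int = 5) -> str:
    messages = [{"role": "user", "content": question + "\nThought: "}]
    for _ in range(max_turns):
        thought = call(messages, stop=["Action:"])        # mirrors stop_sequences: "Action:"
        messages.append({"role": "assistant", "content": thought + "\nAction:\n"})
        action_text = call(messages, stop=["\n"]).strip()  # mirrors stop_sequences: "\n"
        # The few-shot prompt primes a <tool_call> prefix; strip it if the model echoes it.
        action = json.loads(action_text.removeprefix("<tool_call>"))
        if action[0]["name"] == "Finish":
            return action[0]["arguments"]["topic"]
        observation = search(action[0]["arguments"]["topic"])
        messages.append({"role": "user", "content": f"Observation: {observation}\nThought: "})
    return "no answer within the turn limit"

print(react("What profession do Nicholas Ray and Elia Kazan have in common?"))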
2 changes: 1 addition & 1 deletion examples/sdk/hello_dict.py
@@ -4,7 +4,7 @@
"text": [
"Hello\n",
{
"model": "replicate/ibm-granite/granite-3.1-8b-instruct",
"model": "ollama_chat/granite3.2:8b",
"parameters": {"stop_sequences": "!"},
},
]
2 changes: 1 addition & 1 deletion examples/sdk/hello_prog.py
@@ -6,7 +6,7 @@
text=[
"Hello\n",
LitellmModelBlock(
model="replicate/ibm-granite/granite-3.1-8b-instruct",
model="ollama_chat/granite3.2:8b",
parameters=LitellmParameters(stop_sequences="!"), # pyright: ignore
),
]
2 changes: 1 addition & 1 deletion examples/sdk/hello_str.py
@@ -3,7 +3,7 @@
HELLO = """
text:
- "Hello\n"
- model: replicate/ibm-granite/granite-3.1-8b-instruct
- model: ollama_chat/granite3.2:8b
parameters:
stop_sequences: '!'
"""
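All three SDK variants above (raw dict, typed program, and YAML string) now target the same Ollama-served model. To sanity-check that the new model string resolves, the request can also be issued directly through LiteLLM, which PDL delegates to via LitellmModelBlock and LitellmParameters; a minimal sketch, assuming a local Ollama server with granite3.2:8b already pulled:

# Sketch: the basic chat completion the updated hello_* examples exercise,
# issued directly through LiteLLM against a local Ollama server.
from litellm import completion

response = completion(
    model="ollama_chat/granite3.2:8b",
    messages=[{"role": "user", "content": "Hello\n"}],
    stop=["!"],  # mirrors stop_sequences: '!' in the examples
)
print(response.choices[0].message.content)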
14 changes: 7 additions & 7 deletions examples/teacher/teacher.pdl
@@ -1,6 +1,6 @@
defs:
teacher_sys_prompt: You are a very knowledgeable AI Assistant that will faithfully assist the user with their task.
teacher_model: replicate/ibm-granite/granite-3.1-8b-instruct
teacher_model: ollama_chat/granite3.2:8b
teacher_template:
function:
sys_prompt: str
@@ -29,13 +29,13 @@ defs:
* The questions should not be template-based or generic, it should be very diverse.
* Simply return the questions, do not return any answers or explanations.
* Strictly adhere to the prompt and generate responses in the same style and format as the example.
Use this format to generate the questions:
### Question 1:
Use this format to generate the questions:
### Question 1:
examples: |
To better assist you with this task, here is an example:
### Question 1: ${icl_question}
generation: |
Now generate ${num_samples} such questions, remember to follow the principles mentioned above and use the same format as the examples. Remember to use the same style and format as the example above.
Now generate ${num_samples} such questions, remember to follow the principles mentioned above and use the same format as the examples. Remember to use the same style and format as the example above.
max_new_tokens: 10000

gen_questions_freeform_inner:
@@ -203,7 +203,7 @@ defs:
spec: {introduction: str, principles: str, examples: str, generation: str, max_new_tokens: int, additional_stop_tokens: [str]}
return:
data:
introduction: Your task is to faithfully follow the user's prompt and generate a response.
introduction: Your task is to faithfully follow the user's prompt and generate a response.
principles: |
Please follow these guiding principles when generating responses:
* Use proper grammar and punctuation.
@@ -299,7 +299,7 @@ defs:
introduction: |
Please act as an impartial judge and evaluate the quality of the answer provided by an AI assistant to the questions displayed below. Evaluate whether or not the answer is a good example of how AI Assistant should respond to the user's instruction. Please assign a score using the following 3-point scale.
principles: |
1: It means the answer is incorrect, irrelevant, unsafe or provides incomplete and garbage information. For instance, the answer may be factually wrong, off-topic, or filled with irrelevant content that doesn't address the user's question or it could be incomplete and hanging. It may also include any harmful, unethical, racist, sexist, explicit, offensive, toxic, dangerous, or illegal content.
1: It means the answer is incorrect, irrelevant, unsafe or provides incomplete and garbage information. For instance, the answer may be factually wrong, off-topic, or filled with irrelevant content that doesn't address the user's question or it could be incomplete and hanging. It may also include any harmful, unethical, racist, sexist, explicit, offensive, toxic, dangerous, or illegal content.

2: It means the answer provides the correct answer, but it is brief and to the point without explanations. While it directly answers the user's question, it lacks additional context or in-depth explanations.

@@ -401,7 +401,7 @@ text:
- def: qa_pairs
call: ${gen_answers}
args:
questions: ${filtered_questions}
questions: ${filtered_questions}
- "\n\n----- Filtering QA pairs -----\n\n"
- call: ${filter_question_answer_pair}
args:
6 changes: 3 additions & 3 deletions examples/tools/calc.pdl
@@ -5,7 +5,7 @@ defs:
- name: calc
description: Calculator function
arguments:
expr:
expr:
type: string
description: Arithmetic expression to calculate
text:
@@ -17,9 +17,9 @@ text:
contribute: [context]
- "Out of 1400 participants, 400 passed the test. What percentage is that?\n"
- def: actions
model: replicate/ibm-granite/granite-3.1-8b-instruct
model: ollama_chat/granite3.2:8b
parser: json
spec: [{ name: str, arguments: { expr: str }}]
spec: [{ name: str, arguments: { expr: str }}]
parameters:
drop_params: true # This is needed because the model does not support structured decoding. It directs LiteLLM to ignore parameters sent for structured decoding.
- "\n"