From 13d001863aa574ded777c5604e6ae889b83b7b13 Mon Sep 17 00:00:00 2001 From: Tianyi Huang Date: Thu, 9 Oct 2025 12:10:08 -0400 Subject: [PATCH 1/2] fix web search tool --- src/cleanlab_tlm/utils/chat.py | 25 +++---------------------- tests/test_chat.py | 14 +++++++++++++- 2 files changed, 16 insertions(+), 23 deletions(-) diff --git a/src/cleanlab_tlm/utils/chat.py b/src/cleanlab_tlm/utils/chat.py index 00bb47d..03f4d48 100644 --- a/src/cleanlab_tlm/utils/chat.py +++ b/src/cleanlab_tlm/utils/chat.py @@ -755,32 +755,14 @@ def _responses_messages_to_string(messages: list[dict[str, Any]]) -> str: ) if message["action"]["type"] == "search": - if "annotations" in message: - annotations = message["annotations"] - else: - next_text_message = adjusted_messages[i + 1] - if next_text_message["type"] != "message": - continue - next_text_content = next_text_message["content"][0] - if next_text_content["type"] != "output_text": - continue - annotations = next_text_content["annotations"] - - urls = list( - { - (annotation["url"], annotation["title"]) - for annotation in annotations - if annotation["type"] == "url_citation" - } - ) + urls = list({source["url"] for source in message["action"]["sources"] if source["type"] == "url"}) with ThreadPoolExecutor() as executor: - def extract_text(pair: tuple[str, str]) -> str: + def extract_text(url: str) -> str: fallback_text = "Response is not shown, but the LLM can still access it. Assume that whatever the LLM references in this URL is true." try: - url = pair[0] if url in _url_cache: return _url_cache[url] @@ -814,10 +796,9 @@ def extract_text(pair: tuple[str, str]) -> str: websites = [ { "url": url, - "title": title, "content": data, } - for (url, title), data in zip(urls, requests) + for url, data in zip(urls, requests) ] tool_call = { diff --git a/tests/test_chat.py b/tests/test_chat.py index 0d52c0d..c14e5d4 100644 --- a/tests/test_chat.py +++ b/tests/test_chat.py @@ -11,6 +11,7 @@ ) from openai.types.responses.response_function_web_search import ( ActionSearch, + ActionSearchSource, ResponseFunctionWebSearch, ) from openai.types.responses.response_output_message import ResponseOutputMessage @@ -1308,6 +1309,12 @@ def test_form_prompt_string_responses_web_search() -> None: action=ActionSearch( query="Give me a positive news story from today", type="search", + sources=[ + ActionSearchSource( + type="url", + url="https://www.podego.com/insights/august-2025-good-news-ai-pfas-stories?utm_source=openai", + ), + ], ), status="completed", type="web_search_call", @@ -1394,7 +1401,6 @@ def test_form_prompt_string_responses_web_search() -> None: "output": [ { "url": "https://www.podego.com/insights/august-2025-good-news-ai-pfas-stories?utm_source=openai", - "title": "Positive News Highlights | AI, PFAS Breakthroughs & More \\u2014 August 2025 \\u2014 Podego", "content": "MOCK CONTENT" } ] @@ -2342,6 +2348,12 @@ def test_form_response_string_responses_web_search() -> None: action=ActionSearch( query="Give me a positive news story from today", type="search", + sources=[ + ActionSearchSource( + type="url", + url="https://www.podego.com/insights/august-2025-good-news-ai-pfas-stories?utm_source=openai", + ), + ], ), status="completed", type="web_search_call", From 8b4897c22cdf98cb56d4a9c2529e98f453bf96a5 Mon Sep 17 00:00:00 2001 From: Tianyi Huang Date: Tue, 4 Nov 2025 00:18:35 +0900 Subject: [PATCH 2/2] add web search warning --- src/cleanlab_tlm/utils/chat.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/cleanlab_tlm/utils/chat.py b/src/cleanlab_tlm/utils/chat.py index 03f4d48..dd5b3af 100644 --- a/src/cleanlab_tlm/utils/chat.py +++ b/src/cleanlab_tlm/utils/chat.py @@ -755,6 +755,14 @@ def _responses_messages_to_string(messages: list[dict[str, Any]]) -> str: ) if message["action"]["type"] == "search": + if message["action"]["sources"] is None: + warnings.warn( + "Web search call returned no results. Please include include=['web_search_call.action.sources'] in your request.", + UserWarning, + stacklevel=2, + ) + continue + urls = list({source["url"] for source in message["action"]["sources"] if source["type"] == "url"}) with ThreadPoolExecutor() as executor: