@@ -1888,3 +1888,107 @@ async def test_retrieve_response_with_structured_content_object(
18881888 # Should convert the structured object to string representation
18891889 assert response == str (structured_content )
18901890 assert conversation_id == "fake_conversation_id"
1891+
1892+
@pytest.mark.asyncio
async def test_retrieve_response_skips_invalid_docs_url(prepare_agent_mocks, mocker):
    """Check that retrieve_response omits results whose docs_url is not a valid URL.

    The mocked agent turn carries a single ``knowledge_search`` tool response
    with two result chunks: one whose metadata has an ``https://`` docs_url and
    one whose docs_url is the plain string ``not-a-valid-url``. Only the first
    is expected to appear in the returned referenced documents.
    """
    mock_client, mock_agent = prepare_agent_mocks
    mock_client.shields.list.return_value = []
    mock_client.vector_dbs.list.return_value = []

    # Text chunks of the tool output: header, one valid result, one result with
    # a malformed docs_url, and the footer.
    chunk_texts = [
        """knowledge_search tool found 2 chunks:
BEGIN of knowledge_search tool results.
""",
        """Result 1
Content: Valid content
Metadata: {'docs_url': 'https://example.com/doc1', 'title': 'Valid Doc', 'document_id': 'doc-1'}
""",
        """Result 2
Content: Invalid content
Metadata: {'docs_url': 'not-a-valid-url', 'title': 'Invalid Doc', 'document_id': 'doc-2'}
""",
        """END of knowledge_search tool results.
""",
    ]

    tool_response = mocker.Mock(
        call_id="c1",
        tool_name="knowledge_search",
        content=[mocker.Mock(text=chunk, type="text") for chunk in chunk_texts],
    )
    tool_step = mocker.Mock(
        step_type="tool_execution",
        tool_responses=[tool_response],
    )

    # The agent turn yields the LLM answer together with the tool execution step.
    turn = mock_agent.create_turn.return_value
    turn.output_message.content = "LLM answer"
    turn.steps = [tool_step]

    # Configuration without any MCP servers, and an agent factory returning the mock.
    mocker.patch("app.endpoints.query.configuration", mocker.Mock(mcp_servers=[]))
    mocker.patch(
        "app.endpoints.query.get_agent",
        return_value=(mock_agent, "fake_conversation_id", "fake_session_id"),
    )

    answer, conv_id, documents = await retrieve_response(
        mock_client,
        "fake_model_id",
        QueryRequest(query="What is OpenStack?"),
        "test_token",
    )

    assert answer == "LLM answer"
    assert conv_id == "fake_conversation_id"

    # Only the entry with the well-formed URL survives.
    assert len(documents) == 1
    kept = documents[0]
    assert str(kept.doc_url) == "https://example.com/doc1"
    assert kept.doc_title == "Valid Doc"
1955+
1956+
@pytest.mark.asyncio
async def test_extract_referenced_documents_from_steps_handles_validation_errors(
    mocker,
):
    """Check that extract_referenced_documents_from_steps skips an invalid docs_url.

    A ``knowledge_search`` tool response is built with two result chunks, one
    with an ``https://`` docs_url and one with the plain string ``invalid-url``.
    The function is called directly and is expected to return only the first.
    """
    # Imported inside the test so the function is exercised directly.
    from app.endpoints.query import extract_referenced_documents_from_steps

    valid_chunk = mocker.Mock(
        text="""Result 1
Content: Valid content
Metadata: {'docs_url': 'https://example.com/doc1', 'title': 'Valid Doc', 'document_id': 'doc-1'}
"""
    )
    # The docs_url here is not a URL; the original test notes it is meant to
    # trigger a pydantic validation error.
    invalid_chunk = mocker.Mock(
        text="""Result 2
Content: Invalid content
Metadata: {'docs_url': 'invalid-url', 'title': 'Invalid Doc', 'document_id': 'doc-2'}
"""
    )

    tool_response = mocker.Mock(
        tool_name="knowledge_search",
        content=[valid_chunk, invalid_chunk],
    )
    tool_step = mocker.Mock(
        step_type="tool_execution",
        tool_responses=[tool_response],
    )

    documents = extract_referenced_documents_from_steps([tool_step])

    # The invalid entry is dropped rather than raising; only the valid one remains.
    assert len(documents) == 1
    kept = documents[0]
    assert str(kept.doc_url) == "https://example.com/doc1"
    assert kept.doc_title == "Valid Doc"
0 commit comments