|
16 | 16 |
|
17 | 17 | MODEL_NAME = "openai/gpt-oss-20b" |
18 | 18 |
|
# Shared tool definition used by every function-calling test in this module:
# a strict-mode `get_weather` function that takes latitude/longitude numbers.
# NOTE: key order is part of the serialized request payload — keep it as-is.
GET_WEATHER_SCHEMA = {
    "type": "function",
    "name": "get_weather",
    "description": "Get current temperature for provided coordinates in celsius.",  # noqa
    "parameters": {
        "type": "object",
        "properties": {
            "latitude": {"type": "number"},
            "longitude": {"type": "number"},
        },
        # Both coordinates are mandatory; no extra keys allowed (strict mode).
        "required": ["latitude", "longitude"],
        "additionalProperties": False,
    },
    "strict": True,
}
| 34 | + |
19 | 35 |
|
20 | 36 | @pytest.fixture(scope="module") |
21 | 37 | def server(): |
@@ -305,6 +321,54 @@ async def test_streaming_types(client: OpenAI, model_name: str): |
305 | 321 | assert len(stack_of_event_types) == 0 |
306 | 322 |
|
307 | 323 |
|
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_function_calling_with_streaming_types(client: OpenAI, model_name: str):
    """Stream a tool-call request and verify event open/close pairing.

    Every terminal ("done"/"completed") event must close the matching
    opening ("added"/"delta"/"created") event, and nothing may remain
    open once the stream ends.
    """
    # Maps each closing event type to the opening event type it must pair with.
    closer_to_opener = {
        "response.completed": "response.created",
        "response.output_item.done": "response.output_item.added",
        "response.output_text.done": "response.output_text.delta",
        "response.reasoning_text.done": "response.reasoning_text.delta",
        "response.reasoning_part.done": "response.reasoning_part.added",
        "response.function_call_arguments.done": "response.function_call_arguments.delta",  # noqa
    }

    stream_response = await client.responses.create(
        model=model_name,
        input=[
            {
                "role": "user",
                "content": "What's the weather like in Paris today?",
            }
        ],
        tools=[GET_WEATHER_SCHEMA],
        stream=True,
    )

    open_events = []
    async for event in stream_response:
        kind = event.type
        if kind == "response.created":
            open_events.append(kind)
        elif kind == "response.completed":
            assert open_events[-1] == closer_to_opener[kind]
            open_events.pop()
        if kind.endswith("added"):
            open_events.append(kind)
        elif kind.endswith("delta"):
            # A run of deltas of the same type shares a single stack entry.
            if open_events[-1] != kind:
                open_events.append(kind)
        elif kind.endswith("done"):
            assert open_events[-1] == closer_to_opener[kind]
            open_events.pop()
    # Every opened block must have been closed by the end of the stream.
    assert not open_events
| 370 | + |
| 371 | + |
308 | 372 | @pytest.mark.asyncio |
309 | 373 | @pytest.mark.parametrize("model_name", [MODEL_NAME]) |
310 | 374 | @pytest.mark.parametrize("background", [True, False]) |
@@ -483,23 +547,7 @@ def call_function(name, args): |
483 | 547 | @pytest.mark.asyncio |
484 | 548 | @pytest.mark.parametrize("model_name", [MODEL_NAME]) |
485 | 549 | async def test_function_calling(client: OpenAI, model_name: str): |
486 | | - tools = [ |
487 | | - { |
488 | | - "type": "function", |
489 | | - "name": "get_weather", |
490 | | - "description": "Get current temperature for provided coordinates in celsius.", # noqa |
491 | | - "parameters": { |
492 | | - "type": "object", |
493 | | - "properties": { |
494 | | - "latitude": {"type": "number"}, |
495 | | - "longitude": {"type": "number"}, |
496 | | - }, |
497 | | - "required": ["latitude", "longitude"], |
498 | | - "additionalProperties": False, |
499 | | - }, |
500 | | - "strict": True, |
501 | | - } |
502 | | - ] |
| 550 | + tools = [GET_WEATHER_SCHEMA] |
503 | 551 |
|
504 | 552 | response = await client.responses.create( |
505 | 553 | model=model_name, |
@@ -565,21 +613,7 @@ async def test_function_calling_multi_turn(client: OpenAI, model_name: str): |
565 | 613 | }, |
566 | 614 | "strict": True, |
567 | 615 | }, |
568 | | - { |
569 | | - "type": "function", |
570 | | - "name": "get_weather", |
571 | | - "description": "Get current temperature for provided coordinates in celsius.", # noqa |
572 | | - "parameters": { |
573 | | - "type": "object", |
574 | | - "properties": { |
575 | | - "latitude": {"type": "number"}, |
576 | | - "longitude": {"type": "number"}, |
577 | | - }, |
578 | | - "required": ["latitude", "longitude"], |
579 | | - "additionalProperties": False, |
580 | | - }, |
581 | | - "strict": True, |
582 | | - }, |
| 616 | + GET_WEATHER_SCHEMA, |
583 | 617 | ] |
584 | 618 |
|
585 | 619 | response = await client.responses.create( |
@@ -643,23 +677,7 @@ async def test_function_calling_multi_turn(client: OpenAI, model_name: str): |
643 | 677 | @pytest.mark.asyncio |
644 | 678 | @pytest.mark.parametrize("model_name", [MODEL_NAME]) |
645 | 679 | async def test_function_calling_required(client: OpenAI, model_name: str): |
646 | | - tools = [ |
647 | | - { |
648 | | - "type": "function", |
649 | | - "name": "get_weather", |
650 | | - "description": "Get current temperature for provided coordinates in celsius.", # noqa |
651 | | - "parameters": { |
652 | | - "type": "object", |
653 | | - "properties": { |
654 | | - "latitude": {"type": "number"}, |
655 | | - "longitude": {"type": "number"}, |
656 | | - }, |
657 | | - "required": ["latitude", "longitude"], |
658 | | - "additionalProperties": False, |
659 | | - }, |
660 | | - "strict": True, |
661 | | - } |
662 | | - ] |
| 680 | + tools = [GET_WEATHER_SCHEMA] |
663 | 681 |
|
664 | 682 | with pytest.raises(BadRequestError): |
665 | 683 | await client.responses.create( |
@@ -689,23 +707,7 @@ async def test_system_message_with_tools(client: OpenAI, model_name: str): |
689 | 707 | @pytest.mark.asyncio |
690 | 708 | @pytest.mark.parametrize("model_name", [MODEL_NAME]) |
691 | 709 | async def test_function_calling_full_history(client: OpenAI, model_name: str): |
692 | | - tools = [ |
693 | | - { |
694 | | - "type": "function", |
695 | | - "name": "get_weather", |
696 | | - "description": "Get current temperature for provided coordinates in celsius.", # noqa |
697 | | - "parameters": { |
698 | | - "type": "object", |
699 | | - "properties": { |
700 | | - "latitude": {"type": "number"}, |
701 | | - "longitude": {"type": "number"}, |
702 | | - }, |
703 | | - "required": ["latitude", "longitude"], |
704 | | - "additionalProperties": False, |
705 | | - }, |
706 | | - "strict": True, |
707 | | - } |
708 | | - ] |
| 710 | + tools = [GET_WEATHER_SCHEMA] |
709 | 711 |
|
710 | 712 | input_messages = [ |
711 | 713 | {"role": "user", "content": "What's the weather like in Paris today?"} |
@@ -745,6 +747,74 @@ async def test_function_calling_full_history(client: OpenAI, model_name: str): |
745 | 747 | assert response_2.output_text is not None |
746 | 748 |
|
747 | 749 |
|
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_function_calling_with_stream(client: OpenAI, model_name: str):
    """End-to-end streamed function calling.

    Turn 1: stream a request that should trigger a `get_weather` tool call,
    accumulating the call's arguments from delta events. Then execute the
    function locally and feed its output back. Turn 2: stream the follow-up
    and verify it produces text output with no further tool-call events.
    """
    tools = [GET_WEATHER_SCHEMA]
    input_list = [
        {
            "role": "user",
            "content": "What's the weather like in Paris today?",
        }
    ]
    stream_response = await client.responses.create(
        model=model_name,
        input=input_list,
        tools=tools,
        stream=True,
    )
    assert stream_response is not None

    # Tool calls accumulated from the stream, keyed by output index and
    # (for the done-event cross-check) by function name.
    final_tool_calls = {}
    final_tool_calls_named = {}
    async for event in stream_response:
        if event.type == "response.output_item.added":
            if event.item.type != "function_call":
                continue
            final_tool_calls[event.output_index] = event.item
            final_tool_calls_named[event.item.name] = event.item
        elif event.type == "response.function_call_arguments.delta":
            # Use .get: a delta for an index we never saw "added" for should
            # be ignored, not raise KeyError (the truthiness guard below
            # only works if lookup can return None).
            tool_call = final_tool_calls.get(event.output_index)
            if tool_call:
                tool_call.arguments += event.delta
                final_tool_calls_named[tool_call.name] = tool_call
        elif event.type == "response.function_call_arguments.done":
            # The done event must carry the full accumulated argument string.
            assert event.arguments == final_tool_calls_named[event.name].arguments

    # Execute the requested function locally. Initialize to None so that if
    # the model never emitted a get_weather call, the asserts below fail
    # cleanly instead of raising NameError on unbound locals.
    result = None
    tool_call = None
    for candidate in final_tool_calls.values():
        if (
            candidate
            and candidate.type == "function_call"
            and candidate.name == "get_weather"
        ):
            args = json.loads(candidate.arguments)
            result = call_function(candidate.name, args)
            tool_call = candidate
            input_list += [candidate]
            break
    assert tool_call is not None
    assert result is not None

    response = await client.responses.create(
        model=model_name,
        input=input_list
        + [
            {
                "type": "function_call_output",
                "call_id": tool_call.call_id,
                "output": str(result),
            }
        ],
        tools=tools,
        stream=True,
    )
    assert response is not None
    async for event in response:
        # check that no function call events appear in the follow-up stream
        assert event.type != "response.function_call_arguments.delta"
        assert event.type != "response.function_call_arguments.done"
        # check that the completed response contains output text
        if event.type == "response.completed":
            assert len(event.response.output) > 0
            assert event.response.output_text is not None
| 817 | + |
748 | 818 | @pytest.mark.asyncio |
749 | 819 | @pytest.mark.parametrize("model_name", [MODEL_NAME]) |
750 | 820 | async def test_output_messages_enabled(client: OpenAI, model_name: str, server): |
|
0 commit comments