diff --git a/tests/e2e/test_chat_completions.py b/tests/e2e/test_chat_completions.py index dd362885..09e3eb73 100644 --- a/tests/e2e/test_chat_completions.py +++ b/tests/e2e/test_chat_completions.py @@ -670,7 +670,7 @@ def test_web_search(client, model, high_web_search_rate_limit): messages=[ { "role": "system", - "content": "You are a helpful assistant that provides accurate and up-to-date information.", + "content": "You are a helpful assistant that provides accurate and up-to-date information. Answer in 10 words maximum and do not reason.", }, { "role": "user", @@ -678,7 +678,7 @@ def test_web_search(client, model, high_web_search_rate_limit): }, ], extra_body={"web_search": True}, - temperature=0.2, + temperature=0.01, max_tokens=150, ) diff --git a/tests/e2e/test_chat_completions_http.py b/tests/e2e/test_chat_completions_http.py index b7dffcef..807805bb 100644 --- a/tests/e2e/test_chat_completions_http.py +++ b/tests/e2e/test_chat_completions_http.py @@ -766,7 +766,7 @@ def test_web_search(client, model, high_web_search_rate_limit): "messages": [ { "role": "system", - "content": "You are a helpful assistant that provides accurate and up-to-date information.", + "content": "You are a helpful assistant that provides accurate and up-to-date information. Answer in 10 words maximum and do not reason.", }, { "role": "user", @@ -774,6 +774,7 @@ def test_web_search(client, model, high_web_search_rate_limit): }, ], "extra_body": {"web_search": True}, + "temperature": 0.01, "max_tokens": 150, } diff --git a/tests/e2e/test_responses.py b/tests/e2e/test_responses.py index 79679d7c..02d20804 100644 --- a/tests/e2e/test_responses.py +++ b/tests/e2e/test_responses.py @@ -1,5 +1,6 @@ import json import os + import pytest import pytest_asyncio @@ -684,9 +685,9 @@ def test_web_search(client, model, high_web_search_rate_limit): response = client.responses.create( model=model, input="Who won the Roland Garros Open in 2024? Just reply with the winner's name.", - instructions="You are a helpful assistant that provides accurate and up-to-date information.", + instructions="You are a helpful assistant that provides accurate and up-to-date information. Answer in 10 words maximum and do not reason.", extra_body={"web_search": True}, - temperature=0.2, + temperature=0.01, ) assert response is not None, "Response should not be None" @@ -699,21 +700,46 @@ def test_web_search(client, model, high_web_search_rate_limit): message_items = [ item for item in response.output if getattr(item, "type", None) == "message" ] - assert len(message_items) > 0, ( - f"Response should contain message items. Found types: {output_types}" - ) - - message = message_items[0] - assert hasattr(message, "content") and len(message.content) > 0, ( - "Message should have content" - ) + text_items = [ + item for item in response.output if getattr(item, "type", None) == "text" + ] + reasoning_items = [ + item for item in response.output if getattr(item, "type", None) == "reasoning" + ] - text_item = next( - (c for c in message.content if getattr(c, "type", None) == "output_text"), None + assert message_items or text_items or reasoning_items, ( + f"Response should contain message, text, or reasoning items. Found types: {output_types}" ) - assert text_item is not None, "Message content should contain an output_text item" - content = getattr(text_item, "text", "") + if message_items: + message = message_items[0] + assert hasattr(message, "content") and len(message.content) > 0, ( + "Message should have content" + ) + text_item = next( + (c for c in message.content if getattr(c, "type", None) == "output_text"), + None, + ) + assert text_item is not None, ( + "Message content should contain an output_text item" + ) + content = getattr(text_item, "text", "") + elif text_items: + content = getattr(text_items[0], "text", "") + else: + parts = getattr(reasoning_items[0], "content", None) or [] + text_part = next( + ( + c + for c in parts + if getattr(c, "type", None) in ("output_text", "reasoning_text") + ), + None, + ) + assert text_part is not None and getattr(text_part, "text", ""), ( + "Reasoning item missing text content" + ) + content = getattr(text_part, "text", "") assert content, "Response should contain content" sources = getattr(response, "sources", None) diff --git a/tests/e2e/test_responses_http.py b/tests/e2e/test_responses_http.py index 9d37f7df..f4692f7d 100644 --- a/tests/e2e/test_responses_http.py +++ b/tests/e2e/test_responses_http.py @@ -714,9 +714,9 @@ def test_web_search(client, model, high_web_search_rate_limit): "model": model, "input": "Who won the Roland Garros Open in 2024? Just reply with the winner's name.", "instructions": "You are a helpful assistant that provides accurate and up-to-date information. Answer in 10 words maximum and do not reason.", - "temperature": 0.95, - "max_output_tokens": 15000, + "max_output_tokens": 150, "extra_body": {"web_search": True}, + "temperature": 0.01, } response = client.post("/responses", json=payload, timeout=180)