From 617dc0ae33b28db1743143d660f03fad9bd07eee Mon Sep 17 00:00:00 2001 From: Derek Meegan Date: Sat, 11 Oct 2025 17:29:47 -0700 Subject: [PATCH 1/3] fix raw json schema support in extract --- stagehand/handlers/extract_handler.py | 4 +--- stagehand/llm/inference.py | 7 ++++--- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/stagehand/handlers/extract_handler.py b/stagehand/handlers/extract_handler.py index 8af621c..5fa2421 100644 --- a/stagehand/handlers/extract_handler.py +++ b/stagehand/handlers/extract_handler.py @@ -151,9 +151,7 @@ async def extract( processed_data_payload = raw_data_dict # Default to the raw dictionary - if schema and isinstance( - raw_data_dict, dict - ): # schema is the Pydantic model type + if schema and isinstance(schema, type) and issubclass(schema, BaseModel): # Try direct validation first try: validated_model_instance = schema.model_validate(raw_data_dict) diff --git a/stagehand/llm/inference.py b/stagehand/llm/inference.py index b438883..64d8597 100644 --- a/stagehand/llm/inference.py +++ b/stagehand/llm/inference.py @@ -169,11 +169,12 @@ async def extract( start_time = time.time() # Determine if we need to use schema-based response format - # TODO: if schema is json, return json response_format = {"type": "json_object"} if schema: - # If schema is a Pydantic model, use it directly - response_format = schema + if isinstance(schema, dict): + response_format = {"type": "json_schema", "json_schema": {"name": "extraction_schema", "strict": False, "schema": schema}} + else: + response_format = schema # Call the LLM with appropriate parameters try: From c3b523711ecd8f779aedc058e6432d489b4e87ae Mon Sep 17 00:00:00 2001 From: Derek Meegan Date: Tue, 14 Oct 2025 08:47:11 -0700 Subject: [PATCH 2/3] formatting --- stagehand/llm/inference.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/stagehand/llm/inference.py b/stagehand/llm/inference.py index 64d8597..5c9d122 100644 --- a/stagehand/llm/inference.py +++ b/stagehand/llm/inference.py @@ -172,7 +172,14 @@ async def extract( response_format = {"type": "json_object"} if schema: if isinstance(schema, dict): - response_format = {"type": "json_schema", "json_schema": {"name": "extraction_schema", "strict": False, "schema": schema}} + response_format = { + "type": "json_schema", + "json_schema": { + "name": "extraction_schema", + "strict": False, + "schema": schema, + }, + } else: response_format = schema From 73e828ca07f214194de2271e5dbd49275f84bfd4 Mon Sep 17 00:00:00 2001 From: Derek Meegan Date: Tue, 14 Oct 2025 11:51:18 -0700 Subject: [PATCH 3/3] add changeset --- .changeset/fortunate-friendly-avocet.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/fortunate-friendly-avocet.md diff --git a/.changeset/fortunate-friendly-avocet.md b/.changeset/fortunate-friendly-avocet.md new file mode 100644 index 0000000..5f1ee44 --- /dev/null +++ b/.changeset/fortunate-friendly-avocet.md @@ -0,0 +1,5 @@ +--- +"stagehand": patch +--- + +Fix ability to pass raw JSON to Extract schema