diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0891c3fc..75847a47 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,7 @@
+# 7.4.3 - 2026-01-02
+
+Fixes cache creation cost calculation for LangChain with Anthropic
+
 # 7.4.2 - 2025-12-22
 
 feat: add `in_app_modules` option to control code variables capturing
@@ -13,6 +17,7 @@ When using OpenAI stored prompts, the model is defined in the OpenAI dashboard r
 feat: Add automatic retries for feature flag requests
 
 Feature flag API requests now automatically retry on transient failures:
+
 - Network errors (connection refused, DNS failures, timeouts)
 - Server errors (500, 502, 503, 504)
 - Up to 2 retries with exponential backoff (0.5s, 1s delays)
diff --git a/posthog/ai/langchain/callbacks.py b/posthog/ai/langchain/callbacks.py
index 68840a63..d92dc8f0 100644
--- a/posthog/ai/langchain/callbacks.py
+++ b/posthog/ai/langchain/callbacks.py
@@ -773,9 +773,11 @@ def _parse_usage_model(
             for mapped_key, dataclass_key in field_mapping.items()
         },
     )
-    # For Anthropic providers, LangChain reports input_tokens as the sum of input and cache read tokens.
+    # For Anthropic providers, LangChain reports input_tokens as the sum of all input tokens.
     # Our cost calculation expects them to be separate for Anthropic, so we subtract cache tokens.
-    # For other providers (OpenAI, etc.), input_tokens already includes cache tokens as expected.
+    # Both cache_read and cache_write tokens should be subtracted since Anthropic's raw API
+    # reports input_tokens as tokens NOT read from or used to create a cache.
+    # For other providers (OpenAI, etc.), input_tokens already excludes cache tokens as expected.
     # Match logic consistent with plugin-server: exact match on provider OR substring match on model
     is_anthropic = False
     if provider and provider.lower() == "anthropic":
@@ -783,14 +785,14 @@
     elif model and "anthropic" in model.lower():
         is_anthropic = True
 
-    if (
-        is_anthropic
-        and normalized_usage.input_tokens
-        and normalized_usage.cache_read_tokens
-    ):
-        normalized_usage.input_tokens = max(
-            normalized_usage.input_tokens - normalized_usage.cache_read_tokens, 0
+    if is_anthropic and normalized_usage.input_tokens:
+        cache_tokens = (normalized_usage.cache_read_tokens or 0) + (
+            normalized_usage.cache_write_tokens or 0
         )
+        if cache_tokens > 0:
+            normalized_usage.input_tokens = max(
+                normalized_usage.input_tokens - cache_tokens, 0
+            )
 
     return normalized_usage
 
diff --git a/posthog/test/ai/langchain/test_callbacks.py b/posthog/test/ai/langchain/test_callbacks.py
index b9538532..1f7edba7 100644
--- a/posthog/test/ai/langchain/test_callbacks.py
+++ b/posthog/test/ai/langchain/test_callbacks.py
@@ -1638,6 +1638,95 @@ def test_anthropic_provider_subtracts_cache_tokens(mock_client):
     assert generation_args["properties"]["$ai_cache_read_input_tokens"] == 800
 
 
+def test_anthropic_provider_subtracts_cache_write_tokens(mock_client):
+    """Test that Anthropic provider correctly subtracts cache write tokens from input tokens."""
+    from langchain_core.outputs import LLMResult, ChatGeneration
+    from langchain_core.messages import AIMessage
+    from uuid import uuid4
+
+    cb = CallbackHandler(mock_client)
+    run_id = uuid4()
+
+    # Set up with Anthropic provider
+    cb._set_llm_metadata(
+        serialized={},
+        run_id=run_id,
+        messages=[{"role": "user", "content": "test"}],
+        metadata={"ls_provider": "anthropic", "ls_model_name": "claude-3-sonnet"},
+    )
+
+    # Response with cache creation: 1000 input (includes 800 being written to cache)
+    response = LLMResult(
+        generations=[
+            [
+                ChatGeneration(
+                    message=AIMessage(content="Response"),
+                    generation_info={
+                        "usage_metadata": {
+                            "input_tokens": 1000,
+                            "output_tokens": 50,
+                            "cache_creation_input_tokens": 800,
+                        }
+                    },
+                )
+            ]
+        ],
+        llm_output={},
+    )
+
+    cb._pop_run_and_capture_generation(run_id, None, response)
+
+    generation_args = mock_client.capture.call_args_list[0][1]
+    assert generation_args["properties"]["$ai_input_tokens"] == 200  # 1000 - 800
+    assert generation_args["properties"]["$ai_cache_creation_input_tokens"] == 800
+
+
+def test_anthropic_provider_subtracts_both_cache_read_and_write_tokens(mock_client):
+    """Test that Anthropic provider correctly subtracts both cache read and write tokens."""
+    from langchain_core.outputs import LLMResult, ChatGeneration
+    from langchain_core.messages import AIMessage
+    from uuid import uuid4
+
+    cb = CallbackHandler(mock_client)
+    run_id = uuid4()
+
+    # Set up with Anthropic provider
+    cb._set_llm_metadata(
+        serialized={},
+        run_id=run_id,
+        messages=[{"role": "user", "content": "test"}],
+        metadata={"ls_provider": "anthropic", "ls_model_name": "claude-3-sonnet"},
+    )
+
+    # Response with both cache read and creation
+    response = LLMResult(
+        generations=[
+            [
+                ChatGeneration(
+                    message=AIMessage(content="Response"),
+                    generation_info={
+                        "usage_metadata": {
+                            "input_tokens": 2000,
+                            "output_tokens": 50,
+                            "cache_read_input_tokens": 800,
+                            "cache_creation_input_tokens": 500,
+                        }
+                    },
+                )
+            ]
+        ],
+        llm_output={},
+    )
+
+    cb._pop_run_and_capture_generation(run_id, None, response)
+
+    generation_args = mock_client.capture.call_args_list[0][1]
+    # 2000 - 800 (read) - 500 (write) = 700
+    assert generation_args["properties"]["$ai_input_tokens"] == 700
+    assert generation_args["properties"]["$ai_cache_read_input_tokens"] == 800
+    assert generation_args["properties"]["$ai_cache_creation_input_tokens"] == 500
+
+
 def test_openai_cache_read_tokens(mock_client):
     """Test that OpenAI cache read tokens are captured correctly."""
     prompt = ChatPromptTemplate.from_messages(
@@ -2092,10 +2181,12 @@ def test_zero_input_tokens_with_cache_read(mock_client):
     assert generation_props["$ai_cache_read_input_tokens"] == 50
 
 
-def test_cache_write_tokens_not_subtracted_from_input(mock_client):
-    """Test that cache_creation_input_tokens (cache write) do NOT affect input_tokens.
+def test_non_anthropic_cache_write_tokens_not_subtracted_from_input(mock_client):
+    """Test that cache_creation_input_tokens do NOT affect input_tokens for non-Anthropic providers.
 
-    Only cache_read_tokens should be subtracted from input_tokens, not cache_write_tokens.
+    When no provider metadata is set (or for non-Anthropic providers), cache tokens should
+    NOT be subtracted from input_tokens. This is because different providers report tokens
+    differently - only Anthropic's LangChain integration requires subtraction.
     """
 
     prompt = ChatPromptTemplate.from_messages([("user", "Create cache")])
diff --git a/posthog/version.py b/posthog/version.py
index 7baf050e..9efec676 100644
--- a/posthog/version.py
+++ b/posthog/version.py
@@ -1,4 +1,4 @@
-VERSION = "7.4.2"
+VERSION = "7.4.3"
 
 if __name__ == "__main__":
    print(VERSION, end="")  # noqa: T201
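
For anyone who wants to sanity-check the new accounting outside the patched test suite, below is a minimal standalone sketch of the subtraction this release introduces. It mirrors the logic added to _parse_usage_model but is not part of the patch: UsageSketch and subtract_anthropic_cache_tokens are illustrative stand-in names, not SDK API, and the worked numbers come from the new test (2000 input, 800 cache read, 500 cache write).

    # Illustrative sketch only; field and function names are stand-ins, not posthog-python API.
    from dataclasses import dataclass
    from typing import Optional


    @dataclass
    class UsageSketch:
        """Simplified stand-in for the SDK's normalized usage fields."""

        input_tokens: Optional[int] = None
        cache_read_tokens: Optional[int] = None
        cache_write_tokens: Optional[int] = None


    def subtract_anthropic_cache_tokens(usage: UsageSketch, is_anthropic: bool) -> UsageSketch:
        # For Anthropic, input_tokens reported by LangChain is the sum of all input tokens,
        # so both cache read and cache write tokens are subtracted before cost calculation.
        if is_anthropic and usage.input_tokens:
            cache_tokens = (usage.cache_read_tokens or 0) + (usage.cache_write_tokens or 0)
            if cache_tokens > 0:
                usage.input_tokens = max(usage.input_tokens - cache_tokens, 0)
        return usage


    # Worked example matching the new test: 2000 - 800 (read) - 500 (write) = 700.
    usage = UsageSketch(input_tokens=2000, cache_read_tokens=800, cache_write_tokens=500)
    assert subtract_anthropic_cache_tokens(usage, is_anthropic=True).input_tokens == 700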