From 7168d1f2e735b4b2bbf2b2582c83833960723fef Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Fri, 20 Feb 2026 13:35:10 +0100 Subject: [PATCH 1/4] fix(anthropic): fix token accounting --- sentry_sdk/integrations/anthropic.py | 17 +- .../integrations/anthropic/test_anthropic.py | 150 ++++++++++++++++++ 2 files changed, 165 insertions(+), 2 deletions(-) diff --git a/sentry_sdk/integrations/anthropic.py b/sentry_sdk/integrations/anthropic.py index b131e3381d..6bf6dedd30 100644 --- a/sentry_sdk/integrations/anthropic.py +++ b/sentry_sdk/integrations/anthropic.py @@ -108,6 +108,11 @@ def _get_token_usage(result: "Messages") -> "tuple[int, int, int, int]": ): cache_write_input_tokens = usage.cache_creation_input_tokens + # Anthropic's input_tokens excludes cached/cache_write tokens. + # Normalize to total input tokens so downstream cost calculations + # (input_tokens - cached) don't produce negative values. + input_tokens += cache_read_input_tokens + cache_write_input_tokens + return ( input_tokens, output_tokens, @@ -466,11 +471,15 @@ def new_iterator() -> "Iterator[MessageStreamEvent]": ) yield event + # Anthropic's input_tokens excludes cached/cache_write tokens. + # Normalize to total input tokens for correct cost calculations. + total_input = usage.input_tokens + (usage.cache_read_input_tokens or 0) + (usage.cache_write_input_tokens or 0) + _set_output_data( span=span, integration=integration, model=model, - input_tokens=usage.input_tokens, + input_tokens=total_input, output_tokens=usage.output_tokens, cache_read_input_tokens=usage.cache_read_input_tokens, cache_write_input_tokens=usage.cache_write_input_tokens, @@ -496,11 +505,15 @@ async def new_iterator_async() -> "AsyncIterator[MessageStreamEvent]": ) yield event + # Anthropic's input_tokens excludes cached/cache_write tokens. + # Normalize to total input tokens for correct cost calculations. + total_input = usage.input_tokens + (usage.cache_read_input_tokens or 0) + (usage.cache_write_input_tokens or 0) + _set_output_data( span=span, integration=integration, model=model, - input_tokens=usage.input_tokens, + input_tokens=total_input, output_tokens=usage.output_tokens, cache_read_input_tokens=usage.cache_read_input_tokens, cache_write_input_tokens=usage.cache_write_input_tokens, diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index 49164adf32..7f119c6912 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -2265,6 +2265,156 @@ def test_cache_tokens_nonstreaming(sentry_init, capture_events): assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 +def test_input_tokens_include_cached_nonstreaming(sentry_init, capture_events): + """ + Test that gen_ai.usage.input_tokens includes cached tokens. + + Anthropic's usage.input_tokens excludes cached/cache_write tokens, + but gen_ai.usage.input_tokens should be the TOTAL input tokens + (including cached + cache_write) so that downstream cost calculations + don't produce negative values. + + See: negative gen_ai.cost.input_tokens bug when cache_read > input_tokens. 
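+
+    Worked example with this test's values: Anthropic reports
+    input_tokens=100 alongside cache_read=80 and cache_write=20, so the
+    SDK should report 100 + 80 + 20 = 200 input tokens and
+    200 + 50 = 250 total tokens.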
+ """ + sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) + events = capture_events() + client = Anthropic(api_key="z") + + # Simulate Anthropic response where input_tokens=100 EXCLUDES cached tokens + # cache_read=80 and cache_write=20 are separate + # Total input tokens processed = 100 + 80 + 20 = 200 + client.messages._post = mock.Mock( + return_value=Message( + id="id", + model="claude-3-5-sonnet-20241022", + role="assistant", + content=[TextBlock(type="text", text="Response")], + type="message", + usage=Usage( + input_tokens=100, + output_tokens=50, + cache_read_input_tokens=80, + cache_creation_input_tokens=20, + ), + ) + ) + + with start_transaction(name="anthropic"): + client.messages.create( + max_tokens=1024, + messages=[{"role": "user", "content": "Hello"}], + model="claude-3-5-sonnet-20241022", + ) + + (span,) = events[0]["spans"] + + # input_tokens should be total: 100 (non-cached) + 80 (cache_read) + 20 (cache_write) = 200 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 + + # total_tokens should include the full input count + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 250 # 200 + 50 + + # Cache fields should still be reported correctly + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 + + +def test_input_tokens_include_cached_streaming(sentry_init, capture_events): + """ + Test that gen_ai.usage.input_tokens includes cached tokens for streaming responses. + + Same bug as non-streaming: Anthropic's input_tokens excludes cached tokens, + leading to negative cost calculations when cache_read > input_tokens. + """ + client = Anthropic(api_key="z") + returned_stream = Stream(cast_to=None, response=None, client=client) + returned_stream._iterator = [ + MessageStartEvent( + type="message_start", + message=Message( + id="id", + model="claude-3-5-sonnet-20241022", + role="assistant", + content=[], + type="message", + usage=Usage( + input_tokens=100, + output_tokens=0, + cache_read_input_tokens=80, + cache_creation_input_tokens=20, + ), + ), + ), + MessageDeltaEvent( + type="message_delta", + delta=Delta(stop_reason="end_turn"), + usage=MessageDeltaUsage(output_tokens=50), + ), + ] + + sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) + events = capture_events() + client.messages._post = mock.Mock(return_value=returned_stream) + + with start_transaction(name="anthropic"): + for _ in client.messages.create( + max_tokens=1024, + messages=[{"role": "user", "content": "Hello"}], + model="claude-3-5-sonnet-20241022", + stream=True, + ): + pass + + (span,) = events[0]["spans"] + + # input_tokens should be total: 100 + 80 + 20 = 200 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 + + # total_tokens should include the full input count + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 250 # 200 + 50 + + # Cache fields should still be reported correctly + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 + + +def test_input_tokens_unchanged_without_caching(sentry_init, capture_events): + """ + Test that input_tokens is unchanged when there are no cached tokens. + Ensures the fix doesn't break the non-caching case. 
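+
+    Expected with this test's values: input_tokens stays 100 and
+    total_tokens = 100 + 50 = 150.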
+ """ + sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) + events = capture_events() + client = Anthropic(api_key="z") + + client.messages._post = mock.Mock( + return_value=Message( + id="id", + model="claude-3-5-sonnet-20241022", + role="assistant", + content=[TextBlock(type="text", text="Response")], + type="message", + usage=Usage( + input_tokens=100, + output_tokens=50, + ), + ) + ) + + with start_transaction(name="anthropic"): + client.messages.create( + max_tokens=1024, + messages=[{"role": "user", "content": "Hello"}], + model="claude-3-5-sonnet-20241022", + ) + + (span,) = events[0]["spans"] + + # Without caching, input_tokens should remain as-is + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 100 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 150 # 100 + 50 + + def test_cache_tokens_streaming(sentry_init, capture_events): """Test cache tokens are tracked for streaming responses.""" client = Anthropic(api_key="z") From 120da10d4e287f698b4970c1fac9ae13bf29e140 Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Fri, 20 Feb 2026 13:37:42 +0100 Subject: [PATCH 2/4] formatting --- sentry_sdk/integrations/anthropic.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/sentry_sdk/integrations/anthropic.py b/sentry_sdk/integrations/anthropic.py index 6bf6dedd30..90aec4dd6c 100644 --- a/sentry_sdk/integrations/anthropic.py +++ b/sentry_sdk/integrations/anthropic.py @@ -473,7 +473,11 @@ def new_iterator() -> "Iterator[MessageStreamEvent]": # Anthropic's input_tokens excludes cached/cache_write tokens. # Normalize to total input tokens for correct cost calculations. - total_input = usage.input_tokens + (usage.cache_read_input_tokens or 0) + (usage.cache_write_input_tokens or 0) + total_input = ( + usage.input_tokens + + (usage.cache_read_input_tokens or 0) + + (usage.cache_write_input_tokens or 0) + ) _set_output_data( span=span, @@ -507,7 +511,11 @@ async def new_iterator_async() -> "AsyncIterator[MessageStreamEvent]": # Anthropic's input_tokens excludes cached/cache_write tokens. # Normalize to total input tokens for correct cost calculations. - total_input = usage.input_tokens + (usage.cache_read_input_tokens or 0) + (usage.cache_write_input_tokens or 0) + total_input = ( + usage.input_tokens + + (usage.cache_read_input_tokens or 0) + + (usage.cache_write_input_tokens or 0) + ) _set_output_data( span=span, From 6a5c43c4468a658dae2b8fd89bf2391614d888bf Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Fri, 20 Feb 2026 13:39:28 +0100 Subject: [PATCH 3/4] adapt tests --- .../integrations/anthropic/test_anthropic.py | 148 +++++++++++------- 1 file changed, 93 insertions(+), 55 deletions(-) diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index 7f119c6912..07239ee179 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -2265,36 +2265,34 @@ def test_cache_tokens_nonstreaming(sentry_init, capture_events): assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 -def test_input_tokens_include_cached_nonstreaming(sentry_init, capture_events): +def test_input_tokens_include_cache_write_nonstreaming(sentry_init, capture_events): """ - Test that gen_ai.usage.input_tokens includes cached tokens. + Test that gen_ai.usage.input_tokens includes cache_write tokens (non-streaming). 
- Anthropic's usage.input_tokens excludes cached/cache_write tokens, - but gen_ai.usage.input_tokens should be the TOTAL input tokens - (including cached + cache_write) so that downstream cost calculations - don't produce negative values. + Reproduces a real Anthropic cache-write response. Anthropic's usage.input_tokens + only counts non-cached tokens, but gen_ai.usage.input_tokens should be the TOTAL + so downstream cost calculations don't produce negative values. - See: negative gen_ai.cost.input_tokens bug when cache_read > input_tokens. + Real Anthropic response (from E2E test): + Usage(input_tokens=19, output_tokens=14, + cache_creation_input_tokens=2846, cache_read_input_tokens=0) """ sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) events = capture_events() client = Anthropic(api_key="z") - # Simulate Anthropic response where input_tokens=100 EXCLUDES cached tokens - # cache_read=80 and cache_write=20 are separate - # Total input tokens processed = 100 + 80 + 20 = 200 client.messages._post = mock.Mock( return_value=Message( id="id", - model="claude-3-5-sonnet-20241022", + model="claude-sonnet-4-20250514", role="assistant", - content=[TextBlock(type="text", text="Response")], + content=[TextBlock(type="text", text="3 + 3 equals 6.")], type="message", usage=Usage( - input_tokens=100, - output_tokens=50, - cache_read_input_tokens=80, - cache_creation_input_tokens=20, + input_tokens=19, + output_tokens=14, + cache_read_input_tokens=0, + cache_creation_input_tokens=2846, ), ) ) @@ -2302,29 +2300,72 @@ def test_input_tokens_include_cached_nonstreaming(sentry_init, capture_events): with start_transaction(name="anthropic"): client.messages.create( max_tokens=1024, - messages=[{"role": "user", "content": "Hello"}], - model="claude-3-5-sonnet-20241022", + messages=[{"role": "user", "content": "What is 3+3?"}], + model="claude-sonnet-4-20250514", ) (span,) = events[0]["spans"] - # input_tokens should be total: 100 (non-cached) + 80 (cache_read) + 20 (cache_write) = 200 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 + # input_tokens should be total: 19 (non-cached) + 2846 (cache_write) = 2865 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 # 2865 + 14 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 0 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 2846 - # total_tokens should include the full input count - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 250 # 200 + 50 - # Cache fields should still be reported correctly - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 +def test_input_tokens_include_cache_read_nonstreaming(sentry_init, capture_events): + """ + Test that gen_ai.usage.input_tokens includes cache_read tokens (non-streaming). + + Reproduces a real Anthropic cache-hit response. This is the scenario that + caused negative gen_ai.cost.input_tokens: input_tokens=19 but cached=2846, + so the backend computed 19 - 2846 = -2827 "regular" tokens. 
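+    With the fix, the SDK reports 19 + 2846 = 2865 input tokens, so the
+    backend's regular-token count is 2865 - 2846 = 19, no longer negative.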
+ + Real Anthropic response (from E2E test): + Usage(input_tokens=19, output_tokens=14, + cache_creation_input_tokens=0, cache_read_input_tokens=2846) + """ + sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) + events = capture_events() + client = Anthropic(api_key="z") + + client.messages._post = mock.Mock( + return_value=Message( + id="id", + model="claude-sonnet-4-20250514", + role="assistant", + content=[TextBlock(type="text", text="5 + 5 = 10.")], + type="message", + usage=Usage( + input_tokens=19, + output_tokens=14, + cache_read_input_tokens=2846, + cache_creation_input_tokens=0, + ), + ) + ) + with start_transaction(name="anthropic"): + client.messages.create( + max_tokens=1024, + messages=[{"role": "user", "content": "What is 5+5?"}], + model="claude-sonnet-4-20250514", + ) + + (span,) = events[0]["spans"] -def test_input_tokens_include_cached_streaming(sentry_init, capture_events): + # input_tokens should be total: 19 (non-cached) + 2846 (cache_read) = 2865 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 # 2865 + 14 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 2846 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 0 + + +def test_input_tokens_include_cache_read_streaming(sentry_init, capture_events): """ - Test that gen_ai.usage.input_tokens includes cached tokens for streaming responses. + Test that gen_ai.usage.input_tokens includes cache_read tokens (streaming). - Same bug as non-streaming: Anthropic's input_tokens excludes cached tokens, - leading to negative cost calculations when cache_read > input_tokens. + Same cache-hit scenario as non-streaming, using realistic streaming events. 
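+
+    Expected: input_tokens = 19 + 2846 = 2865 and
+    total_tokens = 2865 + 14 = 2879, matching the non-streaming case.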
""" client = Anthropic(api_key="z") returned_stream = Stream(cast_to=None, response=None, client=client) @@ -2333,22 +2374,22 @@ def test_input_tokens_include_cached_streaming(sentry_init, capture_events): type="message_start", message=Message( id="id", - model="claude-3-5-sonnet-20241022", + model="claude-sonnet-4-20250514", role="assistant", content=[], type="message", usage=Usage( - input_tokens=100, + input_tokens=19, output_tokens=0, - cache_read_input_tokens=80, - cache_creation_input_tokens=20, + cache_read_input_tokens=2846, + cache_creation_input_tokens=0, ), ), ), MessageDeltaEvent( type="message_delta", delta=Delta(stop_reason="end_turn"), - usage=MessageDeltaUsage(output_tokens=50), + usage=MessageDeltaUsage(output_tokens=14), ), ] @@ -2359,29 +2400,27 @@ def test_input_tokens_include_cached_streaming(sentry_init, capture_events): with start_transaction(name="anthropic"): for _ in client.messages.create( max_tokens=1024, - messages=[{"role": "user", "content": "Hello"}], - model="claude-3-5-sonnet-20241022", + messages=[{"role": "user", "content": "What is 5+5?"}], + model="claude-sonnet-4-20250514", stream=True, ): pass (span,) = events[0]["spans"] - # input_tokens should be total: 100 + 80 + 20 = 200 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 - - # total_tokens should include the full input count - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 250 # 200 + 50 - - # Cache fields should still be reported correctly - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 + # input_tokens should be total: 19 + 2846 = 2865 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 2865 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 2879 # 2865 + 14 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 2846 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 0 def test_input_tokens_unchanged_without_caching(sentry_init, capture_events): """ Test that input_tokens is unchanged when there are no cached tokens. - Ensures the fix doesn't break the non-caching case. 
+ + Real Anthropic response (from E2E test, simple call without caching): + Usage(input_tokens=20, output_tokens=12) """ sentry_init(integrations=[AnthropicIntegration()], traces_sample_rate=1.0) events = capture_events() @@ -2390,13 +2429,13 @@ def test_input_tokens_unchanged_without_caching(sentry_init, capture_events): client.messages._post = mock.Mock( return_value=Message( id="id", - model="claude-3-5-sonnet-20241022", + model="claude-sonnet-4-20250514", role="assistant", - content=[TextBlock(type="text", text="Response")], + content=[TextBlock(type="text", text="2+2 equals 4.")], type="message", usage=Usage( - input_tokens=100, - output_tokens=50, + input_tokens=20, + output_tokens=12, ), ) ) @@ -2404,15 +2443,14 @@ def test_input_tokens_unchanged_without_caching(sentry_init, capture_events): with start_transaction(name="anthropic"): client.messages.create( max_tokens=1024, - messages=[{"role": "user", "content": "Hello"}], - model="claude-3-5-sonnet-20241022", + messages=[{"role": "user", "content": "What is 2+2?"}], + model="claude-sonnet-4-20250514", ) (span,) = events[0]["spans"] - # Without caching, input_tokens should remain as-is - assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 100 - assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 150 # 100 + 50 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 20 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 32 # 20 + 12 def test_cache_tokens_streaming(sentry_init, capture_events): From df065fb64419ff70c539c4e27a083599676de316 Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Fri, 20 Feb 2026 14:45:23 +0100 Subject: [PATCH 4/4] adapt existing tests so they would have failed --- tests/integrations/anthropic/test_anthropic.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index 07239ee179..4361ba9629 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -2261,6 +2261,10 @@ def test_cache_tokens_nonstreaming(sentry_init, capture_events): ) (span,) = events[0]["spans"] + # input_tokens normalized: 100 + 80 (cache_read) + 20 (cache_write) = 200 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 50 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 250 assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 @@ -2495,5 +2499,9 @@ def test_cache_tokens_streaming(sentry_init, capture_events): pass (span,) = events[0]["spans"] + # input_tokens normalized: 100 + 80 (cache_read) + 20 (cache_write) = 200 + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 200 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 210 assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED] == 80 assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20