Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,10 @@
guardrail_converse,
guardrail_handling,
)
from opentelemetry.instrumentation.bedrock.prompt_caching import prompt_caching_handling
from opentelemetry.instrumentation.bedrock.prompt_caching import (
prompt_caching_converse_handling,
prompt_caching_handling,
)
from opentelemetry.instrumentation.bedrock.reusable_streaming_body import (
ReusableStreamingBody,
)
Expand Down Expand Up @@ -354,6 +357,7 @@ def _handle_call(span: Span, kwargs, response, metric_params, event_logger):
def _handle_converse(span, kwargs, response, metric_params, event_logger):
(provider, model_vendor, model) = _get_vendor_model(kwargs.get("modelId"))
guardrail_converse(span, response, provider, model, metric_params)
prompt_caching_converse_handling(response, provider, model, metric_params)

set_converse_model_span_attributes(span, provider, model, kwargs)

Expand Down Expand Up @@ -394,7 +398,11 @@ def wrap(*args, **kwargs):
role = event["messageStart"]["role"]
elif "metadata" in event:
# last message sent
metadata = event.get("metadata", {})
guardrail_converse(span, event["metadata"], provider, model, metric_params)
prompt_caching_converse_handling(
metadata, provider, model, metric_params
)
converse_usage_record(span, event["metadata"], metric_params)
span.end()
elif "messageStop" in event:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,45 @@ def prompt_caching_handling(headers, vendor, model, metric_params):
)
if write_cached_tokens > 0:
span.set_attribute(CacheSpanAttrs.CACHED, "write")


def prompt_caching_converse_handling(response, vendor, model, metric_params):
    """Record prompt-cache telemetry for a Bedrock Converse API response.

    Reads ``cache_read_input_tokens`` and ``cache_creation_input_tokens`` from
    ``response["usage"]`` and, for each non-zero count, increments the
    ``metric_params.prompt_caching`` counter and annotates the current span
    with the cache type and token count.

    NOTE(review): assumes the usage payload exposes snake_case keys — confirm
    against the actual Converse response shape used by the caller.
    """
    span = trace.get_current_span()
    # Bail out before doing any work when there is no recording span to
    # attach attributes to.
    if not isinstance(span, trace.Span) or not span.is_recording():
        return

    usage = response.get("usage", {})
    base_attrs = {
        "gen_ai.system": vendor,
        "gen_ai.response.model": model,
    }

    # Read and write handling differ only in key/attribute names, so drive
    # both from one table instead of duplicating the branch bodies.
    for cache_type, usage_key, span_attr in (
        ("read", "cache_read_input_tokens",
         "gen_ai.usage.cache_read_input_tokens"),
        ("write", "cache_creation_input_tokens",
         "gen_ai.usage.cache_creation_input_tokens"),
    ):
        tokens = usage.get(usage_key, 0)
        if tokens <= 0:
            continue
        if metric_params.prompt_caching:
            metric_params.prompt_caching.add(
                tokens,
                attributes={
                    **base_attrs,
                    CacheSpanAttrs.TYPE: cache_type,
                },
            )
        # When both counts are non-zero, "write" overwrites "read" here,
        # matching the original behavior.
        span.set_attribute(CacheSpanAttrs.CACHED, cache_type)
        span.set_attribute(span_attr, tokens)
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
interactions:
- request:
body: '{"messages": [{"role": "user", "content": [{"text": "Hello, this is a test
prompt for caching."}]}], "inferenceConfig": {"maxTokens": 50}, "additionalModelRequestFields":
{"cacheControl": {"type": "ephemeral"}}}'
headers:
Content-Length:
- '211'
Content-Type:
- !!binary |
YXBwbGljYXRpb24vanNvbg==
User-Agent:
- !!binary |
Qm90bzMvMS4zNC4xNDUgbWQvQm90b2NvcmUjMS4zNC4xNDUgdWEvMi4wIG9zL3dpbmRvd3MjMTAg
bWQvYXJjaCNhbWQ2NCBsYW5nL3B5dGhvbiMzLjExLjggbWQvcHlpbXBsI0NQeXRob24gY2ZnL3Jl
dHJ5LW1vZGUjbGVnYWN5IEJvdG9jb3JlLzEuMzQuMTQ1
X-Amz-Date:
- !!binary |
MjAyNTEwMzBUMTY1MTI4Wg==
amz-sdk-invocation-id:
- !!binary |
ZTFmM2VjZTMtM2VlMC00ZGZiLWE2MWEtYjBiYmU4M2U2NDFm
amz-sdk-request:
- !!binary |
YXR0ZW1wdD0x
method: POST
uri: https://bedrock-runtime.us-east-1.amazonaws.com/model/amazon.titan-text-express-v1/converse
response:
body:
string: '{"message":"The security token included in the request is invalid."}'
headers:
Connection:
- keep-alive
Content-Length:
- '68'
Content-Type:
- application/json
Date:
- Thu, 30 Oct 2025 16:51:30 GMT
x-amzn-ErrorType:
- UnrecognizedClientException:http://internal.amazon.com/coral/com.amazon.coral.service/
x-amzn-RequestId:
- f0fdf86b-d85a-4f16-8713-1b9b5fccbe25
status:
code: 403
message: Forbidden
version: 1
Comment on lines +1 to +47
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

Cassette contains authentication failure instead of valid caching response.

The cassette records a 403 Forbidden response with "The security token included in the request is invalid" rather than a successful API response with caching metadata. This means the test test_titan_converse_with_caching will fail because:

  • Line 1086-1088: Expects usage_write["cache_creation_input_tokens"] > 0, but the response has no usage field
  • Line 1091-1099: Expects a second successful call with cache read data

Per the PR objectives, you need AWS credentials to record a valid cassette. Ask the maintainer to run the pytest recording command below and commit the generated cassette.


🏁 Script executed:

poetry run pytest packages/opentelemetry-instrumentation-bedrock/tests/traces/test_titan.py::test_titan_converse_with_caching --record-mode=once

Length of output: 44



Once I locate the test file, I'll examine the specific test to verify the assertions mentioned in the review comment.

---

🏁 Script executed:

```shell
#!/bin/bash
# Find and inspect the test_titan.py file and locate the test_titan_converse_with_caching test
fd "test_titan\.py" --type f

Length of output: 77



🏁 Script executed:

#!/bin/bash
# Inspect the test function test_titan_converse_with_caching
ast-grep --pattern 'def test_titan_converse_with_caching($_) {
  $$$
}'

Length of output: 0



🏁 Script executed:

#!/bin/bash
# Search for the test function and display context
rg -n "def test_titan_converse_with_caching" --type py -A 50

Length of output: 6541


Cassette is invalid and will cause test failure; AWS credentials required to regenerate.

The cassette indeed contains a 403 Forbidden response instead of a valid caching response. The test will fail at line 1086 when attempting to access response_write["usage"] from the error response, which lacks this field. The cassette must be regenerated with valid AWS credentials to record a successful response with cache metadata.

🤖 Prompt for AI Agents
packages/opentelemetry-instrumentation-bedrock/tests/traces/cassettes/test_titan/test_titan_converse_with_caching.yaml
lines 1-47: the cassette records a 403 UnrecognizedClientException (invalid AWS
credentials) instead of a successful converse response, causing tests to crash
when they expect response_write["usage"]; regenerate or replace this cassette by
re-running the test recording with valid AWS credentials so the cassette
contains a successful response that includes cache/usage metadata, and then
commit the new cassette (or remove the failing cassette and update the test to
use a properly recorded fixture).

Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,38 @@
gen_ai_attributes as GenAIAttributes,
)
from opentelemetry.semconv_ai import SpanAttributes
from opentelemetry.instrumentation.bedrock import PromptCaching


def get_metric(resource_metrics, name):
    """Return the first metric named *name* found in *resource_metrics*.

    Walks every scope of every resource metric. Raises ``LookupError`` (a
    subclass of ``Exception``, so existing broad handlers keep working)
    when no metric matches.
    """
    for resource_metric in resource_metrics:
        for scope_metric in resource_metric.scope_metrics:
            for metric in scope_metric.metrics:
                if metric.name == name:
                    return metric
    raise LookupError(f"No metric found with name {name}")


def assert_metric(reader, usage, is_read=False):
    """Assert the prompt-caching counter recorded the expected token counts.

    Requires the "read" data point when *is_read* is True, the "write" one
    otherwise; any data point of either type that is present must match the
    corresponding count in *usage*.
    """
    resource_metrics = reader.get_metrics_data().resource_metrics
    assert len(resource_metrics) > 0

    metric = get_metric(resource_metrics, PromptCaching.LLM_BEDROCK_PROMPT_CACHING)
    seen = {"read": False, "write": False}
    for point in metric.data.data_points:
        cache_type = point.attributes[CacheSpanAttrs.TYPE]
        if cache_type == "read":
            seen["read"] = True
            assert point.value == usage["cache_read_input_tokens"]
        elif cache_type == "write":
            seen["write"] = True
            assert point.value == usage["cache_creation_input_tokens"]

    assert seen["read" if is_read else "write"]


@pytest.mark.vcr
Expand Down Expand Up @@ -1075,3 +1107,65 @@ def assert_message_in_logs(log: LogData, event_name: str, expected_content: dict
else:
assert log.log_record.body
assert dict(log.log_record.body) == expected_content


@pytest.mark.vcr
def test_anthropic_converse_with_caching(instrument_legacy, brt, span_exporter, reader):
    """Converse twice with a cache point: the first call writes the prompt
    cache, the second reads it; spans and metrics must reflect both."""
    model_id = "anthropic.claude-3-haiku-20240307-v1:0"
    cached_messages = [
        {
            "role": "user",
            "content": [
                {"text": "Hello, this is a test prompt for caching."},
                {"cachePoint": {"type": "default"}},
            ],
        }
    ]

    def converse():
        # Identical payload both times so the second call hits the cache.
        return brt.converse(
            modelId=model_id,
            messages=cached_messages,
            inferenceConfig={"maxTokens": 50},
        )

    usage_write = converse()["usage"]
    assert usage_write["cache_read_input_tokens"] == 0
    assert usage_write["cache_creation_input_tokens"] > 0

    usage_read = converse()["usage"]
    assert usage_read["cache_read_input_tokens"] > 0
    assert usage_read["cache_creation_input_tokens"] == 0

    spans = span_exporter.get_finished_spans()
    assert len(spans) == 2
    span_write, span_read = spans

    assert span_write.name == "bedrock.converse"
    attrs_write = span_write.attributes
    assert attrs_write[GenAIAttributes.GEN_AI_REQUEST_MODEL] == "claude-3-haiku-20240307-v1:0"
    assert attrs_write[CacheSpanAttrs.CACHED] == "write"
    assert (
        attrs_write["gen_ai.usage.cache_creation_input_tokens"]
        == usage_write["cache_creation_input_tokens"]
    )

    assert span_read.name == "bedrock.converse"
    attrs_read = span_read.attributes
    assert attrs_read[GenAIAttributes.GEN_AI_REQUEST_MODEL] == "claude-3-haiku-20240307-v1:0"
    assert attrs_read[CacheSpanAttrs.CACHED] == "read"
    assert (
        attrs_read["gen_ai.usage.cache_read_input_tokens"]
        == usage_read["cache_read_input_tokens"]
    )

    cumulative_usage = {
        "cache_creation_input_tokens": usage_write["cache_creation_input_tokens"],
        "cache_read_input_tokens": usage_read["cache_read_input_tokens"],
    }
    assert_metric(reader, cumulative_usage, is_read=False)
    assert_metric(reader, cumulative_usage, is_read=True)
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
gen_ai_attributes as GenAIAttributes,
)
from opentelemetry.semconv_ai import SpanAttributes


@pytest.mark.vcr
def test_titan_completion(instrument_legacy, brt, span_exporter, log_exporter):
Expand Down Expand Up @@ -1036,4 +1036,4 @@ def assert_message_in_logs(log: LogData, event_name: str, expected_content: dict
assert not log.log_record.body
else:
assert log.log_record.body
assert dict(log.log_record.body) == expected_content
assert dict(log.log_record.body) == expected_content