diff --git a/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/__init__.py b/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/__init__.py
index 6c32683ed3..0d88068e58 100644
--- a/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/__init__.py
+++ b/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/__init__.py
@@ -22,7 +22,10 @@
     guardrail_converse,
     guardrail_handling,
 )
-from opentelemetry.instrumentation.bedrock.prompt_caching import prompt_caching_handling
+from opentelemetry.instrumentation.bedrock.prompt_caching import (
+    prompt_caching_converse_handling,
+    prompt_caching_handling,
+)
 from opentelemetry.instrumentation.bedrock.reusable_streaming_body import (
     ReusableStreamingBody,
 )
@@ -354,6 +357,7 @@ def _handle_call(span: Span, kwargs, response, metric_params, event_logger):
 
 def _handle_converse(span, kwargs, response, metric_params, event_logger):
     (provider, model_vendor, model) = _get_vendor_model(kwargs.get("modelId"))
     guardrail_converse(span, response, provider, model, metric_params)
+    prompt_caching_converse_handling(response, provider, model, metric_params)
     set_converse_model_span_attributes(span, provider, model, kwargs)
 
@@ -394,7 +398,11 @@ def wrap(*args, **kwargs):
                 role = event["messageStart"]["role"]
             elif "metadata" in event:
                 # last message sent
+                metadata = event.get("metadata", {})
                 guardrail_converse(span, event["metadata"], provider, model, metric_params)
+                prompt_caching_converse_handling(
+                    metadata, provider, model, metric_params
+                )
                 converse_usage_record(span, event["metadata"], metric_params)
                 span.end()
             elif "messageStop" in event:
diff --git a/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/prompt_caching.py b/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/prompt_caching.py
index b94dc66127..f98d57978b 100644
--- a/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/prompt_caching.py
+++ b/packages/opentelemetry-instrumentation-bedrock/opentelemetry/instrumentation/bedrock/prompt_caching.py
@@ -41,3 +41,45 @@ def prompt_caching_handling(headers, vendor, model, metric_params):
             )
         if write_cached_tokens > 0:
             span.set_attribute(CacheSpanAttrs.CACHED, "write")
+
+
+def prompt_caching_converse_handling(response, vendor, model, metric_params):
+    base_attrs = {
+        "gen_ai.system": vendor,
+        "gen_ai.response.model": model,
+    }
+    span = trace.get_current_span()
+    if not isinstance(span, trace.Span) or not span.is_recording():
+        return
+
+    usage = response.get("usage", {})
+    read_cached_tokens = usage.get("cache_read_input_tokens", 0)
+    write_cached_tokens = usage.get("cache_creation_input_tokens", 0)
+
+    if read_cached_tokens > 0:
+        if metric_params.prompt_caching:
+            metric_params.prompt_caching.add(
+                read_cached_tokens,
+                attributes={
+                    **base_attrs,
+                    CacheSpanAttrs.TYPE: "read",
+                },
+            )
+        span.set_attribute(CacheSpanAttrs.CACHED, "read")
+        span.set_attribute(
+            "gen_ai.usage.cache_read_input_tokens", read_cached_tokens
+        )
+
+    if write_cached_tokens > 0:
+        if metric_params.prompt_caching:
+            metric_params.prompt_caching.add(
+                write_cached_tokens,
+                attributes={
+                    **base_attrs,
+                    CacheSpanAttrs.TYPE: "write",
+                },
+            )
+        span.set_attribute(CacheSpanAttrs.CACHED, "write")
+        span.set_attribute(
+            "gen_ai.usage.cache_creation_input_tokens", write_cached_tokens
+        )
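For context, the new handler can be exercised in isolation. A minimal sketch, assuming the patch above is installed; the SimpleNamespace stands in for the instrumentation's internal MetricParams object (only its prompt_caching counter is touched here), so nothing below is the package's public API:

# Drive prompt_caching_converse_handling directly inside a recording span.
# Assumption: SimpleNamespace mimics MetricParams; prompt_caching=None
# skips the counter path and only sets the span attributes.
from types import SimpleNamespace

from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider

from opentelemetry.instrumentation.bedrock.prompt_caching import (
    prompt_caching_converse_handling,
)

trace.set_tracer_provider(TracerProvider())
tracer = trace.get_tracer(__name__)

# Shaped like the Converse response dict the handler reads "usage" from.
fake_response = {
    "usage": {"cache_read_input_tokens": 128, "cache_creation_input_tokens": 0}
}

with tracer.start_as_current_span("bedrock.converse"):
    prompt_caching_converse_handling(
        fake_response, "AWS", "claude-3-haiku", SimpleNamespace(prompt_caching=None)
    )
    # The current span now carries CacheSpanAttrs.CACHED == "read" and
    # gen_ai.usage.cache_read_input_tokens == 128, per the diff above.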
diff --git a/packages/opentelemetry-instrumentation-bedrock/tests/traces/cassettes/test_titan/test_titan_converse_with_caching.yaml b/packages/opentelemetry-instrumentation-bedrock/tests/traces/cassettes/test_titan/test_titan_converse_with_caching.yaml
new file mode 100644
index 0000000000..42a9d329df
--- /dev/null
+++ b/packages/opentelemetry-instrumentation-bedrock/tests/traces/cassettes/test_titan/test_titan_converse_with_caching.yaml
@@ -0,0 +1,47 @@
+interactions:
+- request:
+    body: '{"messages": [{"role": "user", "content": [{"text": "Hello, this is a test
+      prompt for caching."}]}], "inferenceConfig": {"maxTokens": 50}, "additionalModelRequestFields":
+      {"cacheControl": {"type": "ephemeral"}}}'
+    headers:
+      Content-Length:
+      - '211'
+      Content-Type:
+      - !!binary |
+        YXBwbGljYXRpb24vanNvbg==
+      User-Agent:
+      - !!binary |
+        Qm90bzMvMS4zNC4xNDUgbWQvQm90b2NvcmUjMS4zNC4xNDUgdWEvMi4wIG9zL3dpbmRvd3MjMTAg
+        bWQvYXJjaCNhbWQ2NCBsYW5nL3B5dGhvbiMzLjExLjggbWQvcHlpbXBsI0NQeXRob24gY2ZnL3Jl
+        dHJ5LW1vZGUjbGVnYWN5IEJvdG9jb3JlLzEuMzQuMTQ1
+      X-Amz-Date:
+      - !!binary |
+        MjAyNTEwMzBUMTY1MTI4Wg==
+      amz-sdk-invocation-id:
+      - !!binary |
+        ZTFmM2VjZTMtM2VlMC00ZGZiLWE2MWEtYjBiYmU4M2U2NDFm
+      amz-sdk-request:
+      - !!binary |
+        YXR0ZW1wdD0x
+    method: POST
+    uri: https://bedrock-runtime.us-east-1.amazonaws.com/model/amazon.titan-text-express-v1/converse
+  response:
+    body:
+      string: '{"message":"The security token included in the request is invalid."}'
+    headers:
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '68'
+      Content-Type:
+      - application/json
+      Date:
+      - Thu, 30 Oct 2025 16:51:30 GMT
+      x-amzn-ErrorType:
+      - UnrecognizedClientException:http://internal.amazon.com/coral/com.amazon.coral.service/
+      x-amzn-RequestId:
+      - f0fdf86b-d85a-4f16-8713-1b9b5fccbe25
+    status:
+      code: 403
+      message: Forbidden
+version: 1
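The tests below lean on instrument_legacy, brt, span_exporter, and reader fixtures from the package's conftest. A rough, self-contained equivalent of that wiring for readers following along; the fixture bodies here are assumptions, not the repo's actual conftest:

# Stand-ins for the conftest fixtures used by the tests below.
# Assumption: the real conftest wires things similarly; only the fixture
# names (instrument_legacy, brt, span_exporter, reader) come from the tests.
import boto3
import pytest
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.export import InMemoryMetricReader
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
from opentelemetry.sdk.trace.export.in_memory_span_exporter import (
    InMemorySpanExporter,
)

from opentelemetry.instrumentation.bedrock import BedrockInstrumentor


@pytest.fixture
def reader():
    return InMemoryMetricReader()


@pytest.fixture
def span_exporter():
    return InMemorySpanExporter()


@pytest.fixture
def instrument_legacy(reader, span_exporter):
    provider = TracerProvider()
    provider.add_span_processor(SimpleSpanProcessor(span_exporter))
    instrumentor = BedrockInstrumentor()
    instrumentor.instrument(
        tracer_provider=provider,
        meter_provider=MeterProvider(metric_readers=[reader]),
    )
    yield
    instrumentor.uninstrument()


@pytest.fixture
def brt():
    # VCR replays the recorded HTTP exchanges, so no live credentials needed.
    return boto3.client("bedrock-runtime", region_name="us-east-1")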
diff --git a/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_anthropic.py b/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_anthropic.py
index bb176d5ccf..1dfa15ff40 100644
--- a/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_anthropic.py
+++ b/packages/opentelemetry-instrumentation-bedrock/tests/traces/test_anthropic.py
@@ -10,6 +10,38 @@
     gen_ai_attributes as GenAIAttributes,
 )
 from opentelemetry.semconv_ai import SpanAttributes
+from opentelemetry.instrumentation.bedrock import PromptCaching
+
+
+def get_metric(resource_metrics, name):
+    for rm in resource_metrics:
+        for sm in rm.scope_metrics:
+            for metric in sm.metrics:
+                if metric.name == name:
+                    return metric
+    raise Exception(f"No metric found with name {name}")
+
+
+def assert_metric(reader, usage, is_read=False):
+    metrics_data = reader.get_metrics_data()
+    resource_metrics = metrics_data.resource_metrics
+    assert len(resource_metrics) > 0
+
+    m = get_metric(resource_metrics, PromptCaching.LLM_BEDROCK_PROMPT_CACHING)
+    found_read = False
+    found_write = False
+    for data_point in m.data.data_points:
+        if data_point.attributes[CacheSpanAttrs.TYPE] == "read":
+            found_read = True
+            assert data_point.value == usage["cache_read_input_tokens"]
+        elif data_point.attributes[CacheSpanAttrs.TYPE] == "write":
+            found_write = True
+            assert data_point.value == usage["cache_creation_input_tokens"]
+
+    if is_read:
+        assert found_read
+    else:
+        assert found_write
 
 
 @pytest.mark.vcr
@@ -1075,3 +1107,77 @@ def assert_message_in_logs(log: LogData, event_name: str, expected_content: dict
     else:
         assert log.log_record.body
         assert dict(log.log_record.body) == expected_content
+
+
+@pytest.mark.vcr
+def test_anthropic_converse_with_caching(instrument_legacy, brt, span_exporter, reader):
+    response_write = brt.converse(
+        modelId="anthropic.claude-3-haiku-20240307-v1:0",
+        messages=[
+            {
+                "role": "user",
+                "content": [
+                    {"text": "Hello, this is a test prompt for caching."},
+                    {"cachePoint": {"type": "default"}},
+                ],
+            }
+        ],
+        inferenceConfig={"maxTokens": 50},
+    )
+
+    usage_write = response_write["usage"]
+    assert usage_write["cache_read_input_tokens"] == 0
+    assert usage_write["cache_creation_input_tokens"] > 0
+
+    response_read = brt.converse(
+        modelId="anthropic.claude-3-haiku-20240307-v1:0",
+        messages=[
+            {
+                "role": "user",
+                "content": [
+                    {"text": "Hello, this is a test prompt for caching."},
+                    {"cachePoint": {"type": "default"}},
+                ],
+            }
+        ],
+        inferenceConfig={"maxTokens": 50},
+    )
+    usage_read = response_read["usage"]
+    assert usage_read["cache_read_input_tokens"] > 0
+    assert usage_read["cache_creation_input_tokens"] == 0
+
+    spans = span_exporter.get_finished_spans()
+    assert len(spans) == 2
+
+    span_write = spans[0]
+    assert span_write.name == "bedrock.converse"
+    attributes_write = span_write.attributes
+    assert (
+        attributes_write[GenAIAttributes.GEN_AI_REQUEST_MODEL]
+        == "claude-3-haiku-20240307-v1:0"
+    )
+    assert attributes_write[CacheSpanAttrs.CACHED] == "write"
+    assert (
+        attributes_write["gen_ai.usage.cache_creation_input_tokens"]
+        == usage_write["cache_creation_input_tokens"]
+    )
+
+    span_read = spans[1]
+    assert span_read.name == "bedrock.converse"
+    attributes_read = span_read.attributes
+    assert (
+        attributes_read[GenAIAttributes.GEN_AI_REQUEST_MODEL]
+        == "claude-3-haiku-20240307-v1:0"
+    )
+    assert attributes_read[CacheSpanAttrs.CACHED] == "read"
+    assert (
+        attributes_read["gen_ai.usage.cache_read_input_tokens"]
+        == usage_read["cache_read_input_tokens"]
+    )
+
+    cumulative_usage = {
+        "cache_creation_input_tokens": usage_write["cache_creation_input_tokens"],
+        "cache_read_input_tokens": usage_read["cache_read_input_tokens"],
+    }
+    assert_metric(reader, cumulative_usage, is_read=False)
+    assert_metric(reader, cumulative_usage, is_read=True)
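Outside the test suite, the feature would be exercised roughly as follows. A sketch only: it assumes live AWS credentials and a model with prompt caching enabled; the counter name is taken from PromptCaching.LLM_BEDROCK_PROMPT_CACHING in the diff above, everything else is generic OpenTelemetry SDK plumbing:

# End-to-end sketch: instrument, converse twice with a cache checkpoint,
# then dump the prompt-caching counter. The first call writes the cache,
# the second reads it.
import boto3
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.export import InMemoryMetricReader
from opentelemetry.sdk.trace import TracerProvider

from opentelemetry.instrumentation.bedrock import BedrockInstrumentor, PromptCaching

reader = InMemoryMetricReader()
BedrockInstrumentor().instrument(
    tracer_provider=TracerProvider(),
    meter_provider=MeterProvider(metric_readers=[reader]),
)

brt = boto3.client("bedrock-runtime", region_name="us-east-1")
for _ in range(2):
    brt.converse(
        modelId="anthropic.claude-3-haiku-20240307-v1:0",
        messages=[
            {
                "role": "user",
                "content": [
                    {"text": "Hello, this is a test prompt for caching."},
                    {"cachePoint": {"type": "default"}},
                ],
            }
        ],
        inferenceConfig={"maxTokens": 50},
    )

# One data point per CacheSpanAttrs.TYPE ("write" from the first call,
# "read" from the second), tagged with gen_ai.system / gen_ai.response.model.
for rm in reader.get_metrics_data().resource_metrics:
    for sm in rm.scope_metrics:
        for metric in sm.metrics:
            if metric.name == PromptCaching.LLM_BEDROCK_PROMPT_CACHING:
                for point in metric.data.data_points:
                    print(dict(point.attributes), point.value)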