@@ -46,7 +46,8 @@ static inline float ChooseQueryScale(const ModelConfig& config) {
 struct AttentionActivations {
   AttentionActivations(
       const ModelConfig& config, const LayerConfig& layer_config,
-      size_t batch_size, size_t seq_len, const Allocator& allocator,
+      size_t batch_size, size_t seq_len, AttentionImpl attention_impl,
+      const Allocator& allocator,
       std::vector<hwy::AlignedFreeUniquePtr<uint8_t*[]>>& row_ptrs)
       : // `vocab_size == 0` means it is for Vit part, VitAttention is still
         // MHA and does not use an external KV cache.
@@ -80,7 +81,8 @@ struct AttentionActivations {
             layer_config.post_qk == PostQKType::HalfRope)),
         inv_timescale_global(CreateInvTimescale(
             allocator, layer_config.qkv_dim,
-            layer_config.post_qk == PostQKType::HalfRope, 1000000.0)) {
+            layer_config.post_qk == PostQKType::HalfRope, 1000000.0))
+  {
     // Batch size can be 0 in experimental code so do not assert.
     if (batch_size == 0) {
       static std::atomic_flag warned = ATOMIC_FLAG_INIT;
@@ -217,7 +219,8 @@ struct Activations {
 
         attention_impl(runtime_config.attention_impl),
         attention_storage(config, layer_config, batch_size, seq_len,
-                          ctx.allocator, row_ptrs),
+                          runtime_config.attention_impl, ctx.allocator,
+                          row_ptrs),
         attention(config, seq_len, attention_storage) {
     HWY_ASSERT(batch_size != 0);
 
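
Taken together, these hunks thread the runtime-selected AttentionImpl from
Activations into AttentionActivations: the new constructor parameter sits
between seq_len and the Allocator. Below is a minimal caller-side sketch, not
part of this commit; it assumes the config, layer_config, runtime_config, and
ctx objects visible in the diff context, and uses placeholder batch size and
sequence length values.

  // Sketch only; batch_size and seq_len are hypothetical placeholder values.
  std::vector<hwy::AlignedFreeUniquePtr<uint8_t*[]>> row_ptrs;
  AttentionActivations attention_storage(
      config, layer_config, /*batch_size=*/4, /*seq_len=*/2048,
      runtime_config.attention_impl,  // parameter added by this change
      ctx.allocator, row_ptrs);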