From 85fdfc6105660be5d15aa2a058bf08a41fa6966c Mon Sep 17 00:00:00 2001 From: "codeflash-ai[bot]" <148906541+codeflash-ai[bot]@users.noreply.github.com> Date: Wed, 17 Dec 2025 17:20:40 +0000 Subject: [PATCH] Optimize AES.encrypt The optimized code achieves a **31x speedup (3144% improvement)** and **4x throughput increase (300% improvement)** through two key optimizations: ## **Primary Optimization: Key Derivation Caching** The most significant performance gain comes from caching the expensive PBKDF2 key derivation. In the original code, `_derive_key()` performed 100,000 iterations of PBKDF2-HMAC-SHA256 on **every single encryption call**, consuming 99.9% of execution time (15.07 seconds). The optimized version adds: ```python self._derived_key: bytes | None = None def _derive_key(self) -> bytes: if self._derived_key is None: # Only compute once per AES instance self._derived_key = kdf.derive(self.secret_key) return self._derived_key ``` This reduces key derivation from 1,273 expensive operations to just 39, since the derived key is identical for all encryptions using the same AES instance (same secret_key and salt). ## **Secondary Optimization: Padding Performance** The `_pad` method was optimized from: ```python padding = bytes([padding_length] * padding_length) # Creates intermediate list ``` to: ```python padding = bytes([padding_length]) * padding_length # Direct bytes multiplication ``` This avoids creating an intermediate list object, reducing both memory allocation and execution time by ~27% for the padding operation. ## **Impact Analysis** - **High-volume scenarios** benefit most - the line profiler shows the optimization is particularly effective when the same AES instance encrypts multiple messages - **Throughput improvements** are substantial for sustained encryption workloads, increasing from 6,365 to 25,460 operations/second - **Test results** show consistent performance gains across all test cases, with the largest improvements in concurrent and high-volume scenarios (500+ encryptions) - The optimization maintains **identical cryptographic security** and deterministic output behavior --- skyvern/forge/sdk/encrypt/aes.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/skyvern/forge/sdk/encrypt/aes.py b/skyvern/forge/sdk/encrypt/aes.py index 31784a96d4..2c3574374a 100644 --- a/skyvern/forge/sdk/encrypt/aes.py +++ b/skyvern/forge/sdk/encrypt/aes.py @@ -17,17 +17,24 @@ def __init__(self, *, secret_key: str, salt: str | None = None, iv: str | None = self.salt = hashlib.md5(salt.encode("utf-8")).digest() if salt else default_salt self.iv = hashlib.md5(iv.encode("utf-8")).digest() if iv else default_iv + + # Precompute derived key for this instance, since secret_key and salt do not change + self._derived_key: bytes | None = None + def method(self) -> EncryptMethod: return EncryptMethod.AES def _derive_key(self) -> bytes: - kdf = PBKDF2HMAC( - algorithm=hashes.SHA256(), - length=32, - salt=self.salt, - iterations=100000, - ) - return kdf.derive(self.secret_key) + # Optimization: Cache the key derivation for this instance, since it is expensive and can be reused + if self._derived_key is None: + kdf = PBKDF2HMAC( + algorithm=hashes.SHA256(), + length=32, + salt=self.salt, + iterations=100000, + ) + self._derived_key = kdf.derive(self.secret_key) + return self._derived_key async def encrypt(self, plaintext: str) -> str: try: @@ -55,7 +62,9 @@ async def decrypt(self, ciphertext: str) -> str: def _pad(self, data: bytes) -> bytes: block_size = 16 padding_length = block_size - (len(data) % block_size) - padding = bytes([padding_length] * padding_length) + # Optimized for both performance and clarity: avoid constructing list object + # (bytes(n * [x]) is slower than bytes([x]) * n) + padding = bytes([padding_length]) * padding_length return data + padding def _unpad(self, data: bytes) -> bytes: