From 3b122708e95e1011bb985e677965263028a8060c Mon Sep 17 00:00:00 2001 From: Byron Pullutasig <115118857+bpulluta@users.noreply.github.com> Date: Thu, 28 May 2026 15:40:51 -0600 Subject: [PATCH 1/6] Add OpenAI base URL env support and JSON parse fallback --- compass/llm/config.py | 8 +++ compass/utilities/parsing.py | 49 +++++++++++++++-- tests/python/unit/llm/test_config.py | 54 +++++++++++++++++++ .../unit/utilities/test_utilities_parsing.py | 8 +++ 4 files changed, 115 insertions(+), 4 deletions(-) create mode 100644 tests/python/unit/llm/test_config.py diff --git a/compass/llm/config.py b/compass/llm/config.py index 487f14a5e..2acf5e3e7 100644 --- a/compass/llm/config.py +++ b/compass/llm/config.py @@ -200,6 +200,14 @@ def client_kwargs(self): for key, env_var in arg_env_pairs: if self._client_kwargs.get(key) is None: self._client_kwargs[key] = os.environ.get(env_var) + elif self.client_type == "openai": + arg_env_pairs = [ + ("api_key", "OPENAI_API_KEY"), + ("base_url", "OPENAI_BASE_URL"), + ] + for key, env_var in arg_env_pairs: + if self._client_kwargs.get(key) is None: + self._client_kwargs[key] = os.environ.get(env_var) return self._client_kwargs diff --git a/compass/utilities/parsing.py b/compass/utilities/parsing.py index ce7e79f4b..7e4d977d4 100644 --- a/compass/utilities/parsing.py +++ b/compass/utilities/parsing.py @@ -39,16 +39,18 @@ def llm_response_as_json(content): Returns ------- - dict + object Parsed JSON structure. When parsing fails, the function returns an empty dictionary. Notes ----- The parser strips Markdown code fences, coerces Python-style - booleans to lowercase JSON literals, and logs the raw response on - decode failure. The logging includes guidance for increasing token - limits or updating prompts. + booleans to lowercase JSON literals, and first attempts strict JSON + decoding. If strict decoding fails, the parser attempts to recover + the first valid JSON object or array embedded in the response. If + recovery also fails, the raw response is logged with guidance for + prompt/token adjustments. """ content = clean_backticks_from_llm_response(content) content = content.removeprefix("json").lstrip("\n") @@ -56,6 +58,10 @@ def llm_response_as_json(content): try: content = json.loads(content) except json.decoder.JSONDecodeError: + parsed_content = _parse_first_json_payload(content) + if parsed_content is not None: + return parsed_content + logger.exception( "LLM returned improperly formatted JSON. " "This is likely due to the completion running out of tokens. " @@ -68,6 +74,41 @@ def llm_response_as_json(content): return content +def _parse_first_json_payload(content): + """[NOT PUBLIC API] Parse first valid JSON payload embedded in text + + Parameters + ---------- + content : str + Text that may contain one or more JSON payloads mixed with + additional non-JSON prose. + + Returns + ------- + object or None + Parsed JSON payload from the first decodable object/array in + the string. Returns ``None`` if no decodable payload exists. + + Notes + ----- + This helper scans for ``"{"`` and ``"["`` markers and attempts + ``json.JSONDecoder().raw_decode`` from each candidate position + until successful. + """ + decoder = json.JSONDecoder() + for start_char in ("{", "["): + start_ind = content.find(start_char) + while start_ind != -1: + try: + parsed_content, __ = decoder.raw_decode(content[start_ind:]) + except json.decoder.JSONDecodeError: + start_ind = content.find(start_char, start_ind + 1) + else: + return parsed_content + + return None + + def merge_overlapping_texts(text_chunks, n=300): """Merge text chunks while trimming overlapping boundaries diff --git a/tests/python/unit/llm/test_config.py b/tests/python/unit/llm/test_config.py new file mode 100644 index 000000000..210b426ab --- /dev/null +++ b/tests/python/unit/llm/test_config.py @@ -0,0 +1,54 @@ +"""Tests for LLM configuration helpers""" + +from pathlib import Path + +import pytest + +from compass.llm.config import OpenAIConfig + + +def test_openai_client_kwargs_loaded_from_env(monkeypatch): + """OpenAI kwargs can be populated from OPENAI_* env vars""" + monkeypatch.setenv("OPENAI_API_KEY", "test-openai-key") + monkeypatch.setenv("OPENAI_BASE_URL", "https://litellm.example.gov") + + config = OpenAIConfig(name="gpt-4o-mini", client_type="openai") + + assert config.client_kwargs["api_key"] == "test-openai-key" + assert config.client_kwargs["base_url"] == "https://litellm.example.gov" + + +def test_openai_client_kwargs_user_values_take_precedence(monkeypatch): + """Explicit client kwargs should not be replaced by env vars""" + monkeypatch.setenv("OPENAI_API_KEY", "env-key") + monkeypatch.setenv("OPENAI_BASE_URL", "https://env.example") + + config = OpenAIConfig( + name="gpt-4o-mini", + client_type="openai", + client_kwargs={ + "api_key": "user-key", + "base_url": "https://user.example", + }, + ) + + assert config.client_kwargs["api_key"] == "user-key" + assert config.client_kwargs["base_url"] == "https://user.example" + + +def test_azure_client_kwargs_unchanged(monkeypatch): + """Azure env var mapping remains unchanged""" + monkeypatch.setenv("AZURE_OPENAI_API_KEY", "azure-key") + monkeypatch.setenv("AZURE_OPENAI_VERSION", "2024-02-15-preview") + monkeypatch.setenv("AZURE_OPENAI_ENDPOINT", "https://azure.example") + + config = OpenAIConfig(name="gpt-4o-mini", client_type="azure") + + assert config.client_kwargs["api_key"] == "azure-key" + assert config.client_kwargs["api_version"] == "2024-02-15-preview" + assert config.client_kwargs["azure_endpoint"] == "https://azure.example" + assert "base_url" not in config.client_kwargs + + +if __name__ == "__main__": + pytest.main(["-q", "--show-capture=all", Path(__file__), "-rapP"]) diff --git a/tests/python/unit/utilities/test_utilities_parsing.py b/tests/python/unit/utilities/test_utilities_parsing.py index 36afd4b0c..8cc1f0aed 100644 --- a/tests/python/unit/utilities/test_utilities_parsing.py +++ b/tests/python/unit/utilities/test_utilities_parsing.py @@ -43,6 +43,14 @@ def test_clean_backticks_from_llm_response(in_str, expected): ('{"a": True', {}), ('json\n{"key": "value"}', {"key": "value"}), ('{"a": True, "b": False}', {"a": True, "b": False}), + ( + ( + "I can extract date information from the URL provided. " + "However, the URL does not contain date information.\n\n" + '{"year": null, "month": null, "day": null}' + ), + {"year": None, "month": None, "day": None}, + ), ], ) def test_llm_response_as_json(in_str, expected): From 85e86e0f7704ccc7e9490ec125b11fb0fbf152a2 Mon Sep 17 00:00:00 2001 From: Byron Pullutasig <115118857+bpulluta@users.noreply.github.com> Date: Tue, 2 Jun 2026 15:16:53 -0600 Subject: [PATCH 2/6] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- compass/utilities/parsing.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/compass/utilities/parsing.py b/compass/utilities/parsing.py index 7e4d977d4..58dc13ebc 100644 --- a/compass/utilities/parsing.py +++ b/compass/utilities/parsing.py @@ -96,15 +96,15 @@ def _parse_first_json_payload(content): until successful. """ decoder = json.JSONDecoder() - for start_char in ("{", "["): - start_ind = content.find(start_char) - while start_ind != -1: - try: - parsed_content, __ = decoder.raw_decode(content[start_ind:]) - except json.decoder.JSONDecodeError: - start_ind = content.find(start_char, start_ind + 1) - else: - return parsed_content + for start_ind, start_char in enumerate(content): + if start_char not in ("{", "["): + continue + try: + parsed_content, __ = decoder.raw_decode(content[start_ind:]) + except json.decoder.JSONDecodeError: + continue + else: + return parsed_content return None From ab329cdb19c6014548d0865c6eb3c01c9c66c0a3 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 2 Jun 2026 21:26:34 +0000 Subject: [PATCH 3/6] Condense _parse_first_json_payload docstring to single-line summary --- compass/utilities/parsing.py | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/compass/utilities/parsing.py b/compass/utilities/parsing.py index 58dc13ebc..1d23cde72 100644 --- a/compass/utilities/parsing.py +++ b/compass/utilities/parsing.py @@ -75,26 +75,7 @@ def llm_response_as_json(content): def _parse_first_json_payload(content): - """[NOT PUBLIC API] Parse first valid JSON payload embedded in text - - Parameters - ---------- - content : str - Text that may contain one or more JSON payloads mixed with - additional non-JSON prose. - - Returns - ------- - object or None - Parsed JSON payload from the first decodable object/array in - the string. Returns ``None`` if no decodable payload exists. - - Notes - ----- - This helper scans for ``"{"`` and ``"["`` markers and attempts - ``json.JSONDecoder().raw_decode`` from each candidate position - until successful. - """ + """Parse first valid JSON payload embedded in text""" decoder = json.JSONDecoder() for start_ind, start_char in enumerate(content): if start_char not in ("{", "["): From 0f716958110abfef4bfc2edc0950a2d90e2905b0 Mon Sep 17 00:00:00 2001 From: Byron Pullutasig <115118857+bpulluta@users.noreply.github.com> Date: Tue, 2 Jun 2026 15:31:11 -0600 Subject: [PATCH 4/6] fix indentation issue causing ruff failure --- compass/utilities/parsing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compass/utilities/parsing.py b/compass/utilities/parsing.py index 1d23cde72..ac928c136 100644 --- a/compass/utilities/parsing.py +++ b/compass/utilities/parsing.py @@ -78,7 +78,7 @@ def _parse_first_json_payload(content): """Parse first valid JSON payload embedded in text""" decoder = json.JSONDecoder() for start_ind, start_char in enumerate(content): - if start_char not in ("{", "["): + if start_char not in {"{", "["}: continue try: parsed_content, __ = decoder.raw_decode(content[start_ind:]) From b8bb788f9e3425ab169f93885c6418acc79707d1 Mon Sep 17 00:00:00 2001 From: Byron Pullutasig <115118857+bpulluta@users.noreply.github.com> Date: Wed, 3 Jun 2026 10:08:44 -0600 Subject: [PATCH 5/6] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- compass/utilities/parsing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compass/utilities/parsing.py b/compass/utilities/parsing.py index ac928c136..19c1d08ce 100644 --- a/compass/utilities/parsing.py +++ b/compass/utilities/parsing.py @@ -59,7 +59,7 @@ def llm_response_as_json(content): content = json.loads(content) except json.decoder.JSONDecodeError: parsed_content = _parse_first_json_payload(content) - if parsed_content is not None: + if isinstance(parsed_content, dict): return parsed_content logger.exception( From da66d3f18c32aefd651baa54b4e3be368614f759 Mon Sep 17 00:00:00 2001 From: Byron Pullutasig <115118857+bpulluta@users.noreply.github.com> Date: Wed, 3 Jun 2026 10:20:54 -0600 Subject: [PATCH 6/6] formatted parsing.py for ruff issue --- compass/utilities/parsing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compass/utilities/parsing.py b/compass/utilities/parsing.py index 19c1d08ce..37fb6e52e 100644 --- a/compass/utilities/parsing.py +++ b/compass/utilities/parsing.py @@ -59,7 +59,7 @@ def llm_response_as_json(content): content = json.loads(content) except json.decoder.JSONDecodeError: parsed_content = _parse_first_json_payload(content) - if isinstance(parsed_content, dict): + if isinstance(parsed_content, dict): return parsed_content logger.exception(