Docs update; reduce CI to a single ruff check, remove the PyPI publish job, and apply formatter line reflows

Shashikant86 · Shashikant86 · commit 1f6bc331f988 · 2026-02-20T18:36:10.000Z
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -12,8 +12,8 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
-  lint:
-    name: Lint & Format Check
+  ruff:
+    name: Ruff Check
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
@@ -26,104 +26,5 @@ jobs:
       - name: Install dependencies
         run: uv sync --locked --dev
 
-      - name: Run ruff linter
+      - name: Run ruff
         run: uv run ruff check rlm_code tests
-
-      - name: Run ruff formatter check
-        run: uv run ruff format --check rlm_code tests
-
-  typecheck:
-    name: Type Check
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-
-      - name: Install uv
-        uses: astral-sh/setup-uv@v4
-        with:
-          version: "latest"
-
-      - name: Install dependencies
-        run: uv sync --locked --dev
-
-      - name: Run mypy on core modules
-        run: |
-          uv run mypy rlm_code/core/config.py rlm_code/core/debug_logger.py rlm_code/mcp/utils.py rlm_code/mcp/retry.py rlm_code/models/cache.py rlm_code/models/streaming.py rlm_code/validation/security.py --ignore-missing-imports
-
-  test:
-    name: Test - Python ${{ matrix.python-version }} on ${{ matrix.os }}
-    runs-on: ${{ matrix.os }}
-    strategy:
-      fail-fast: false
-      matrix:
-        os: [ubuntu-latest, macos-latest]
-        python-version: ["3.11", "3.12", "3.13"]
-
-    steps:
-      - uses: actions/checkout@v4
-
-      - name: Install uv
-        uses: astral-sh/setup-uv@v4
-        with:
-          version: "latest"
-
-      - name: Install dependencies
-        run: uv sync --locked --python ${{ matrix.python-version }} --extra test
-
-      - name: Run tests
-        run: uv run pytest tests/ -v --cov=rlm_code --cov-report=xml --cov-report=term-missing
-
-      - name: Upload coverage
-        if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.12'
-        uses: codecov/codecov-action@v4
-        with:
-          file: ./coverage.xml
-          fail_ci_if_error: false
-
-  rlm-benchmark-gate:
-    name: RLM Benchmark Gate
-    runs-on: ubuntu-latest
-    needs: [lint, test]
-    steps:
-      - uses: actions/checkout@v4
-
-      - name: Install uv
-        uses: astral-sh/setup-uv@v4
-        with:
-          version: "latest"
-
-      - name: Install dependencies
-        run: uv sync --locked --extra test
-
-      - name: Run deterministic RLM benchmark gate
-        run: |
-          uv run python scripts/rlm_bench_gate.py \
-            --baseline tests/fixtures/rlm_ci_baseline_generic_smoke.json \
-            --preset generic_smoke \
-            --limit 2
-
-  build:
-    name: Build Package
-    runs-on: ubuntu-latest
-    needs: [lint, test, rlm-benchmark-gate]
-    steps:
-      - uses: actions/checkout@v4
-
-      - name: Install uv
-        uses: astral-sh/setup-uv@v4
-        with:
-          version: "latest"
-
-      - name: Build package
-        run: uv build
-
-      - name: Check distribution
-        run: |
-          uv tool run twine check dist/*
-
-      - name: Upload build artifacts
-        uses: actions/upload-artifact@v4
-        with:
-          name: dist
-          path: dist/
-          retention-days: 7
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -30,22 +30,3 @@ jobs:
           name: release-dist
           path: dist/
           retention-days: 7
-
-  publish:
-    name: Publish to PyPI
-    needs: build
-    runs-on: ubuntu-latest
-    permissions:
-      id-token: write
-    environment:
-      name: pypi
-      url: https://pypi.org/project/rlm-code/
-    steps:
-      - name: Download distributions
-        uses: actions/download-artifact@v4
-        with:
-          name: release-dist
-          path: dist/
-
-      - name: Publish
-        uses: pypa/gh-action-pypi-publish@release/v1
diff --git a/rlm_code/commands/slash_commands.py b/rlm_code/commands/slash_commands.py
@@ -1600,9 +1600,7 @@ def cmd_harness(self, args: list):
                 show_warning_message("strategy=codemode requires mcp=on. Enabling MCP.")
                 include_mcp = True
             if strategy == "codemode" and allowlist:
-                show_warning_message(
-                    "tools=... allowlist is ignored for strategy=codemode."
-                )
+                show_warning_message("tools=... allowlist is ignored for strategy=codemode.")
                 allowlist = None
 
             console.print()
diff --git a/rlm_code/harness/registry.py b/rlm_code/harness/registry.py
@@ -333,7 +333,10 @@ def _is_mcp_tool_allowed(self, full_name: str) -> tuple[bool, str | None]:
         normalized_server = server_name.strip().lower()
         normalized_tool = tool_name.strip().lower()
 
-        if self._mcp_allowed_servers is not None and normalized_server not in self._mcp_allowed_servers:
+        if (
+            self._mcp_allowed_servers is not None
+            and normalized_server not in self._mcp_allowed_servers
+        ):
             return (
                 False,
                 f"MCP tool '{full_name}' blocked by MCP policy (server '{server_name}' not allowed).",
@@ -351,11 +354,7 @@ def list_tools(self, *, include_mcp: bool = True) -> list[HarnessToolSpec]:
         if include_mcp and self.mcp_manager is not None:
             mcp_specs = self._list_mcp_specs()
             if self._mcp_allowed_tools is not None or self._mcp_allowed_servers is not None:
-                mcp_specs = [
-                    row
-                    for row in mcp_specs
-                    if self._is_mcp_tool_allowed(row.name)[0]
-                ]
+                mcp_specs = [row for row in mcp_specs if self._is_mcp_tool_allowed(row.name)[0]]
             specs = list(specs_map.values())
             specs.extend(mcp_specs)
             alias_candidates: dict[str, list[HarnessToolSpec]] = {}
@@ -985,7 +984,9 @@ def _domain_allowed(*, domain: str, allowed_domains: set[str], blocked_domains:
         return False
     if blocked_domains and any(domain == d or domain.endswith(f".{d}") for d in blocked_domains):
         return False
-    if allowed_domains and not any(domain == d or domain.endswith(f".{d}") for d in allowed_domains):
+    if allowed_domains and not any(
+        domain == d or domain.endswith(f".{d}") for d in allowed_domains
+    ):
         return False
     return True
 
diff --git a/rlm_code/harness/runner.py b/rlm_code/harness/runner.py
@@ -250,7 +250,9 @@ def _run_codemode_strategy(
             mcp_tool_allowlist=mcp_tool_allowlist,
             mcp_server=mcp_server,
         )
-        server_name = self._resolve_codemode_server(tool_rows=tool_rows, requested_server=mcp_server)
+        server_name = self._resolve_codemode_server(
+            tool_rows=tool_rows, requested_server=mcp_server
+        )
         if not server_name:
             return HarnessRunResult(
                 completed=False,
diff --git a/rlm_code/rlm/benchmark_manager.py b/rlm_code/rlm/benchmark_manager.py
@@ -174,7 +174,11 @@ def run_benchmark(
         """Execute a benchmark preset and persist aggregate summary."""
         resolved_mode = self._normalize_benchmark_mode(mode)
         resolved_harness_strategy = self._normalize_harness_strategy(harness_strategy)
-        if resolved_mode == "harness" and resolved_harness_strategy == "codemode" and not include_mcp:
+        if (
+            resolved_mode == "harness"
+            and resolved_harness_strategy == "codemode"
+            and not include_mcp
+        ):
             logger.warning("Harness codemode strategy requires MCP; enabling include_mcp.")
             include_mcp = True
         benchmark_id = datetime.now(timezone.utc).strftime("bench_%Y%m%d_%H%M%S_%f")
@@ -291,11 +295,7 @@ def run_benchmark(
             "mode": resolved_mode,
             "mcp_enabled": bool(include_mcp) if resolved_mode == "harness" else False,
             "mcp_server": str(mcp_server) if (resolved_mode == "harness" and mcp_server) else None,
-            "harness_strategy": (
-                resolved_harness_strategy
-                if resolved_mode == "harness"
-                else None
-            ),
+            "harness_strategy": (resolved_harness_strategy if resolved_mode == "harness" else None),
             "source": extra_sources.get(str(preset).strip().lower(), "builtin"),
             "description": extra_descriptions.get(str(preset).strip().lower(), ""),
             "pack_paths": [str(item) for item in (pack_paths or self._benchmark_pack_paths)],
@@ -961,12 +961,11 @@ def _run_benchmark_case_harness(
         for step in tool_steps:
             if step.tool_result is None:
                 continue
-            metadata = step.tool_result.metadata if isinstance(step.tool_result.metadata, dict) else {}
+            metadata = (
+                step.tool_result.metadata if isinstance(step.tool_result.metadata, dict) else {}
+            )
             resolved_name = str(
-                metadata.get("tool_full_name")
-                or metadata.get("resolved_tool")
-                or step.tool
-                or ""
+                metadata.get("tool_full_name") or metadata.get("resolved_tool") or step.tool or ""
             ).strip()
             if resolved_name.startswith("mcp:"):
                 mcp_tool_calls += 1
@@ -982,8 +981,7 @@ def _run_benchmark_case_harness(
             ):
                 codemode_discovery_calls += 1
         codemode_guardrail_blocked = any(
-            str(step.action) == "codemode_plan"
-            and "guardrail" in str(step.reasoning or "").lower()
+            str(step.action) == "codemode_plan" and "guardrail" in str(step.reasoning or "").lower()
             for step in result.steps
         )
         return {
diff --git a/tests/test_harness_runner.py b/tests/test_harness_runner.py
@@ -156,7 +156,9 @@ def test_harness_run_supports_codemode_strategy(tmp_path: Path) -> None:
     manager = _CodeModeMCPManager()
     runner = HarnessRunner(llm_connector=connector, mcp_manager=manager, workdir=tmp_path)
 
-    result = runner.run(task="Get weather in SF", strategy="codemode", include_mcp=True, max_steps=4)
+    result = runner.run(
+        task="Get weather in SF", strategy="codemode", include_mcp=True, max_steps=4
+    )
 
     assert result.completed is True
     assert "San Francisco" in result.final_response
diff --git a/tests/test_provider_registry.py b/tests/test_provider_registry.py
@@ -186,9 +186,14 @@ def test_connector_supported_provider_metadata_includes_connection_type(tmp_path
     assert isinstance(openai["example_models"], list)
 
 
-def test_connector_uses_superqode_model_catalog(tmp_path: Path):
+def test_connector_uses_superqode_model_catalog(tmp_path: Path, monkeypatch):
     """Provider model list should align with SuperQode catalog where available."""
     connector = make_connector(tmp_path)
+    monkeypatch.setattr(
+        connector,
+        "_list_opencode_models_cached",
+        lambda timeout=0.6: [],
+    )
 
     openai_models = connector.list_provider_example_models("openai", limit=3)
     gemini_models = connector.list_provider_example_models("gemini", limit=2)
diff --git a/uv.lock b/uv.lock