Source code for open_atp.harness.claude_code

"""Claude Code CLI harness."""

from __future__ import annotations

import json
import os
import shutil
from pathlib import Path

from open_atp.harness._catalog import resolve_plugin
from open_atp.harness._paths import _MCP_JSON, _SCRIPTS
from open_atp.harness.base import Harness, HarnessRunResult



[docs]
class ClaudeCodeHarness(Harness):
    """Claude Code CLI, authenticated by a long-lived ``CLAUDE_CODE_OAUTH_TOKEN``.

    Claude Code is the only harness that loads plugins, so they live here rather than
    on the prover's shared skills list.

    Parameters
    ----------
    model : str
        Model id the agent runs. Default ``"claude-opus-4-8"``.
    effort : str
        Reasoning-effort level. Default ``"high"``.
    plugins : list[str], optional
        Claude Code plugins to load, each a name (resolved from the vendored
        ``lean4-skills`` catalog) or a full path to a ``.claude-plugin/plugin.json``
        tree. Default ``["lean4"]``; an empty list loads none.
    oauth_token : str, optional
        The ``CLAUDE_CODE_OAUTH_TOKEN`` (from ``claude setup-token``) to forward into
        the sandbox. ``None`` (default) reads it from the host
        ``CLAUDE_CODE_OAUTH_TOKEN`` env var; resolution fails if neither is set.

    Examples
    --------
    >>> from open_atp.harness import ClaudeCodeHarness
    >>> harness = ClaudeCodeHarness()
    >>> harness.name
    'claude_code'
    >>> harness.plugins
    ['lean4']

    With the credential supplied explicitly, :meth:`agent_auth` resolves the full
    forwarded env without touching the host environment:

    >>> harness = ClaudeCodeHarness(plugins=[], oauth_token="sk-ant-oat-fake")
    >>> harness.agent_auth().env
    {'IS_SANDBOX': '1', 'CLAUDE_CODE_OAUTH_TOKEN': 'sk-ant-oat-fake'}
    """

    #: Harness identifier (see the doctest in the class docstring).
    name = "claude_code"

    #: Workdir-relative skills destination for this harness.
    #: NOTE(review): presumably consumed by the ``Harness`` base class -- confirm.
    skills_dest = ".claude/skills"

    #: Where plugin dirs are staged in the workdir (the launch script's
    #: ``--plugin-dir`` flags reference this, so the two must agree).
    PLUGINS_DIR = ".plugins"

    def __init__(
        self,
        *,
        model: str = "claude-opus-4-8",
        effort: str = "high",
        plugins: list[str] | None = None,
        oauth_token: str | None = None,
    ) -> None:
        """Store the harness configuration; see the class docstring for parameters."""
        super().__init__(model=model, effort=effort)
        # plugins documented as a class Parameter/Attribute above.
        # Copy the caller's list so later mutation on their side cannot silently
        # change which plugins this harness stages and launches with.
        self.plugins = list(plugins) if plugins is not None else ["lean4"]
        self._oauth_token = oauth_token

    def stage_wd(self, wd: Path) -> None:
        """Stage the workdir: base-class staging, then ``.mcp.json`` and plugins.

        Parameters
        ----------
        wd : Path
            Workdir to populate. ``_MCP_JSON`` is copied to ``wd/.mcp.json`` and
            every configured plugin is copied under ``wd/.plugins``.
        """
        super().stage_wd(wd)
        # Project-scope MCP config (passed via --mcp-config) and plugins.
        shutil.copy2(_MCP_JSON, wd / ".mcp.json")
        self._copy_plugins(wd)

    def _resolved_plugins(self) -> list[Path]:
        """``self.plugins`` (names or paths) resolved to plugin source dirs."""
        return [resolve_plugin(p) for p in self.plugins]

    def _copy_plugins(self, wd: Path) -> None:
        """Stage each configured plugin under ``wd/.plugins/<name>``.

        Claude is the only harness that consumes plugins (so they live on this harness,
        not the shared skills list); the launch script
        loads them with ``--plugin-dir`` (see :meth:`_plugin_flags`). Plugins are copied
        *into* the workdir (not referenced from the host vendor tree) so they sync
        into the sandbox with everything else.
        """
        for plugin in self._resolved_plugins():
            # dirs_exist_ok: re-staging an already populated workdir must not fail.
            shutil.copytree(
                plugin, wd / self.PLUGINS_DIR / plugin.name, dirs_exist_ok=True
            )

    def _plugin_flags(self) -> str:
        """``--plugin-dir`` flags (one per plugin) appended to the launch command.

        Empty when no plugins; otherwise a leading line-continuation so it grafts
        onto the end of the ``claude -p ...`` invocation.
        """
        return "".join(
            f" \\\n    --plugin-dir {self.PLUGINS_DIR}/{p.name}"
            for p in self._resolved_plugins()
        )

    def _static_env(self) -> dict[str, str]:
        """Return the credential-independent env vars for the agent process.

        Always contains ``IS_SANDBOX=1``; ``CLAUDE_CODE_FORK_SUBAGENT=1`` is added
        only when at least one plugin is configured.
        """
        # Lets bypassPermissions run non-interactively in the container.
        env: dict[str, str] = {"IS_SANDBOX": "1"}
        # Plugin-provided subagents (e.g. lean4's sorry-filler-deep) are only
        # dispatchable in a headless `-p` run with subagent forking enabled.
        if self.plugins:
            env["CLAUDE_CODE_FORK_SUBAGENT"] = "1"
        return env

    def _required_env(self) -> dict[str, str]:
        """Return the credential env var, resolving the OAuth token.

        The explicitly supplied ``oauth_token`` wins; otherwise the host
        ``CLAUDE_CODE_OAUTH_TOKEN`` env var is used. An empty string counts as
        unset in both places.

        Raises
        ------
        RuntimeError
            If no non-empty token is available from either source.
        """
        # A long-lived token (from `claude setup-token`) bills against a Claude
        # subscription rather than at the higher per-API-call rate.
        token = self._oauth_token or os.environ.get("CLAUDE_CODE_OAUTH_TOKEN")
        if not token:
            raise RuntimeError(
                "claude_code harness requires CLAUDE_CODE_OAUTH_TOKEN"
                " from `claude setup-token`"
            )
        return {"CLAUDE_CODE_OAUTH_TOKEN": token}

    def _agent_command(self) -> str:
        """Build the launch command from the ``claude_code_agent.sh`` template.

        The script text is passed through ``self._render`` and the
        ``<<PLUGIN_FLAGS>>`` placeholder is then replaced with
        :meth:`_plugin_flags` (an empty string when no plugins are configured).
        """
        template = self._render((_SCRIPTS / "claude_code_agent.sh").read_text())
        return template.replace("<<PLUGIN_FLAGS>>", self._plugin_flags())

    def _parse_lines(self, lines: list[str]) -> HarnessRunResult:
        """Parse ``claude -p --output-format stream-json`` output.

        Each line is treated as one JSON document. Blank lines, lines that are
        not valid JSON, and JSON values that are not objects (numbers, strings,
        arrays, ``null``) are skipped. Only objects whose ``"type"`` is
        ``"result"`` contribute; if several are present, later ones overwrite
        earlier ones.

        Parameters
        ----------
        lines : list[str]
            Raw output lines of the agent run.

        Returns
        -------
        HarnessRunResult
            Populated from the result event's ``stop_reason``,
            ``total_cost_usd``, ``subtype``, ``result`` (kept only when it is a
            string) and ``usage.input_tokens`` / ``usage.output_tokens``. Fields
            stay at their ``HarnessRunResult()`` defaults when no result event
            is found.
        """
        result = HarnessRunResult()
        for raw in lines:
            line = raw.strip()
            if not line:
                continue
            try:
                obj = json.loads(line)
            except json.JSONDecodeError:
                continue
            # json.loads accepts any JSON value; only objects can be events.
            # Without this guard a stray `123` or `"text"` line raised
            # AttributeError on `.get` and aborted the whole parse.
            if not isinstance(obj, dict) or obj.get("type") != "result":
                continue
            result.stop_reason = obj.get("stop_reason")
            result.cost_usd = obj.get("total_cost_usd")
            result.subtype = obj.get("subtype")
            rt = obj.get("result")
            result.result_text = rt if isinstance(rt, str) else None
            # `"usage": null` (or any non-object) must not crash the parse;
            # treat it as "no usage reported" and keep the current counts.
            usage = obj.get("usage")
            if not isinstance(usage, dict):
                usage = {}
            result.input_tokens = usage.get("input_tokens", result.input_tokens)
            result.output_tokens = usage.get("output_tokens", result.output_tokens)
        return result