# Source code for open_atp.harness.claude_code
"""Claude Code CLI harness."""
from __future__ import annotations

import json
import os
import shlex
import shutil
from pathlib import Path

from open_atp.harness._catalog import resolve_plugin
from open_atp.harness._paths import _MCP_JSON, _SCRIPTS
from open_atp.harness.base import Harness, HarnessRunResult
# [docs]
class ClaudeCodeHarness(Harness):
    """Claude Code CLI, authenticated by a long-lived ``CLAUDE_CODE_OAUTH_TOKEN``.

    Claude Code is the only harness that loads plugins, so they live here rather
    than on the prover's shared skills list.

    Parameters
    ----------
    model : str
        Model id the agent runs. Default ``"claude-opus-4-8"``.
    effort : str
        Reasoning-effort level. Default ``"high"``.
    plugins : list[str], optional
        Claude Code plugins to load, each a name (resolved from the vendored
        ``lean4-skills`` catalog) or a full path to a ``.claude-plugin/plugin.json``
        tree. Default ``["lean4"]``; an empty list loads none.
    oauth_token : str, optional
        The ``CLAUDE_CODE_OAUTH_TOKEN`` (from ``claude setup-token``) to forward into
        the sandbox. ``None`` (default) reads it from the host
        ``CLAUDE_CODE_OAUTH_TOKEN`` env var; resolution fails if neither is set.

    Examples
    --------
    >>> from open_atp.harness import ClaudeCodeHarness
    >>> harness = ClaudeCodeHarness()
    >>> harness.name
    'claude_code'
    >>> harness.plugins
    ['lean4']

    With the credential supplied explicitly, :meth:`agent_auth` resolves the full
    forwarded env without touching the host environment:

    >>> harness = ClaudeCodeHarness(plugins=[], oauth_token="sk-ant-oat-fake")
    >>> harness.agent_auth().env
    {'IS_SANDBOX': '1', 'CLAUDE_CODE_OAUTH_TOKEN': 'sk-ant-oat-fake'}
    """

    name = "claude_code"
    skills_dest = ".claude/skills"

    #: Where plugin dirs are staged in the workdir (the launch script's
    #: ``--plugin-dir`` flags reference this, so the two must agree).
    PLUGINS_DIR = ".plugins"

    def __init__(
        self,
        *,
        model: str = "claude-opus-4-8",
        effort: str = "high",
        plugins: list[str] | None = None,
        oauth_token: str | None = None,
    ) -> None:
        super().__init__(model=model, effort=effort)
        # plugins documented as a class Parameter/Attribute above.
        # ``None`` means "use the default"; an explicit empty list loads none.
        self.plugins = plugins if plugins is not None else ["lean4"]
        self._oauth_token = oauth_token

    def stage_wd(self, wd: Path) -> None:
        """Stage the workdir: base staging, then the MCP config and plugins.

        Parameters
        ----------
        wd : Path
            Workdir to populate. ``.mcp.json`` and the plugin directories are
            written beneath it.
        """
        super().stage_wd(wd)
        # Project-scope MCP config (passed via --mcp-config) and plugins.
        shutil.copy2(_MCP_JSON, wd / ".mcp.json")
        self._copy_plugins(wd)

    def _resolved_plugins(self) -> list[Path]:
        """``self.plugins`` (names or paths) resolved to plugin source dirs."""
        return [resolve_plugin(p) for p in self.plugins]

    def _copy_plugins(self, wd: Path) -> None:
        """Stage each configured plugin under ``wd/.plugins/<name>``.

        Claude is the only harness that consumes plugins (so they live on this
        harness, not the shared skills list); the launch script loads them with
        ``--plugin-dir`` (see :meth:`_plugin_flags`). Plugins are copied *into*
        the workdir (not referenced from the host vendor tree) so they sync into
        the sandbox with everything else.
        """
        for plugin in self._resolved_plugins():
            # dirs_exist_ok lets staging be repeated over an existing workdir.
            shutil.copytree(
                plugin, wd / self.PLUGINS_DIR / plugin.name, dirs_exist_ok=True
            )

    def _plugin_flags(self) -> str:
        """``--plugin-dir`` flags (one per plugin) appended to the launch command.

        Empty when no plugins; otherwise a leading line-continuation so it grafts
        onto the end of the ``claude -p ...`` invocation. Each directory is
        shell-quoted so a plugin name containing whitespace or shell
        metacharacters stays a single argument; ordinary names are emitted
        unchanged.
        """
        return "".join(
            " \\\n --plugin-dir " + shlex.quote(f"{self.PLUGINS_DIR}/{plugin.name}")
            for plugin in self._resolved_plugins()
        )

    def _static_env(self) -> dict[str, str]:
        """Environment variables that do not depend on any credential."""
        # Lets bypassPermissions run non-interactively in the container.
        env = {"IS_SANDBOX": "1"}
        # Plugin-provided subagents (e.g. lean4's sorry-filler-deep) are only
        # dispatchable in a headless `-p` run with subagent forking enabled.
        if self.plugins:
            env["CLAUDE_CODE_FORK_SUBAGENT"] = "1"
        return env

    def _required_env(self) -> dict[str, str]:
        """Return the credential env var, preferring the explicit constructor token.

        Raises
        ------
        RuntimeError
            If no token was passed to the constructor and the host
            ``CLAUDE_CODE_OAUTH_TOKEN`` env var is unset or empty.
        """
        # A long-lived token (from `claude setup-token`) bills against a Claude
        # subscription rather than at the higher per-API-call rate.
        token = self._oauth_token or os.environ.get("CLAUDE_CODE_OAUTH_TOKEN")
        if not token:
            raise RuntimeError(
                "claude_code harness requires CLAUDE_CODE_OAUTH_TOKEN"
                " from `claude setup-token`"
            )
        return {"CLAUDE_CODE_OAUTH_TOKEN": token}

    def _agent_command(self) -> str:
        """Render the launch script and splice in the ``--plugin-dir`` flags."""
        template = self._render((_SCRIPTS / "claude_code_agent.sh").read_text())
        return template.replace("<<PLUGIN_FLAGS>>", self._plugin_flags())

    def _parse_lines(self, lines: list[str]) -> HarnessRunResult:
        """Parse ``claude -p --output-format stream-json`` output.

        Blank lines, lines that are not valid JSON, and JSON values that are not
        objects are skipped. Fields are taken from every ``"type": "result"``
        object, so when several are present the last one wins.
        """
        result = HarnessRunResult()
        for line in lines:
            line = line.strip()
            if not line:
                continue
            try:
                obj = json.loads(line)
            except json.JSONDecodeError:
                continue
            # A bare number, string, list or null is valid JSON but is not an
            # event object; treat it like any other unparseable noise.
            if not isinstance(obj, dict) or obj.get("type") != "result":
                continue
            result.stop_reason = obj.get("stop_reason")
            result.cost_usd = obj.get("total_cost_usd")
            result.subtype = obj.get("subtype")
            rt = obj.get("result")
            result.result_text = rt if isinstance(rt, str) else None
            # "usage" may be missing or null; only read counts from an object.
            usage = obj.get("usage")
            if isinstance(usage, dict):
                result.input_tokens = usage.get("input_tokens", result.input_tokens)
                result.output_tokens = usage.get(
                    "output_tokens", result.output_tokens
                )
        return result