Source code for open_atp.harness.vibe

"""Mistral Vibe CLI harness (drives the builtin ``lean`` agent / Leanstral)."""

from __future__ import annotations

import json
import os
import shutil
from pathlib import Path
from typing import Any

from open_atp.harness._paths import _SCRIPTS, _VIBE_ASSETS
from open_atp.harness.base import AuthSpec, Harness, HarnessRunResult
from open_atp.harness.bundles import AssetBundle



[docs]
class VibeHarness(Harness):
    """Mistral Vibe CLI driving its builtin ``lean`` agent (Leanstral) in a sandbox.

    Vibe's ``lean`` agent *is* Leanstral: ``vibe -p ... --agent lean`` pins the model
    to ``leanstral`` via the builtin agent profile (there is no ``--model`` flag). The
    bare ``lean`` profile is Labs-gated, so the ``lean-standin`` stand-in (vendored
    under ``assets/vibe/``) runs the same Lean scaffold on a non-Labs model until Labs
    access is enabled. The selected profile is named by :attr:`agent`. Since vibe has
    no ``--model`` flag, the stand-in profile templates ``<<MODEL>>`` and the harness
    substitutes :attr:`model` into it at :meth:`configure_wd` time -- so the model is a
    knob (default Magistral) just like the other harnesses' ``--model``.

    Two things differ from the other harnesses:

    * **VIBE_HOME is workdir-local.** ``vibe_agent.sh`` exports
      ``VIBE_HOME=$PWD/.vibe`` so vibe's config (which un-gates the builtin ``lean``
      agent), the vendored stand-in agent, and the per-session log all live under the
      workdir and sync back out with it.
    * **Cost comes from the session log, not stdout.** ``--output streaming`` carries
      only conversation messages -- no token/cost totals. Those live in vibe's
      per-session ``meta.json``; :meth:`parse` reads it from the synced-back log dir.
    """

    name = "vibe"

    #: Workdir-relative VIBE_HOME (matches the export in ``vibe_agent.sh``).
    VIBE_HOME_DIR = ".vibe"

    def __init__(
        self,
        model: str,
        effort: str = "medium",
        *,
        agent: str = "lean",
        max_turns: int | None = None,
        max_price: float | None = None,
        assets: AssetBundle | None = None,
    ) -> None:
        super().__init__(model, effort, assets)
        self.agent = agent
        self.max_turns = max_turns
        self.max_price = max_price
        #: Set in :meth:`configure_wd`; where :meth:`parse` looks for session logs.
        self._session_log_dir: Path | None = None


[docs]
    @classmethod
    def from_config(cls, config: Any, *, assets: AssetBundle | None = None) -> Harness:
        return cls(
            config.model,
            config.effort,
            agent=getattr(config, "agent", "lean"),
            max_turns=getattr(config, "max_turns", None),
            max_price=getattr(config, "max_price", None),
            assets=assets,
        )



[docs]
    def auth_spec(self) -> AuthSpec:
        # The lean agent's provider reads MISTRAL_API_KEY from the process env
        # (api_key_env_var); forward it from the host into the sandbox.
        if "MISTRAL_API_KEY" not in os.environ:
            raise RuntimeError(
                "vibe harness requires MISTRAL_API_KEY (a Mistral La Plateforme key)"
            )
        return AuthSpec(env=["MISTRAL_API_KEY"])


    def configure_wd(self, wd: Path, prompt: str) -> None:
        super().configure_wd(wd, prompt)
        # Workdir-local VIBE_HOME: a minimal config that un-gates the builtin `lean`
        # agent, plus the vendored stand-in agent profile. Session logs (with cost)
        # default to VIBE_HOME/logs/session, so they land here and sync back out.
        vibe_home = wd / self.VIBE_HOME_DIR
        agents_dir = vibe_home / "agents"
        agents_dir.mkdir(parents=True, exist_ok=True)
        # ``bypass_tool_permissions`` is the *only* thing that ungates mutating tools
        # (``edit``, ``write_file``) in ``vibe -p`` programmatic mode: there is no
        # approval callback, so any tool that resolves to ``ASK`` is answered "Tool
        # execution not permitted" and silently skipped (agent_loop.py short-circuits
        # to EXECUTE when this is set). The ``--auto-approve``/``--yolo`` CLI flag can't
        # be used instead -- it forces ``--agent auto-approve``, discarding the ``lean``
        # scaffold. The builtin ``lean`` profile sets no permission bypass, so without
        # this even the real Leanstral cannot write its proof. Set it on the base config
        # so it applies to ``--agent lean`` and the vendored stand-ins alike.
        #
        # ``mcp_servers`` wires in lean-lsp-mcp (the same server the other harnesses
        # mount via .mcp.json) so the agent actually gets the compile/diagnostic
        # feedback loop the prompt assumes. Vibe publishes its tools as
        # ``lean-lsp_<tool>``; discovery failure is non-fatal (logged, agent runs
        # without them) so this is safe even if the server is missing.
        #
        # ``tool_timeout_sec`` mirrors the 180s opencode fix: the first
        # ``lean_diagnostic_messages`` call starts ``lake serve`` and loads the file's
        # full Mathlib import closure into the LSP, which blows through vibe's 60s
        # default tool timeout on a cold, few-CPU Modal sandbox (the
        # ``test_lean_lsp_mcp[vibe-modal]`` probe timed out at exactly 60s). Vibe's
        # field is in *seconds* (opencode's is ms).
        (vibe_home / "config.toml").write_text(
            'installed_agents = ["lean"]\n'
            "bypass_tool_permissions = true\n\n"
            "[session_logging]\nenabled = true\n\n"
            "[[mcp_servers]]\n"
            'transport = "stdio"\n'
            'name = "lean-lsp"\n'
            'command = "lean-lsp-mcp"\n'
            "tool_timeout_sec = 180\n"
        )
        # The vendored stand-in profile lives as ``<agent>.toml`` (``lean-standin``).
        # Vibe has no ``--model`` flag, so the model is templated into the profile:
        # ``_render`` substitutes ``<<MODEL>>`` with ``self.model`` (default Magistral)
        # as it writes the copy into the workdir. The builtin ``lean`` agent (real
        # Leanstral) ships with vibe and pins its own model, so it needs no profile.
        if self.agent != "lean":
            profile = _VIBE_ASSETS / f"{self.agent}.toml"
            (agents_dir / profile.name).write_text(self._render(profile.read_text()))
        # Mount the selected bundle's skills (by default the vendored ``lean-proof``
        # skill) under VIBE_HOME/skills -- vibe's *user* skills dir, which loads
        # regardless of project-folder trust. The other harnesses copy into
        # ``.claude/skills`` / ``.agents/skills`` (project dirs); those are gated
        # behind ``--trust`` in ``vibe -p``, so the VIBE_HOME-relative location is
        # the parity-preserving spot.
        self._copy_skills(wd, f"{self.VIBE_HOME_DIR}/skills")
        self._session_log_dir = vibe_home / "logs" / "session"

    def _agent_command(self) -> str:
        template = (_SCRIPTS / "vibe_agent.sh").read_text()
        extra = ""
        if self.max_turns is not None:
            extra += f" \\\n    --max-turns {int(self.max_turns)}"
        if self.max_price is not None:
            extra += f" \\\n    --max-price {self.max_price}"
        return template.replace("<<AGENT>>", self.agent).replace("<<EXTRA>>", extra)


[docs]
    def parse(self, lines: list[str]) -> HarnessRunResult:
        # Final assistant text + stop signal come from the NDJSON stream; token/cost
        # totals come from the session log, which the stream does not carry.
        result = self._parse_lines(lines)
        stats = self._read_session_stats()
        if stats is not None:
            cost = stats.get("session_cost")
            if isinstance(cost, (int, float)):
                result.cost_usd = float(cost)
            result.input_tokens = int(stats.get("session_prompt_tokens", 0) or 0)
            result.output_tokens = int(stats.get("session_completion_tokens", 0) or 0)
        return result



[docs]
    def collect_logs(self, wd: Path, logs_dir: Path) -> None:
        # Vibe's per-session record (message log + meta.json with cost) lives under the
        # workdir-local VIBE_HOME (``.vibe/logs``). Move the whole logs tree out to
        # ``logs/vibe-session`` -- preserving its internal structure but dropping the
        # ``.vibe/logs`` prefix -- so it leaves the proof project. The rest of VIBE_HOME
        # (config.toml, agents/) is scaffolding and stays in the workdir.
        src = wd / self.VIBE_HOME_DIR / "logs"
        if src.is_dir():
            logs_dir.mkdir(parents=True, exist_ok=True)
            shutil.move(str(src), str(logs_dir / "vibe-session"))


    def _read_session_stats(self) -> dict[str, Any] | None:
        """Load ``stats`` from the most recent session ``meta.json``, if present."""
        if self._session_log_dir is None or not self._session_log_dir.is_dir():
            return None
        metas = sorted(self._session_log_dir.glob("*/meta.json"))
        if not metas:
            return None
        try:
            data = json.loads(metas[-1].read_text())
        except (json.JSONDecodeError, OSError):
            return None
        stats = data.get("stats")
        return stats if isinstance(stats, dict) else None

    def _parse_lines(self, lines: list[str]) -> HarnessRunResult:
        """Pull the final assistant message out of vibe's NDJSON message stream."""
        result = HarnessRunResult()
        for line in lines:
            line = line.strip()
            if not line.startswith("{"):
                continue
            try:
                obj = json.loads(line)
            except json.JSONDecodeError:
                continue
            if obj.get("role") == "assistant":
                content = obj.get("content")
                if isinstance(content, str) and content:
                    result.result_text = content
        return result