Source code for open_atp.lean

"""The Lean input contract: the project to complete, the task, and staging helpers.

The input contract is a *full lake project*: it must carry its own
``lean-toolchain`` and ``lake-manifest.json``. We reject up front when the
project's pinned toolchain (or Mathlib revision) does not match the sandbox image
we are about to run it in, rather than failing deep inside a build.

A full lake project on disk is already a :class:`LeanProject` (just
``LeanProject(Path(path))``); the only nontrivial case is staging one or more bare
``.lean`` files into the pinned Mathlib skeleton, which :func:`stage_files` does.
"""

from __future__ import annotations

import json
import re
import shutil
from collections.abc import Sequence
from dataclasses import dataclass, field
from pathlib import Path

from open_atp.images import SKELETON_DIR


[docs] class ToolchainMismatch(ValueError): """Raised when an uploaded project's toolchain does not match the image's pin."""
class MathlibRevMismatch(ValueError): """Raised when an uploaded project's Mathlib revision differs from the image's."""
[docs] @dataclass(frozen=True) class LeanProject: """A complete lake project on disk. Parameters ---------- root: Directory containing ``lakefile.toml`` (or ``lakefile.lean``), ``lean-toolchain``, and ``lake-manifest.json``. Examples -------- Point :class:`LeanProject` at a lake project directory and inspect it (here a minimal project is written to a temp dir to keep the example self-contained): >>> import tempfile >>> from pathlib import Path >>> from open_atp.lean import LeanProject >>> root = Path(tempfile.mkdtemp()) >>> _ = (root / "lakefile.toml").write_text('name = "demo"\\n') >>> _ = (root / "lean-toolchain").write_text("leanprover/lean4:v4.31.0\\n") >>> _ = (root / "Demo.lean").write_text("theorem t : True := by sorry\\n") >>> project = LeanProject(root) >>> project.lean_toolchain 'leanprover/lean4:v4.31.0' >>> [p.name for p in project.lean_files()] ['Demo.lean'] >>> [p.name for p in project.files_with_sorry()] ['Demo.lean'] """ root: Path def __post_init__(self) -> None: object.__setattr__(self, "root", Path(self.root).resolve()) if not self._lakefile(): raise FileNotFoundError(f"No lakefile.toml/lakefile.lean under {self.root}") if not (self.root / "lean-toolchain").is_file(): raise FileNotFoundError(f"No lean-toolchain under {self.root}") def _lakefile(self) -> Path | None: for name in ("lakefile.toml", "lakefile.lean"): p = self.root / name if p.is_file(): return p return None @property def lean_toolchain(self) -> str: """The pinned toolchain, e.g. ``leanprover/lean4:v4.31.0``.""" return (self.root / "lean-toolchain").read_text().strip() @property def mathlib_rev(self) -> str | None: """The pinned Mathlib revision from ``lake-manifest.json``, if present. Prefers the human-declared ``inputRev`` (e.g. ``v4.28.0``) so it is comparable to an :class:`~open_atp.images.Image`'s declared :attr:`~open_atp.images.Image.mathlib_rev`, falling back to the resolved git ``rev`` (a commit SHA) when no ``inputRev`` is recorded. """ manifest = self.root / "lake-manifest.json" if not manifest.is_file(): return None data = json.loads(manifest.read_text()) for pkg in data.get("packages", []): if pkg.get("name") == "mathlib": rev = pkg.get("inputRev") or pkg.get("rev") return rev if isinstance(rev, str) else None return None
[docs] def lean_files(self) -> list[Path]: """All ``.lean`` source files, excluding build artifacts under ``.lake``.""" return [p for p in self.root.rglob("*.lean") if ".lake" not in p.parts]
[docs] def files_with_sorry(self) -> list[Path]: """Lean files that contain a ``sorry`` token (cheap textual scan).""" pat = re.compile(r"\bsorry\b") return [p for p in self.lean_files() if pat.search(p.read_text())]
[docs] @dataclass(frozen=True) class ProofTask: """A unit of work: complete the sorrys in ``project``. Parameters ---------- project: The lake project to complete. targets: Optional explicit list of files (relative to ``project.root``) to focus on. When empty, every file containing ``sorry`` is fair game. instructions: Optional natural-language guidance forwarded to provers that accept a prompt (e.g. Aristotle, or the agent system prompt). """ project: LeanProject targets: tuple[Path, ...] = () instructions: str | None = None metadata: dict[str, str] = field(default_factory=dict)
[docs] def resolved_targets(self) -> list[Path]: if self.targets: return [self.project.root / t for t in self.targets] return self.project.files_with_sorry()
[docs] def stage_files( files: Sequence[Path | str], dest: Path | str, *, skeleton: Path = SKELETON_DIR, ) -> LeanProject: """Stage bare ``.lean`` files into the default Mathlib skeleton -> a project. Convenience for the *"upload one or more ``.lean`` files"* contract: copies the skeleton's ``lakefile.toml`` + ``lean-toolchain`` into ``dest`` and drops the files at its root. Limitation: this only works for the pinned toolchain/deps the skeleton (and the baked image) provide -- a file needing a different Mathlib revision or extra deps must arrive as a full lake project instead. Examples -------- Stage a bare ``.lean`` file into a skeleton to get a complete :class:`LeanProject` (here a minimal skeleton is written to a temp dir; in practice ``skeleton`` defaults to the baked image's ``SKELETON_DIR``): >>> import tempfile >>> from pathlib import Path >>> from open_atp.lean import stage_files >>> skeleton = Path(tempfile.mkdtemp()) >>> _ = (skeleton / "lakefile.toml").write_text('name = "demo"\\n') >>> _ = (skeleton / "lean-toolchain").write_text("leanprover/lean4:v4.31.0\\n") >>> bare = Path(tempfile.mkdtemp()) / "Bare.lean" >>> _ = bare.write_text("theorem t : True := by sorry\\n") >>> dest = Path(tempfile.mkdtemp()) / "project" >>> project = stage_files([bare], dest, skeleton=skeleton) >>> project.lean_toolchain 'leanprover/lean4:v4.31.0' >>> [p.name for p in project.lean_files()] ['Bare.lean'] """ if not (skeleton / "lean-toolchain").is_file(): raise FileNotFoundError( f"No skeleton project at {skeleton} (only available in a source " "checkout); submit a full lake project directory instead." ) dest = Path(dest) dest.mkdir(parents=True, exist_ok=True) skeleton_files = ( "lakefile.toml", "lakefile.lean", "lean-toolchain", "lake-manifest.json", ) for name in skeleton_files: src = skeleton / name if src.is_file(): shutil.copy2(src, dest / name) for f in files: f = Path(f) if f.suffix != ".lean": raise ValueError(f"Expected a .lean file, got {f}") shutil.copy2(f, dest / f.name) return LeanProject(dest)