# Source code for open_atp.harness.cost
"""Token-cost fallback table.
Ported near-verbatim from milp_flare's ``harness/cost.py``. Used to estimate a
run's USD cost from token counts when the harness does not report cost directly
(notably Codex). This is a fallback and will go stale; see the provider pricing
pages for current numbers.
"""
from __future__ import annotations
#: Fallback price table keyed by model name. Each value is the cost per
#: million tokens as an ``(input, output)`` pair.
COST_PER_MTOK: dict[str, tuple[float, float]] = {
    # --- claude-* entries ---
    "claude-fable-5": (10.0, 50.0),
    "claude-opus-4-8": (5.0, 25.0),
    "claude-opus-4-7": (5.0, 25.0),
    "claude-opus-4-6": (5.0, 25.0),
    "claude-sonnet-4-6": (3.0, 15.0),
    "claude-sonnet-4-5": (3.0, 15.0),
    "claude-haiku-4-5": (1.0, 5.0),
    # --- gpt-* entries ---
    "gpt-4.1": (2.0, 8.0),
    "gpt-4o": (2.5, 10.0),
    "gpt-4o-mini": (0.15, 0.6),
    "gpt-5.5": (5.0, 30.0),
    "gpt-5.4": (2.5, 15.0),
    "gpt-5.4-mini": (0.75, 4.5),
    "gpt-5.4-nano": (0.2, 1.25),
    # Numina discussion-partner defaults (gpt/gemini backends). These are
    # ESTIMATES -- check them against the provider pricing pages. Variants not
    # listed here remain unpriced and are reported through NuminaProver's
    # ``helper_unpriced_models`` instead of being billed at 0.
    "gpt-5.4-pro": (15.0, 120.0),
    "gemini-3.1-pro-preview": (2.0, 12.0),
    "deepseek-v4-pro": (1.74, 3.48),
    "deepseek-v4-flash": (0.14, 0.28),
}
# [docs]  (link marker carried over from the rendered documentation page; not code)
def compute_cost_usd(model: str, input_tokens: int, output_tokens: int) -> float | None:
    """Price a run in USD from its input and output token counts.

    The per-million-token prices for ``model`` are looked up in
    :data:`COST_PER_MTOK`.

    Args:
        model: Model name; must match a table key exactly to be priced.
        input_tokens: Number of input tokens consumed.
        output_tokens: Number of output tokens produced.

    Returns:
        The estimated cost, or ``None`` when ``model`` has no entry in
        :data:`COST_PER_MTOK`.
    """
    # Models without a table entry are reported as unpriced rather than free.
    if model not in COST_PER_MTOK:
        return None

    per_mtok_in, per_mtok_out = COST_PER_MTOK[model]
    # Table prices are per million tokens, so scale the weighted sum down.
    weighted_total = input_tokens * per_mtok_in + output_tokens * per_mtok_out
    return weighted_total / 1_000_000