#!/usr/bin/env python3
"""
Warrant Reader Leaderboard — submission runner (v0 stub).

This is the public-facing CLI shape: the internals are stubs, but the
protocol, arguments, and emitted JSONL format are stable and will be
wire-compatible with the v1 release.

Usage:
    python run_reader.py \\
        --reader hf://meta-llama/Llama-3.1-8B-Instruct \\
        --artifact ../artifacts/frozen_retrieval_topK_500q.v1.jsonl \\
        --prompt ../artifacts/benchmark_prompt.v1.md \\
        --out submissions/llama-3.1-8b.jsonl

Reader URI schemes (planned):
    hf://<org>/<model>             — HuggingFace transformers, local download
    vllm://<host>:<port>/<model>   — OpenAI-compatible vLLM endpoint
    openai://<model>               — OpenAI Chat Completions
    anthropic://<model>            — Anthropic Messages
    local:///abs/path              — local weight directory
"""
from __future__ import annotations

import argparse
import json
import sys
from collections.abc import Iterator
from pathlib import Path
from typing import NoReturn

PROTOCOL_VERSION = "v0"


def parse_args() -> argparse.Namespace:
    p = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    p.add_argument("--reader", required=True,
                   help="Reader URI (hf://, vllm://, openai://, anthropic://, local://).")
    p.add_argument("--artifact", required=True, type=Path,
                   help="Path to frozen_retrieval_topK_500q.v1.jsonl")
    p.add_argument("--prompt", required=True, type=Path,
                   help="Path to benchmark_prompt.v1.md")
    p.add_argument("--out", required=True, type=Path,
                   help="Output JSONL of reader answers (one per question).")
    p.add_argument("--max_new_tokens", type=int, default=256)
    p.add_argument("--temperature", type=float, default=0.0)
    p.add_argument("--reader_instructions", default="",
                   help="String to interpolate into <<reader_instructions>> in the prompt.")
    p.add_argument("--limit", type=int, default=None,
                   help="Run only the first N questions (debugging).")
    return p.parse_args()


def die(msg: str, code: int = 1) -> NoReturn:
    print(f"[run_reader] error: {msg}", file=sys.stderr)
    sys.exit(code)


def load_jsonl(path: Path) -> Iterator[dict]:
    if not path.exists():
        die(f"artifact not found: {path}\nDid you run runner/fetch_artifacts.py?")
    with path.open("r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if line:  # skip blank lines
                yield json.loads(line)


def stub_call_reader(reader_uri: str, prompt: str, *, max_new_tokens: int, temperature: float) -> dict:
    """STUB. Wire to the appropriate backend in v1.

    Returns:
        {"answer": str, "cited_chunks": list[int], "refused": bool, "raw": str}
    """
    raise NotImplementedError(
        f"reader backend for {reader_uri!r} is not wired yet in this v0 stub. "
        "See runner/README.md for the planned reader URI schemes and "
        "contact contact@manifoldmemory.ai for early-access wiring."
    )
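

# --- Illustrative sketch only ----------------------------------------------
# A minimal example of what wiring one backend could look like, assuming the
# official `openai` Python SDK (>=1.0) and OPENAI_API_KEY in the environment.
# This is NOT the v1 implementation: the helper name, the URI parsing, and
# the placeholder answer parsing are all assumptions. v1 must recover
# `cited_chunks` and `refused` from the reader output per
# benchmark_prompt.v1.md.
def _sketch_call_openai(reader_uri: str, prompt: str, *, max_new_tokens: int, temperature: float) -> dict:
    from openai import OpenAI  # deferred import: only needed for this sketch

    model = reader_uri.removeprefix("openai://")
    client = OpenAI()  # reads OPENAI_API_KEY from the environment
    resp = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        max_tokens=max_new_tokens,
        temperature=temperature,
    )
    text = resp.choices[0].message.content or ""
    # Placeholder parsing: returns the raw completion verbatim and makes no
    # attempt to extract [n] citations or detect refusals.
    return {"answer": text, "cited_chunks": [], "refused": False, "raw": text}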


def main() -> None:
    args = parse_args()
    prompt_template = args.prompt.read_text(encoding="utf-8")
    args.out.parent.mkdir(parents=True, exist_ok=True)
    n = 0
    with args.out.open("w", encoding="utf-8") as fout:
        for item in load_jsonl(args.artifact):
            if args.limit is not None and n >= args.limit:
                break
            qid = item.get("qid")
            qtype = item.get("qtype")
            chunks = item.get("chunks", [])
            # Evidence block: 1-indexed [n] labels that cited_chunks refers
            # back to; capped at the top-10 retrieved chunks.
            user_block = "\n".join(
                f"[{i + 1}] {c.get('text', '')}" for i, c in enumerate(chunks[:10])
            )
            prompt = (
                prompt_template
                .replace("<<reader_instructions>>", args.reader_instructions)
                .replace("{question}", item.get("question", ""))
                .replace("{evidence_block}", user_block)
            )
            try:
                resp = stub_call_reader(
                    args.reader,
                    prompt,
                    max_new_tokens=args.max_new_tokens,
                    temperature=args.temperature,
                )
            except NotImplementedError as e:
                die(str(e), code=2)  # exits; resp is never used after this
            fout.write(json.dumps({
                "schema_version": PROTOCOL_VERSION,
                "qid": qid,
                "qtype": qtype,
                "answer": resp["answer"],
                "cited_chunks": resp["cited_chunks"],
                "refused": resp["refused"],
                "raw": resp.get("raw", ""),
            }) + "\n")
            n += 1
    print(f"[run_reader] wrote {n} answers -> {args.out}")


if __name__ == "__main__":
    main()
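
# ---------------------------------------------------------------------------
# Record shapes, for reference. Field values are illustrative, not real
# benchmark data. main() reads artifact lines with these keys:
#
#   {"qid": "q0001", "qtype": "...", "question": "...",
#    "chunks": [{"text": "..."}, ...]}
#
# and each emitted line follows the stable v0 schema written above:
#
#   {"schema_version": "v0", "qid": "q0001", "qtype": "...", "answer": "...",
#    "cited_chunks": [1, 3], "refused": false, "raw": "..."}
# ---------------------------------------------------------------------------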