#!/usr/bin/env python3 """ Validate a Research Packet against schemas/research-packet.schema.md (schema_version=1). Usage: validate_research_packet.py /path/to/packet.md Exit codes: 0 = valid 2 = invalid 3 = error (I/O, parse) """ from __future__ import annotations import sys from typing import List from validate_common import ( ValidationResult, extract_front_matter, find_forbidden, read_text, require_keys, require_sections_in_order, ) REQUIRED_KEYS = [ "packet_type", "schema_version", "packet_id", "created_utc", "source_kind", "source_ref", "title", "retrieved_utc", "license", ] REQUIRED_H2 = [ "## Executive Summary", "## Source Metadata", "## Extracted Content", "## Claims and Evidence", "## Safety Notes", "## Citations", ] def validate(path: str) -> ValidationResult: errors: List[str] = [] warnings: List[str] = [] md = read_text(path) fm, body = extract_front_matter(md) missing = require_keys(fm, REQUIRED_KEYS) if missing: errors.append(f"Missing required front matter keys: {', '.join(missing)}") if fm.get("packet_type") != "research_packet": errors.append(f"packet_type must be 'research_packet' (got: {fm.get('packet_type')!r})") if fm.get("schema_version") != "1": errors.append(f"schema_version must be '1' (got: {fm.get('schema_version')!r})") errors.extend(require_sections_in_order(body, REQUIRED_H2)) # Safety Notes must include explicit untrusted statement if "## Safety Notes" in body: if "Untrusted Content Statement" not in body: errors.append("Safety Notes must include 'Untrusted Content Statement:'") if "Injection Indicators" not in body: errors.append("Safety Notes must include 'Injection Indicators:'") # Forbidden content scanning (whole document) forbidden_hits = find_forbidden(md) if forbidden_hits: errors.extend(forbidden_hits) # Basic citation expectation if "## Citations" in body and "[C1]" not in body: warnings.append("No [C#] citation labels found; ensure citations are present and stable.") return ValidationResult(ok=(len(errors) == 0), errors=errors, warnings=warnings) def main() -> int: if len(sys.argv) != 2: print(__doc__.strip(), file=sys.stderr) return 3 path = sys.argv[1] try: res = validate(path) except Exception as e: print(f"ERROR: {e}", file=sys.stderr) return 3 if res.ok: for w in res.warnings: print(f"WARNING: {w}", file=sys.stderr) print("ACCEPT") return 0 else: for e in res.errors: print(f"ERROR: {e}", file=sys.stderr) for w in res.warnings: print(f"WARNING: {w}", file=sys.stderr) print("REJECT") return 2 if __name__ == "__main__": raise SystemExit(main())