ThreeGate/tools/validate_research_packet.py

115 lines
2.9 KiB
Python

#!/usr/bin/env python3
"""
Validate a Research Packet against schemas/research-packet.schema.md (schema_version=1).
Usage:
validate_research_packet.py /path/to/packet.md
Exit codes:
0 = valid
2 = invalid
3 = error (I/O, parse)
"""
from __future__ import annotations
import sys
from typing import List
from validate_common import (
ValidationResult,
extract_front_matter,
find_forbidden,
read_text,
require_keys,
require_sections_in_order,
)
# Front-matter keys that validate() passes to require_keys(); any key that
# helper reports as missing is turned into a validation error.
REQUIRED_KEYS: List[str] = [
    "packet_type",
    "schema_version",
    "packet_id",
    "created_utc",
    "source_kind",
    "source_ref",
    "title",
    "retrieved_utc",
    "license",
]

# H2 headings that validate() passes to require_sections_in_order(), listed
# in the order they are expected to appear in the packet body.
REQUIRED_H2: List[str] = [
    "## Executive Summary",
    "## Source Metadata",
    "## Extracted Content",
    "## Claims and Evidence",
    "## Safety Notes",
    "## Citations",
]
def _h2_section(body: str, heading: str) -> str:
    """Return the text under *heading*, up to the next H2 heading.

    The heading is located by plain substring search, the same test
    ``validate`` uses to decide whether the section is present. Returns an
    empty string when *heading* does not occur in *body*.
    """
    start = body.find(heading)
    if start == -1:
        return ""
    start += len(heading)
    # The next H2 ends the section. Deeper headings ("### ...") do not match
    # because the marker requires a space immediately after "##".
    end = body.find("\n## ", start)
    return body[start:] if end == -1 else body[start:end]


def validate(path: str) -> ValidationResult:
    """Validate the Research Packet file at *path*.

    Checks performed, in order:
      1. every key in REQUIRED_KEYS is reported present by require_keys();
      2. ``packet_type`` equals ``"research_packet"`` and ``schema_version``
         equals the string ``"1"``;
      3. require_sections_in_order() reports no problems for REQUIRED_H2;
      4. the "## Safety Notes" section contains both required markers;
      5. find_forbidden() reports no hits for the whole document;
      6. a warning (not an error) if a Citations section exists but the
         label ``[C1]`` never appears in the body.

    Args:
        path: Location of the packet file, handed to read_text().

    Returns:
        A ValidationResult whose ``ok`` is True exactly when no errors were
        collected, together with the error and warning message lists.

    Raises:
        Nothing is caught here: any exception raised by the helper functions
        (reading or front-matter extraction, for example) propagates to the
        caller.
    """
    errors: List[str] = []
    warnings: List[str] = []

    md = read_text(path)
    fm, body = extract_front_matter(md)

    missing = require_keys(fm, REQUIRED_KEYS)
    if missing:
        errors.append(f"Missing required front matter keys: {', '.join(missing)}")

    if fm.get("packet_type") != "research_packet":
        errors.append(f"packet_type must be 'research_packet' (got: {fm.get('packet_type')!r})")

    # Compared against the string "1"; assumes extract_front_matter() yields
    # string values -- TODO confirm against validate_common.
    if fm.get("schema_version") != "1":
        errors.append(f"schema_version must be '1' (got: {fm.get('schema_version')!r})")

    errors.extend(require_sections_in_order(body, REQUIRED_H2))

    # Safety Notes must include the explicit untrusted statement and the
    # injection indicators. Search only the Safety Notes section itself:
    # scanning the whole body would let the same phrases appearing elsewhere
    # (e.g. inside the untrusted "Extracted Content") satisfy the check.
    if "## Safety Notes" in body:
        safety_notes = _h2_section(body, "## Safety Notes")
        if "Untrusted Content Statement" not in safety_notes:
            errors.append("Safety Notes must include 'Untrusted Content Statement:'")
        if "Injection Indicators" not in safety_notes:
            errors.append("Safety Notes must include 'Injection Indicators:'")

    # Forbidden content scanning (whole document, front matter included).
    forbidden_hits = find_forbidden(md)
    if forbidden_hits:
        errors.extend(forbidden_hits)

    # Basic citation expectation: only the first label, [C1], is probed.
    if "## Citations" in body and "[C1]" not in body:
        warnings.append("No [C#] citation labels found; ensure citations are present and stable.")

    return ValidationResult(ok=not errors, errors=errors, warnings=warnings)
def main() -> int:
    """Run the validator from the command line and return the exit status.

    Returns 0 when the packet is accepted, 2 when it is rejected, and 3 when
    the argument count is wrong or validation raises an exception.
    Diagnostics (usage text, ERROR and WARNING lines) go to stderr; the
    final ACCEPT / REJECT verdict goes to stdout.
    """
    argv = sys.argv
    if len(argv) != 2:
        # Wrong invocation: show the module docstring as usage text.
        print(__doc__.strip(), file=sys.stderr)
        return 3

    packet_path = argv[1]
    try:
        outcome = validate(packet_path)
    except Exception as exc:
        # Top-level boundary: any failure while validating maps to exit 3.
        print(f"ERROR: {exc}", file=sys.stderr)
        return 3

    accepted = outcome.ok

    # Errors are reported only for rejected packets and always precede the
    # warnings; warnings are reported in both cases.
    if not accepted:
        for message in outcome.errors:
            print(f"ERROR: {message}", file=sys.stderr)
    for message in outcome.warnings:
        print(f"WARNING: {message}", file=sys.stderr)

    if accepted:
        print("ACCEPT")
        return 0

    print("REJECT")
    return 2
# Script entry point: exit the interpreter with main()'s return code.
if __name__ == "__main__":
    sys.exit(main())