#!/usr/bin/env python3
"""
Validate a Research Packet against schemas/research-packet.schema.md (schema_version=1).

Usage:
  validate_research_packet.py /path/to/packet.md

Exit codes:
  0 = valid
  2 = invalid
  3 = error (usage, I/O, parse)
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import sys
|
|
from typing import List
|
|
|
|
from validate_common import (
|
|
ValidationResult,
|
|
extract_front_matter,
|
|
find_forbidden,
|
|
read_text,
|
|
require_keys,
|
|
require_sections_in_order,
|
|
)
|
|
|
|
# Front matter keys that validate() hands to require_keys(); any key that
# helper reports back is listed in a single "Missing required front matter
# keys" error.
REQUIRED_KEYS = [
    "packet_type", "schema_version", "packet_id",
    "created_utc", "source_kind", "source_ref",
    "title", "retrieved_utc", "license",
]
|
|
|
|
# Level-2 markdown headings that validate() passes, together with the packet
# body, to require_sections_in_order(). The list order is the order given to
# that helper.
REQUIRED_H2 = [
    "## Executive Summary", "## Source Metadata",
    "## Extracted Content", "## Claims and Evidence",
    "## Safety Notes", "## Citations",
]
|
|
|
|
|
|
def validate(path: str) -> ValidationResult:
    """Validate the Research Packet at *path* and collect every finding.

    Checks run in this order: required front matter keys, the fixed
    ``packet_type`` / ``schema_version`` values, the required H2 sections
    (delegated to ``require_sections_in_order``), the two mandatory Safety
    Notes markers, forbidden content over the whole document, and finally a
    citation-label sanity check that only produces a warning.

    Args:
        path: Location of the packet markdown file; passed to ``read_text``.

    Returns:
        A ``ValidationResult`` whose ``ok`` is true exactly when no errors
        were recorded. Warnings never affect ``ok``.

    Raises:
        Nothing is caught here, so any exception raised by the
        ``validate_common`` helpers propagates to the caller.
    """
    errors: List[str] = []
    warnings: List[str] = []

    md = read_text(path)
    fm, body = extract_front_matter(md)

    missing = require_keys(fm, REQUIRED_KEYS)
    if missing:
        errors.append(f"Missing required front matter keys: {', '.join(missing)}")

    packet_type = fm.get("packet_type")
    if packet_type != "research_packet":
        errors.append(f"packet_type must be 'research_packet' (got: {packet_type!r})")

    # Compared against the string '1'.
    # NOTE(review): assumes extract_front_matter yields string values — confirm.
    schema_version = fm.get("schema_version")
    if schema_version != "1":
        errors.append(f"schema_version must be '1' (got: {schema_version!r})")

    errors.extend(require_sections_in_order(body, REQUIRED_H2))

    # Safety Notes must include the explicit untrusted statement and the
    # injection indicators. Only the Safety Notes section itself is searched
    # (from its heading up to the next H2 heading, or the end of the body),
    # so the same phrases appearing elsewhere -- e.g. inside extracted,
    # untrusted content -- cannot satisfy the requirement.
    heading = "## Safety Notes"
    section_start = body.find(heading)
    if section_start != -1:
        next_h2 = body.find("\n## ", section_start + len(heading))
        if next_h2 == -1:
            safety_notes = body[section_start:]
        else:
            safety_notes = body[section_start:next_h2]

        if "Untrusted Content Statement" not in safety_notes:
            errors.append("Safety Notes must include 'Untrusted Content Statement:'")
        if "Injection Indicators" not in safety_notes:
            errors.append("Safety Notes must include 'Injection Indicators:'")

    # Forbidden content scanning covers the whole document, front matter
    # included, which is why it receives md rather than body.
    forbidden_hits = find_forbidden(md)
    if forbidden_hits:
        errors.extend(forbidden_hits)

    # Basic citation expectation: only the first label, [C1], is probed, and
    # its absence is a warning rather than an error.
    if "## Citations" in body and "[C1]" not in body:
        warnings.append("No [C#] citation labels found; ensure citations are present and stable.")

    return ValidationResult(ok=not errors, errors=errors, warnings=warnings)
|
|
|
|
|
|
def main() -> int:
    """Command-line entry point: validate one packet and report the verdict.

    Expects exactly one command-line argument, the packet path, read from
    ``sys.argv``. Errors and warnings are written to stderr; the verdict
    word (``ACCEPT`` or ``REJECT``) is written to stdout.

    Returns:
        0 when the packet is valid, 2 when it is invalid, and 3 for a usage
        error or any exception raised while validating.
    """
    if len(sys.argv) != 2:
        # __doc__ is None when docstrings are stripped (python -OO) or the
        # module has none; fall back to a literal usage line instead of
        # crashing with AttributeError on None.strip().
        usage = __doc__ or "Usage: validate_research_packet.py /path/to/packet.md"
        print(usage.strip(), file=sys.stderr)
        return 3

    try:
        res = validate(sys.argv[1])
    except Exception as exc:
        # Top-level boundary: every failure during validation is reported
        # as a single ERROR line and mapped to exit code 3.
        print(f"ERROR: {exc}", file=sys.stderr)
        return 3

    accepted = res.ok

    # Errors are only reported for rejected packets; warnings are reported
    # for both verdicts, after any errors.
    if not accepted:
        for err in res.errors:
            print(f"ERROR: {err}", file=sys.stderr)
    for warning in res.warnings:
        print(f"WARNING: {warning}", file=sys.stderr)

    if accepted:
        print("ACCEPT")
        return 0

    print("REJECT")
    return 2
|
|
|
|
|
|
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
|