#!/usr/bin/env python3 """ Validate a Tool Request against schemas/tool-request.schema.md (schema_version=1). Supports two execution backends: - backend=ERA (default): command execution in microVM lane - backend=monty: Python-subset execution in TOOL-EXEC-Lite lane Usage: validate_tool_request.py /path/to/request.md Exit codes: 0 = valid 2 = invalid 3 = error """ from __future__ import annotations import json import re import sys from typing import List, Tuple import keyword from validate_common import ( ValidationResult, extract_front_matter, find_forbidden, read_text, require_keys, require_sections_in_order, ) REQUIRED_KEYS_BASE = [ "request_type", "schema_version", "request_id", "created_utc", "requested_by", "approved_by", "approved_utc", "purpose", "language", "network", "cpu_limit", "memory_limit_mb", "time_limit_sec", ] # Additional optional key (recommended); default is ERA if absent. OPTIONAL_KEYS = ["backend"] # ERA required headings REQUIRED_H2_ERA = [ "## Command", "## Input Files", "## Output Expectations", "## Risk Assessment", ] # Monty required headings (Inputs optional) REQUIRED_H2_MONTY = [ "## Code", "## Output Expectations", "## Risk Assessment", ] # Strong rules for ERA: command must be a single line and must not contain shell chaining/pipes/redirection DANGEROUS_CMD_TOKENS = re.compile(r"[;&|><`]|(\$\()|(\)\s*)", re.IGNORECASE) # Monty code guardrails (best-effort, not a substitute for Monty itself) FORBIDDEN_MONTY_CODE_TOKENS = re.compile( r"\b(import|open|exec|eval|compile|__import__|globals|locals|vars|dir|getattr|setattr|delattr)\b", re.IGNORECASE, ) IDENT_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$") def extract_single_line_after_heading(body: str, heading: str) -> str: lines = body.splitlines() try: i = lines.index(heading) except ValueError: return "" for j in range(i + 1, len(lines)): line = lines[j].strip() if line.startswith("## "): break if line: return line return "" def extract_section_text(body: str, heading: str) -> str: """ Extract full text under a heading until the next H2. """ lines = body.splitlines() try: i = lines.index(heading) except ValueError: return "" out: List[str] = [] for j in range(i + 1, len(lines)): if lines[j].startswith("## "): break out.append(lines[j]) return "\n".join(out).strip() def validate_era(body: str, errors: List[str]) -> None: # Headings errors.extend(require_sections_in_order(body, REQUIRED_H2_ERA)) # Command rules cmd = extract_single_line_after_heading(body, "## Command") if not cmd: errors.append("## Command must contain a single command line.") return if cmd.startswith("```") or cmd.endswith("```"): errors.append("Command must be plain text, not a fenced code block.") if DANGEROUS_CMD_TOKENS.search(cmd): errors.append("Command contains forbidden shell metacharacters (chaining/pipes/redirection/subshell).") lc = cmd.lower() if "pip install" in lc or "apt" in lc or "apt-get" in lc or "npm install" in lc: errors.append("Command appears to install packages; installs are forbidden in TOOL-EXEC.") def validate_monty(fm: dict, body: str, errors: List[str], warnings: List[str]) -> None: # Headings errors.extend(require_sections_in_order(body, REQUIRED_H2_MONTY)) # Monty requires code block text (not fenced) code = extract_section_text(body, "## Code") if not code: errors.append("## Code must contain Monty-executable Python-subset code.") return # For safety, discourage fenced blocks (users can still paste raw code) if "```" in code: errors.append("Monty code must be plain text, not fenced code blocks.") # Inputs JSON is optional but if present must parse as object inputs_json = extract_section_text(body, "## Inputs (JSON)") if inputs_json: try: obj = json.loads(inputs_json) if not isinstance(obj, dict): errors.append("## Inputs (JSON) must be a JSON object/dict.") else: bad_keys = [] for k in obj.keys(): if not isinstance(k, str): bad_keys.append(repr(k)) continue if not IDENT_RE.match(k) or keyword.iskeyword(k): bad_keys.append(k) if bad_keys: errors.append( "## Inputs (JSON) keys must be valid Python identifiers and not keywords. " f"Invalid keys: {', '.join(bad_keys)}" ) except Exception as e: errors.append(f"Invalid JSON in ## Inputs (JSON): {e}") # Backend-policy constraints lang = fm.get("language", "").strip().lower() if lang != "python": errors.append("backend=monty requires language=python.") net = fm.get("network", "").strip().lower() if net != "none": errors.append("backend=monty requires network=none.") # Best-effort code guardrails: these names should not appear in pure-compute Monty lane # (Monty itself blocks many capabilities; this is defense-in-depth and discourages risky patterns.) if FORBIDDEN_MONTY_CODE_TOKENS.search(code): warnings.append( "Monty code contains potentially risky builtins/names (import/open/exec/eval/etc). " "Monty may block these, but review intent carefully." ) def validate(path: str) -> ValidationResult: errors: List[str] = [] warnings: List[str] = [] md = read_text(path) fm, body = extract_front_matter(md) # Base required keys missing = require_keys(fm, REQUIRED_KEYS_BASE) if missing: errors.append(f"Missing required front matter keys: {', '.join(missing)}") if fm.get("request_type") != "tool_request": errors.append(f"request_type must be 'tool_request' (got: {fm.get('request_type')!r})") if fm.get("schema_version") != "1": errors.append(f"schema_version must be '1' (got: {fm.get('schema_version')!r})") # Approval gate: require approved_by and approved_utc if not fm.get("approved_by") or not fm.get("approved_utc"): errors.append("Tool Request must include approved_by and approved_utc (human approval gate).") # language must not be shell if fm.get("language", "").strip().lower() in ("shell", "bash", "sh", "zsh", "powershell", "pwsh", "cmd"): errors.append("language must not be a shell. Use a supported language runtime only.") # network must be none or allowlist net = fm.get("network", "").strip().lower() if net not in ("none", "allowlist"): errors.append("network must be 'none' or 'allowlist'.") # Determine backend (default ERA) backend = (fm.get("backend") or "ERA").strip().lower() if backend not in ("era", "monty"): errors.append("backend must be 'ERA' or 'monty' (default ERA if omitted).") # Backend-specific validation if backend == "era": validate_era(body, errors) elif backend == "monty": validate_monty(fm, body, errors, warnings) # Forbidden content scan (whole doc) forbidden_hits = find_forbidden(md) if forbidden_hits: errors.extend(forbidden_hits) return ValidationResult(ok=(len(errors) == 0), errors=errors, warnings=warnings) def main() -> int: if len(sys.argv) != 2: print(__doc__.strip(), file=sys.stderr) return 3 path = sys.argv[1] try: res = validate(path) except Exception as e: print(f"ERROR: {e}", file=sys.stderr) return 3 if res.ok: for w in res.warnings: print(f"WARNING: {w}", file=sys.stderr) print("ACCEPT") return 0 else: for e in res.errors: print(f"ERROR: {e}", file=sys.stderr) for w in res.warnings: print(f"WARNING: {w}", file=sys.stderr) print("REJECT") return 2 if __name__ == "__main__": raise SystemExit(main())