Add secondary review product lanes

2026-05-08 11:09:48 -04:00 · 2026-05-08 11:09:48 -04:00 · 2e82dfd5a7
parent 0999ec35cd
commit 2e82dfd5a7
1 changed files with 64 additions and 0 deletions
--- a/src/groundrecall/review_export.py
+++ b/src/groundrecall/review_export.py
@ -287,6 +287,49 @@ def _claim_analysis_metadata(claim: dict[str, Any]) -> dict[str, Any]:
    }
 def _claim_secondary_products(claim: dict[str, Any]) -> dict[str, Any]:
    text = str(claim.get("claim_text", "")).strip()
    lowered = text.lower()
    claim_kind = str(claim.get("claim_kind", "")).strip().lower()
    definition_candidate = False
    qualification_candidate = False
    constraint_candidate = False
    quote_candidate = False
    if text:
        if re.search(r"\b(is|are|means|refers to|defined as|describes)\b", lowered):
            definition_candidate = True
        if re.search(r"\b(however|although|but|except|unless|only if|in some cases|under some conditions|may not|does not always)\b", lowered):
            qualification_candidate = True
        if re.search(r"\b(must|requires|required|cannot|depends on|limited to|constraint|scope)\b", lowered):
            constraint_candidate = True
        if claim_kind in {"quote", "quotation"}:
            quote_candidate = True
        elif re.search(r"[\"“”]", text) and len(text) >= 40:
            quote_candidate = True
        elif len(text) >= 140 and text.endswith((".", "!", '"', "”")):
            quote_candidate = True
    labels: list[str] = []
    if definition_candidate:
        labels.append("definition")
    if qualification_candidate:
        labels.append("qualification")
    if constraint_candidate:
        labels.append("constraint")
    if quote_candidate:
        labels.append("quote_candidate")
    return {
        "definition_candidate": definition_candidate,
        "qualification_candidate": qualification_candidate,
        "constraint_candidate": constraint_candidate,
        "quote_candidate": quote_candidate,
        "secondary_labels": labels,
    }
 def build_citation_review_entries_from_import(import_dir: str | Path) -> list[CitationReviewEntry]:
    base = Path(import_dir)
    manifest = _read_json(base / "manifest.json")
@ -408,13 +451,17 @@ def _build_import_review_payload(session: ReviewSession, import_dir: Path) -> di
        claim_payloads: list[dict[str, Any]] = []
        has_citation_support = False
        lane_counts: dict[str, int] = defaultdict(int)
        secondary_counts: dict[str, int] = defaultdict(int)
        for claim in concept_claims[:25]:
            supporting_observations = [observations_by_id[item] for item in claim.get("source_observation_ids", []) if item in observations_by_id]
            artifact_ids = {item["artifact_id"] for item in supporting_observations}
            citation_support = [artifact_citation_summary.get(artifact_id, {}) for artifact_id in artifact_ids]
            has_citation_support = has_citation_support or any(item.get("has_citation_support") for item in citation_support)
            analysis = _claim_analysis_metadata(claim)
            secondary = _claim_secondary_products(claim)
            lane_counts[analysis["analysis_lane"]] += 1
            for label in secondary["secondary_labels"]:
                secondary_counts[label] += 1
            cited_keys = {
                key
                for artifact_id in artifact_ids
@ -438,6 +485,11 @@ def _build_import_review_payload(session: ReviewSession, import_dir: Path) -> di
                    "analysis_lane": analysis["analysis_lane"],
                    "argument_role": analysis["argument_role"],
                    "risk_flags": analysis["risk_flags"],
                    "definition_candidate": secondary["definition_candidate"],
                    "qualification_candidate": secondary["qualification_candidate"],
                    "constraint_candidate": secondary["constraint_candidate"],
                    "quote_candidate": secondary["quote_candidate"],
                    "secondary_labels": secondary["secondary_labels"],
                    "grounding_status": claim.get("grounding_status", "unknown"),
                    "supporting_observations": [
                        {
@ -477,6 +529,7 @@ def _build_import_review_payload(session: ReviewSession, import_dir: Path) -> di
                "finding_codes": list(queue_entry.get("finding_codes", [])),
                "graph_codes": list(queue_entry.get("graph_codes", [])),
                "analysis_lanes": dict(sorted(lane_counts.items())),
                "secondary_products": dict(sorted(secondary_counts.items())),
                "top_claims": claim_payloads,
                "notes": list(concept.notes),
            }
@ -507,12 +560,23 @@ def _build_import_review_payload(session: ReviewSession, import_dir: Path) -> di
                "Rhetorical lane: bundling, overstatement, equivocation, or burden shifting.",
                "Research-program lane: what evidence or experiments would reduce the objection.",
            ],
            "secondary_products": [
                "Definition candidates: source-grounded terminology or explicit meaning statements.",
                "Qualification candidates: scope, exceptions, caveats, or cautionary modifiers.",
                "Constraint candidates: requirements, limits, dependencies, and non-equivalence conditions.",
                "Quote candidates: attributed wording useful for workbench argumentation, not default Notebook prose.",
            ],
            "citation_guidance": [
                "A citation key or extracted reference is evidence of traceability, not correctness.",
                "Check whether the cited work actually supports the claim and whether the claim overstates it.",
                "Use the citation track to prioritize claims that can move into a separate citation-ingestion workflow.",
                "Treat abstract-based support suggestions as triage help, not as a substitute for direct source inspection.",
            ],
            "public_output_policy": [
                "Direct quotations should remain visibly marked and source-attributed.",
                "Public Notebook exposition should paraphrase source material unless a quote is intentionally displayed.",
                "Do not surface unmarked source wording as if it were original Notebook prose.",
            ],
        },
        "field_specs": [
            _status_field_spec(),