Compare commits


2 Commits

4 changed files with 75 additions and 6 deletions

View File

@@ -20,15 +20,19 @@ def _read_jsonl(path: Path) -> list[dict[str, Any]]:
     return [json.loads(line) for line in text.splitlines()]
 
 
-def _triage_lane(item: dict[str, Any], finding_codes: set[str]) -> str:
+def _triage_lane(item: dict[str, Any], finding_codes: set[str], graph_codes: set[str] | None = None) -> str:
+    graph_codes = graph_codes or set()
     if {"claim_ungrounded", "ungrounded_summary"} & finding_codes:
         return "source_cleanup"
+    if {"bridge_concept", "isolated_concept", "small_component"} & graph_codes:
+        return "conflict_resolution"
     if {"relation_missing_source", "relation_missing_target", "orphan_concept"} & finding_codes:
         return "conflict_resolution"
     return "knowledge_capture"
 
 
-def _priority(item: dict[str, Any], finding_codes: set[str]) -> int:
+def _priority(item: dict[str, Any], finding_codes: set[str], graph_codes: set[str] | None = None) -> int:
+    graph_codes = graph_codes or set()
     priority = 50
     if item.get("grounding_status") == "grounded":
         priority -= 10
@@ -37,6 +41,12 @@ def _priority(item: dict[str, Any], finding_codes: set[str]) -> int:
     if any(code.startswith("claim_") or code.startswith("relation_") for code in finding_codes):
         priority += 20
     priority -= min(len(finding_codes) * 2, 10)
+    if "bridge_concept" in graph_codes:
+        priority -= 10
+    if "isolated_concept" in graph_codes:
+        priority -= 6
+    if "small_component" in graph_codes:
+        priority -= 4
     return max(priority, 1)
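
A minimal sketch of how the new weights order candidates, assuming `_priority` is importable from the queue-builder module above and touches only the keys shown in this diff: bridge_concept (-10) sorts ahead of isolated_concept (-6), which sorts ahead of small_component (-4), because lower priority numbers are reviewed first.

    # Same item, no lint findings; only the graph codes differ between calls.
    item = {"grounding_status": "grounded"}
    bridge = _priority(item, set(), {"bridge_concept"})
    isolated = _priority(item, set(), {"isolated_concept"})
    small = _priority(item, set(), {"small_component"})
    # Lower number = reviewed sooner, so bridge concepts surface first.
    assert bridge < isolated < small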
@@ -44,12 +54,14 @@ def build_review_queue(import_dir: str | Path) -> dict[str, Any]:
     base = Path(import_dir)
     manifest = _read_json(base / "manifest.json")
     lint_payload = _read_json(base / "lint_findings.json")
+    graph_payload = _read_json(base / "graph_diagnostics.json")
     claims = _read_jsonl(base / "claims.jsonl")
     concepts = _read_jsonl(base / "concepts.jsonl")
 
     findings_by_target: defaultdict[str, list[dict[str, Any]]] = defaultdict(list)
     for finding in lint_payload.get("findings", []):
         findings_by_target[finding["target_id"]].append(finding)
+    graph_codes_by_concept = _graph_codes_by_concept(graph_payload)
 
     queue: list[dict[str, Any]] = []
@@ -74,7 +86,8 @@ def build_review_queue(import_dir: str | Path) -> dict[str, Any]:
     for concept in concepts:
         related = findings_by_target.get(concept["concept_id"], [])
         finding_codes = {item["code"] for item in related}
-        if not finding_codes:
+        graph_codes = graph_codes_by_concept.get(concept["concept_id"], set())
+        if not finding_codes and not graph_codes:
             continue
         queue.append(
             {
@@ -82,12 +95,13 @@ def build_review_queue(import_dir: str | Path) -> dict[str, Any]:
                 "candidate_type": "concept",
                 "candidate_id": concept["concept_id"],
                 "title": concept["title"],
-                "triage_lane": _triage_lane(concept, finding_codes),
-                "priority": _priority(concept, finding_codes),
+                "triage_lane": _triage_lane(concept, finding_codes, graph_codes),
+                "priority": _priority(concept, finding_codes, graph_codes),
                 "grounding_status": concept.get("grounding_status", "triaged"),
                 "status": "needs_review",
-                "finding_codes": sorted(finding_codes),
+                "finding_codes": sorted(finding_codes | graph_codes),
                 "concept_ids": [concept["concept_id"]],
+                "graph_codes": sorted(graph_codes),
             }
         )
@@ -99,6 +113,24 @@ def build_review_queue(import_dir: str | Path) -> dict[str, Any]:
     }
 
 
+def _graph_codes_by_concept(graph_payload: dict[str, Any]) -> dict[str, set[str]]:
+    codes: defaultdict[str, set[str]] = defaultdict(set)
+    components = graph_payload.get("components", [])
+    for component in components:
+        concept_ids = [str(item) for item in component.get("concept_ids", [])]
+        size = int(component.get("size", len(concept_ids)))
+        if size == 1 and concept_ids:
+            codes[concept_ids[0]].add("isolated_concept")
+        elif 1 < size <= 2:
+            for concept_id in concept_ids:
+                codes[concept_id].add("small_component")
+    for bridge in graph_payload.get("bridge_concepts", []):
+        concept_id = str(bridge.get("concept_id", ""))
+        if concept_id:
+            codes[concept_id].add("bridge_concept")
+    return codes
+
+
 def build_parser() -> argparse.ArgumentParser:
     parser = argparse.ArgumentParser(description="Build a GroundRecall review queue from import artifacts.")
     parser.add_argument("import_dir")
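
The new helper only reads `components` (with `concept_ids` and `size`) and `bridge_concepts` (with `concept_id`) from the diagnostics payload. A minimal sketch of the mapping it produces; the payload values below are made up for illustration:

    # Hypothetical payload shaped the way _graph_codes_by_concept reads graph_diagnostics.json.
    payload = {
        "components": [
            {"concept_ids": ["concept::a", "concept::b"], "size": 2},
            {"concept_ids": ["concept::isolated"], "size": 1},
        ],
        "bridge_concepts": [{"concept_id": "concept::b"}],
    }
    codes = _graph_codes_by_concept(payload)
    # codes["concept::a"] == {"small_component"}
    # codes["concept::b"] == {"small_component", "bridge_concept"}
    # codes["concept::isolated"] == {"isolated_concept"}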

View File

@@ -331,11 +331,17 @@ def _build_import_review_payload(session: ReviewSession, import_dir: Path) -> di
         source_root=resolved_source_root,
     )
     artifact_by_id = {item["artifact_id"]: item for item in artifacts}
+    queue_by_candidate_id = {
+        str(item.get("candidate_id", "")): item
+        for item in queue_payload.get("items", [])
+        if item.get("candidate_type") == "concept"
+    }
 
     concept_reviews: list[dict[str, Any]] = []
     for concept in session.draft_pack.concepts:
         full_concept_id = f"concept::{concept.concept_id}" if not concept.concept_id.startswith("concept::") else concept.concept_id
         concept_claims = claims_by_concept.get(full_concept_id, [])
+        queue_entry = queue_by_candidate_id.get(full_concept_id, {})
         claim_payloads: list[dict[str, Any]] = []
         has_citation_support = False
         for claim in concept_claims[:25]:
@@ -380,6 +386,10 @@ def _build_import_review_payload(session: ReviewSession, import_dir: Path) -> di
                 "grounded_claim_count": sum(1 for item in concept_claims if item.get("grounding_status") == "grounded"),
                 "warning_count": len(findings_by_target.get(full_concept_id, [])),
                 "has_citation_support": has_citation_support,
+                "review_priority": int(queue_entry.get("priority", 50)),
+                "triage_lane": str(queue_entry.get("triage_lane", "knowledge_capture")),
+                "finding_codes": list(queue_entry.get("finding_codes", [])),
+                "graph_codes": list(queue_entry.get("graph_codes", [])),
                 "top_claims": claim_payloads,
                 "notes": list(concept.notes),
             }
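
When a concept never made it into review_queue.json, `queue_by_candidate_id` has no entry for it and `queue_entry` is `{}`, so the new fields fall back to neutral values instead of being omitted. A small sketch of those fallbacks, taken directly from the `.get(...)` defaults above:

    queue_entry = {}  # concept absent from the review queue items
    int(queue_entry.get("priority", 50))                        # 50
    str(queue_entry.get("triage_lane", "knowledge_capture"))    # "knowledge_capture"
    list(queue_entry.get("finding_codes", []))                  # []
    list(queue_entry.get("graph_codes", []))                    # []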
@@ -392,6 +402,7 @@ def _build_import_review_payload(session: ReviewSession, import_dir: Path) -> di
             "queue_length": queue_payload.get("queue_length", 0),
             "source_adapter": manifest.get("source_adapter", ""),
             "graph_summary": graph_payload.get("summary", {}),
+            "top_queue_items": queue_payload.get("items", [])[:10],
         },
         "review_guidance": {
             "overview": (

View File

@@ -74,6 +74,7 @@ def test_groundrecall_import_emits_normalized_artifacts(tmp_path: Path) -> None:
     review_queue = json.loads((result.out_dir / "review_queue.json").read_text(encoding="utf-8"))
     assert review_queue["queue_length"] >= 1
     assert any(item["candidate_type"] == "claim" for item in review_queue["items"])
+    assert any(item["candidate_type"] == "concept" for item in review_queue["items"])
     review_session = json.loads((result.out_dir / "review_session.json").read_text(encoding="utf-8"))
     assert review_session["reviewer"] == "GroundRecall Import"
     assert review_session["draft_pack"]["pack"]["source_import_id"] == "import-test"
@@ -151,6 +152,25 @@ def test_graph_diagnostics_detect_bridge_concepts() -> None:
     assert [item["concept_id"] for item in diagnostics["bridge_concepts"]] == ["concept::b", "concept::c"]
 
 
+def test_review_queue_uses_graph_diagnostics_for_concept_triage(tmp_path: Path) -> None:
+    root = tmp_path / "llmwiki"
+    (root / "wiki").mkdir(parents=True)
+    (root / "wiki" / "a.md").write_text("# A\n\nSee also [[B]].\n", encoding="utf-8")
+    (root / "wiki" / "b.md").write_text("# B\n\nSee also [[C]].\n", encoding="utf-8")
+    (root / "wiki" / "c.md").write_text("# C\n", encoding="utf-8")
+    (root / "wiki" / "isolated.md").write_text("# Isolated\n", encoding="utf-8")
+
+    result = run_groundrecall_import(root, mode="quick", import_id="graph-queue-test")
+    review_queue = json.loads((result.out_dir / "review_queue.json").read_text(encoding="utf-8"))
+    concept_items = {item["candidate_id"]: item for item in review_queue["items"] if item["candidate_type"] == "concept"}
+
+    assert concept_items["concept::b"]["triage_lane"] == "conflict_resolution"
+    assert "bridge_concept" in concept_items["concept::b"]["graph_codes"]
+    assert concept_items["concept::isolated"]["triage_lane"] == "conflict_resolution"
+    assert "isolated_concept" in concept_items["concept::isolated"]["graph_codes"]
+    assert concept_items["concept::b"]["priority"] < concept_items["concept::isolated"]["priority"]
+
+
 def test_groundrecall_import_parses_explicit_claim_relations(tmp_path: Path) -> None:
     root = tmp_path / "llmwiki"
     (root / "wiki").mkdir(parents=True)

View File

@@ -57,7 +57,13 @@ def test_review_workspace_populates_and_persists_citation_reviews(tmp_path: Path
     assert any(item["citation_review_id"] == citation_review_id for item in review_data["citation_reviews"])
     assert "graph_diagnostics" in review_data
     assert "graph_summary" in review_data["import_context"]
+    assert "top_queue_items" in review_data["import_context"]
     assert review_data["graph_diagnostics"]["summary"]["concept_count"] >= 1
+    concept_review = next(item for item in review_data["concept_reviews"] if item["concept_id"] == "learning-theory")
+    assert "review_priority" in concept_review
+    assert "triage_lane" in concept_review
+    assert "finding_codes" in concept_review
+    assert "graph_codes" in concept_review
 
 
 def test_review_workspace_resolves_citation_metadata_from_bibtex(tmp_path: Path) -> None: