Keep doclift fixture immutable in tests

This commit is contained in:
welsberr 2026-04-23 07:26:39 -04:00
parent 76ca54327a
commit 1731e0006a
11 changed files with 9 additions and 412 deletions

View File

@ -1,5 +0,0 @@
{"artifact_id": "ia_af72cb1641f3", "artifact_kind": "doclift_bundle_artifact", "created_at": "2026-04-23T11:22:39Z", "current_status": "draft", "import_id": "doclift-test", "metadata": {"source_kind": "doclift_bundle"}, "path": "documents/lecture-1/document.figures.json", "sha256": "f1c6970942981c53761360effdb5e5b590dcf7f0172839d37b636af96c19dadd", "title": "document.figures"}
{"artifact_id": "ia_6cc5265d52f6", "artifact_kind": "doclift_bundle_artifact", "created_at": "2026-04-23T11:22:39Z", "current_status": "draft", "import_id": "doclift-test", "metadata": {"source_kind": "doclift_bundle"}, "path": "documents/lecture-1/document.layout.json", "sha256": "9883a8c3bb6acae5295eaf51ae3308f83c8ec4452bb4279b7d370e0ebd5706b3", "title": "document.layout"}
{"artifact_id": "ia_51bdebab22e6", "artifact_kind": "doclift_bundle_artifact", "created_at": "2026-04-23T11:22:39Z", "current_status": "draft", "import_id": "doclift-test", "metadata": {"source_kind": "doclift_bundle"}, "path": "documents/lecture-1/document.md", "sha256": "bac0c576c657e5a79a484aa7ec1aee193742ff2627f8f7b100f62530ee1c991d", "title": "document"}
{"artifact_id": "ia_893c59d73929", "artifact_kind": "doclift_bundle_artifact", "created_at": "2026-04-23T11:22:39Z", "current_status": "draft", "import_id": "doclift-test", "metadata": {"source_kind": "doclift_bundle"}, "path": "documents/lecture-1/document.tables.json", "sha256": "a1eda53d353a7be08b3a1d55571c80f29be8fa157ab770fccc22fe3db6053fde", "title": "document.tables"}
{"artifact_id": "ia_ffa5b716b5a5", "artifact_kind": "doclift_bundle_artifact", "created_at": "2026-04-23T11:22:39Z", "current_status": "draft", "import_id": "doclift-test", "metadata": {"source_kind": "doclift_bundle"}, "path": "manifest.json", "sha256": "3810d72b9858e1eb69e981759a3901defb75776744ba50f73f426860f05b9b5a", "title": "manifest"}

View File

@ -1 +0,0 @@
{"claim_id": "clm_doclift_1", "claim_kind": "summary", "claim_text": "Lecture 1. Example is a lecture in the imported doclift bundle.", "concept_ids": ["concept::lecture-1"], "confidence_hint": 0.85, "contradicts_claim_ids": [], "current_status": "triaged", "grounding_status": "grounded", "import_id": "doclift-test", "source_observation_ids": ["obs_doclift_1"], "supersedes_claim_ids": [], "supporting_fragment_ids": []}

View File

@ -1 +0,0 @@
{"aliases": [], "concept_id": "concept::lecture-1", "current_status": "triaged", "description": "Imported from doclift bundle document kind 'lecture'.", "import_id": "doclift-test", "source_artifact_ids": ["ia_51bdebab22e6"], "title": "Lecture 1. Example"}

View File

@ -1,14 +0,0 @@
{
"import_id": "doclift-test",
"import_mode": "quick",
"summary": {
"artifact_count": 5,
"observation_count": 1,
"claim_count": 1,
"concept_count": 1,
"relation_count": 0,
"error_count": 0,
"warning_count": 0
},
"findings": []
}

View File

@ -1,16 +0,0 @@
{
"import_id": "doclift-test",
"import_mode": "quick",
"machine_id": "nerdanel",
"agent_id": "groundrecall.ingest",
"source_root": "/home/netuser/dev/GroundRecall/tests/fixtures/doclift_bundle_minimal",
"imported_at": "2026-04-23T11:22:39Z",
"source_repo_kind": "llmwiki",
"source_adapter": "doclift_bundle",
"import_intent": "both",
"artifact_count": 5,
"observation_count": 1,
"claim_count": 1,
"concept_count": 1,
"relation_count": 0
}

View File

@ -1 +0,0 @@
{"artifact_id": "ia_51bdebab22e6", "confidence_hint": 0.85, "current_status": "draft", "grounding_status": "grounded", "import_id": "doclift-test", "line_end": 0, "line_start": 0, "observation_id": "obs_doclift_1", "origin_path": "documents/lecture-1/document.md", "origin_section": "Lecture 1. Example", "role": "summary", "source_url": "legacy/lecture-1.doc", "support_kind": "direct_source", "text": "Lecture 1. Example"}

View File

@ -1,316 +0,0 @@
{
"reviewer": "GroundRecall Import",
"draft_pack": {
"pack": {
"name": "groundrecall-import-doclift-test",
"display_name": "GroundRecall Import doclift-test",
"version": "0.1.0-draft",
"source_import_id": "doclift-test",
"source_root": "/home/netuser/dev/GroundRecall/tests/fixtures/doclift_bundle_minimal"
},
"concepts": [
{
"concept_id": "lecture-1",
"title": "Lecture 1. Example",
"description": "Imported from doclift bundle document kind 'lecture'.",
"prerequisites": [],
"mastery_signals": [],
"status": "provisional",
"notes": [
"Claim: Lecture 1. Example is a lecture in the imported doclift bundle. [grounded]"
]
}
],
"conflicts": [],
"review_flags": [],
"attribution": {
"source_repo_kind": "llmwiki",
"source_root": "/home/netuser/dev/GroundRecall/tests/fixtures/doclift_bundle_minimal",
"imported_at": "2026-04-23T11:22:39Z",
"machine_id": "nerdanel",
"rights_note": "Imported llmwiki-style corpus requires review before promotion."
}
},
"citation_reviews": [],
"ledger": [],
"import_context": {
"manifest": {
"import_id": "doclift-test",
"import_mode": "quick",
"machine_id": "nerdanel",
"agent_id": "groundrecall.ingest",
"source_root": "/home/netuser/dev/GroundRecall/tests/fixtures/doclift_bundle_minimal",
"imported_at": "2026-04-23T11:22:39Z",
"source_repo_kind": "llmwiki",
"source_adapter": "doclift_bundle",
"import_intent": "both",
"artifact_count": 5,
"observation_count": 1,
"claim_count": 1,
"concept_count": 1,
"relation_count": 0
},
"lint_summary": {
"artifact_count": 5,
"observation_count": 1,
"claim_count": 1,
"concept_count": 1,
"relation_count": 0,
"error_count": 0,
"warning_count": 0
},
"queue_length": 1,
"source_adapter": "doclift_bundle"
},
"review_guidance": {
"overview": "Review concepts first, then inspect representative claims and their source observations before promotion.",
"priorities": [
"Focus reviewer effort on concepts with strong grounded claims and explicit citations first.",
"Downgrade or reject concepts whose claims are fragmented, duplicated, or missing meaningful support.",
"For academic material, citation-bearing claims deserve special scrutiny for fit, contradiction, and fabrication risk."
],
"citation_guidance": [
"A citation key or extracted reference is evidence of traceability, not correctness.",
"Check whether the cited work actually supports the claim and whether the claim overstates it.",
"Use the citation track to prioritize claims that can move into a separate citation-ingestion workflow."
]
},
"field_specs": [
{
"field": "status",
"label": "Review status",
"input": "select",
"required": true,
"options": [
{
"value": "trusted",
"label": "Trusted",
"help": "Promote this concept and its supported claims when the evidence and wording are ready."
},
{
"value": "provisional",
"label": "Provisional",
"help": "Keep this concept in reviewed state when it is promising but still needs citation or wording cleanup."
},
{
"value": "needs_review",
"label": "Needs Review",
"help": "Leave undecided when support, scope, or concept boundaries are still unclear."
},
{
"value": "rejected",
"label": "Rejected",
"help": "Exclude this concept when it is noise, unsupported, duplicated, or misleading."
}
]
},
{
"field": "description",
"label": "Concept description",
"input": "text",
"required": false,
"help": "Refine the concept summary to match the strongest supported interpretation."
},
{
"field": "notes",
"label": "Reviewer notes",
"input": "textarea",
"required": false,
"help": "Record why this concept is trusted, provisional, rejected, or still unclear."
},
{
"field": "prerequisites",
"label": "Prerequisites",
"input": "textarea",
"required": false,
"help": "List prerequisite concepts only when the manuscript support is explicit or defensible."
}
],
"citation_field_specs": [
{
"field": "status",
"label": "Citation review status",
"input": "select",
"required": true,
"options": [
{
"value": "unreviewed",
"label": "Unreviewed",
"help": "Keep this citation candidate in triage until fit and existence are checked."
},
{
"value": "verified",
"label": "Verified",
"help": "The cited work exists and materially supports the associated manuscript claim."
},
{
"value": "needs_source_check",
"label": "Needs Source Check",
"help": "The citation may be useful but still needs direct source inspection or metadata cleanup."
},
{
"value": "misleading",
"label": "Misleading",
"help": "The citation exists but overstates, contradicts, or poorly fits the claim."
},
{
"value": "irrelevant",
"label": "Irrelevant",
"help": "The citation does not materially support the concept or claim under review."
},
{
"value": "fabricated",
"label": "Fabricated",
"help": "The citation appears invented, malformed, or otherwise not real."
}
]
},
{
"field": "notes",
"label": "Citation notes",
"input": "textarea",
"required": false,
"help": "Record whether the cited work exists, fits the claim, or should move into a dedicated citation-ingestion lane."
}
],
"concept_reviews": [
{
"concept_id": "lecture-1",
"title": "Lecture 1. Example",
"status": "provisional",
"description": "Imported from doclift bundle document kind 'lecture'.",
"review_help": "Prefer `trusted` when claims are coherent and citation-bearing support is appropriate; prefer `provisional` when the concept is plausible but still needs citation or wording cleanup.",
"claim_count": 1,
"grounded_claim_count": 1,
"warning_count": 0,
"has_citation_support": false,
"top_claims": [
{
"claim_id": "clm_doclift_1",
"claim_text": "Lecture 1. Example is a lecture in the imported doclift bundle.",
"claim_kind": "summary",
"grounding_status": "grounded",
"supporting_observations": [
{
"observation_id": "obs_doclift_1",
"origin_path": "documents/lecture-1/document.md",
"origin_section": "Lecture 1. Example",
"text": "Lecture 1. Example",
"line_start": 0,
"line_end": 0
}
],
"citation_support": [
{
"citation_key_count": 0,
"extracted_reference_count": 0,
"has_citation_support": false
}
],
"artifact_paths": [
"documents/lecture-1/document.md"
],
"finding_messages": []
}
],
"notes": [
"Claim: Lecture 1. Example is a lecture in the imported doclift bundle. [grounded]"
]
}
],
"bibliography": {
"enabled": false,
"entry_count": 0,
"source_files": []
},
"citations": {
"enabled": true,
"provider": "citegeist",
"artifacts": [
{
"artifact_id": "ia_af72cb1641f3",
"path": "documents/lecture-1/document.figures.json",
"title": "document.figures",
"citation_keys": [],
"resolved_entries": [],
"citation_key_count": 0,
"extracted_references": [],
"extracted_reference_count": 0,
"citegeist_backends": [
"anystyle",
"grobid",
"heuristic"
]
},
{
"artifact_id": "ia_6cc5265d52f6",
"path": "documents/lecture-1/document.layout.json",
"title": "document.layout",
"citation_keys": [],
"resolved_entries": [],
"citation_key_count": 0,
"extracted_references": [],
"extracted_reference_count": 0,
"citegeist_backends": [
"anystyle",
"grobid",
"heuristic"
]
},
{
"artifact_id": "ia_51bdebab22e6",
"path": "documents/lecture-1/document.md",
"title": "document",
"citation_keys": [],
"resolved_entries": [],
"citation_key_count": 0,
"extracted_references": [],
"extracted_reference_count": 0,
"citegeist_backends": [
"anystyle",
"grobid",
"heuristic"
]
},
{
"artifact_id": "ia_893c59d73929",
"path": "documents/lecture-1/document.tables.json",
"title": "document.tables",
"citation_keys": [],
"resolved_entries": [],
"citation_key_count": 0,
"extracted_references": [],
"extracted_reference_count": 0,
"citegeist_backends": [
"anystyle",
"grobid",
"heuristic"
]
},
{
"artifact_id": "ia_ffa5b716b5a5",
"path": "manifest.json",
"title": "manifest",
"citation_keys": [],
"resolved_entries": [],
"citation_key_count": 0,
"extracted_references": [],
"extracted_reference_count": 0,
"citegeist_backends": [
"anystyle",
"grobid",
"heuristic"
]
}
],
"summary": {
"artifact_count_with_citations": 0,
"citation_key_total": 0,
"extracted_reference_total": 0
},
"next_actions": [
"Promote citation-bearing claims into a dedicated citation review lane.",
"Use CiteGeist extraction as a first pass, then verify support and metadata before trusting the citation."
]
}
}

View File

@ -1,20 +0,0 @@
{
"import_id": "doclift-test",
"queue_length": 1,
"items": [
{
"queue_id": "rq_clm_doclift_1",
"candidate_type": "claim",
"candidate_id": "clm_doclift_1",
"title": "Lecture 1. Example is a lecture in the imported doclift bundle.",
"triage_lane": "knowledge_capture",
"priority": 35,
"grounding_status": "grounded",
"status": "needs_review",
"finding_codes": [],
"concept_ids": [
"concept::lecture-1"
]
}
]
}

View File

@ -1,36 +0,0 @@
{
"reviewer": "GroundRecall Import",
"draft_pack": {
"pack": {
"name": "groundrecall-import-doclift-test",
"display_name": "GroundRecall Import doclift-test",
"version": "0.1.0-draft",
"source_import_id": "doclift-test",
"source_root": "/home/netuser/dev/GroundRecall/tests/fixtures/doclift_bundle_minimal"
},
"concepts": [
{
"concept_id": "lecture-1",
"title": "Lecture 1. Example",
"description": "Imported from doclift bundle document kind 'lecture'.",
"prerequisites": [],
"mastery_signals": [],
"status": "provisional",
"notes": [
"Claim: Lecture 1. Example is a lecture in the imported doclift bundle. [grounded]"
]
}
],
"conflicts": [],
"review_flags": [],
"attribution": {
"source_repo_kind": "llmwiki",
"source_root": "/home/netuser/dev/GroundRecall/tests/fixtures/doclift_bundle_minimal",
"imported_at": "2026-04-23T11:22:39Z",
"machine_id": "nerdanel",
"rights_note": "Imported llmwiki-style corpus requires review before promotion."
}
},
"citation_reviews": [],
"ledger": []
}

View File

@ -1,6 +1,7 @@
from __future__ import annotations from __future__ import annotations
from pathlib import Path from pathlib import Path
import shutil
import groundrecall.ingest as ingest_module import groundrecall.ingest as ingest_module
import groundrecall.source_adapters # noqa: F401 import groundrecall.source_adapters # noqa: F401
@ -12,6 +13,12 @@ def _fixture_doclift_bundle() -> Path:
return Path(__file__).parent / "fixtures" / "doclift_bundle_minimal" return Path(__file__).parent / "fixtures" / "doclift_bundle_minimal"
def _copied_fixture_doclift_bundle(tmp_path: Path) -> Path:
    """Copy the doclift fixture bundle under *tmp_path* and return the copy.

    The import is then run against the copy rather than the fixture
    directory itself.
    """
    destination = tmp_path / "doclift_bundle_minimal"
    shutil.copytree(src=_fixture_doclift_bundle(), dst=destination)
    return destination
def test_groundrecall_source_adapter_registry_lists_expected_adapters() -> None: def test_groundrecall_source_adapter_registry_lists_expected_adapters() -> None:
names = set(list_source_adapters()) names = set(list_source_adapters())
assert "llmwiki" in names assert "llmwiki" in names
@ -203,8 +210,8 @@ def test_didactopus_pack_import_generates_structured_concepts_and_relations(tmp_
assert "clm_stage_stage1_basics" in claim_ids assert "clm_stage_stage1_basics" in claim_ids
def test_doclift_bundle_import_generates_structured_concepts() -> None: def test_doclift_bundle_import_generates_structured_concepts(tmp_path: Path) -> None:
result = run_groundrecall_import(_fixture_doclift_bundle(), mode="quick", import_id="doclift-test") result = run_groundrecall_import(_copied_fixture_doclift_bundle(tmp_path), mode="quick", import_id="doclift-test")
assert result.manifest["source_adapter"] == "doclift_bundle" assert result.manifest["source_adapter"] == "doclift_bundle"
assert result.manifest["import_intent"] == "both" assert result.manifest["import_intent"] == "both"
concept_ids = {item["concept_id"] for item in result.concepts} concept_ids = {item["concept_id"] for item in result.concepts}