From 1731e0006a9db4f3005b14e73b7b5d41a175e8fb Mon Sep 17 00:00:00 2001 From: welsberr Date: Thu, 23 Apr 2026 07:26:39 -0400 Subject: [PATCH] Keep doclift fixture immutable in tests --- .../imports/doclift-test/artifacts.jsonl | 5 - .../imports/doclift-test/claims.jsonl | 1 - .../imports/doclift-test/concepts.jsonl | 1 - .../imports/doclift-test/lint_findings.json | 14 - .../imports/doclift-test/manifest.json | 16 - .../imports/doclift-test/observations.jsonl | 1 - .../imports/doclift-test/relations.jsonl | 0 .../imports/doclift-test/review_data.json | 316 ------------------ .../imports/doclift-test/review_queue.json | 20 -- .../imports/doclift-test/review_session.json | 36 -- tests/test_groundrecall_source_adapters.py | 11 +- 11 files changed, 9 insertions(+), 412 deletions(-) delete mode 100644 tests/fixtures/doclift_bundle_minimal/imports/doclift-test/artifacts.jsonl delete mode 100644 tests/fixtures/doclift_bundle_minimal/imports/doclift-test/claims.jsonl delete mode 100644 tests/fixtures/doclift_bundle_minimal/imports/doclift-test/concepts.jsonl delete mode 100644 tests/fixtures/doclift_bundle_minimal/imports/doclift-test/lint_findings.json delete mode 100644 tests/fixtures/doclift_bundle_minimal/imports/doclift-test/manifest.json delete mode 100644 tests/fixtures/doclift_bundle_minimal/imports/doclift-test/observations.jsonl delete mode 100644 tests/fixtures/doclift_bundle_minimal/imports/doclift-test/relations.jsonl delete mode 100644 tests/fixtures/doclift_bundle_minimal/imports/doclift-test/review_data.json delete mode 100644 tests/fixtures/doclift_bundle_minimal/imports/doclift-test/review_queue.json delete mode 100644 tests/fixtures/doclift_bundle_minimal/imports/doclift-test/review_session.json diff --git a/tests/fixtures/doclift_bundle_minimal/imports/doclift-test/artifacts.jsonl b/tests/fixtures/doclift_bundle_minimal/imports/doclift-test/artifacts.jsonl deleted file mode 100644 index 033ad87..0000000 --- a/tests/fixtures/doclift_bundle_minimal/imports/doclift-test/artifacts.jsonl +++ /dev/null @@ -1,5 +0,0 @@ -{"artifact_id": "ia_af72cb1641f3", "artifact_kind": "doclift_bundle_artifact", "created_at": "2026-04-23T11:22:39Z", "current_status": "draft", "import_id": "doclift-test", "metadata": {"source_kind": "doclift_bundle"}, "path": "documents/lecture-1/document.figures.json", "sha256": "f1c6970942981c53761360effdb5e5b590dcf7f0172839d37b636af96c19dadd", "title": "document.figures"} -{"artifact_id": "ia_6cc5265d52f6", "artifact_kind": "doclift_bundle_artifact", "created_at": "2026-04-23T11:22:39Z", "current_status": "draft", "import_id": "doclift-test", "metadata": {"source_kind": "doclift_bundle"}, "path": "documents/lecture-1/document.layout.json", "sha256": "9883a8c3bb6acae5295eaf51ae3308f83c8ec4452bb4279b7d370e0ebd5706b3", "title": "document.layout"} -{"artifact_id": "ia_51bdebab22e6", "artifact_kind": "doclift_bundle_artifact", "created_at": "2026-04-23T11:22:39Z", "current_status": "draft", "import_id": "doclift-test", "metadata": {"source_kind": "doclift_bundle"}, "path": "documents/lecture-1/document.md", "sha256": "bac0c576c657e5a79a484aa7ec1aee193742ff2627f8f7b100f62530ee1c991d", "title": "document"} -{"artifact_id": "ia_893c59d73929", "artifact_kind": "doclift_bundle_artifact", "created_at": "2026-04-23T11:22:39Z", "current_status": "draft", "import_id": "doclift-test", "metadata": {"source_kind": "doclift_bundle"}, "path": "documents/lecture-1/document.tables.json", "sha256": "a1eda53d353a7be08b3a1d55571c80f29be8fa157ab770fccc22fe3db6053fde", "title": "document.tables"} -{"artifact_id": "ia_ffa5b716b5a5", "artifact_kind": "doclift_bundle_artifact", "created_at": "2026-04-23T11:22:39Z", "current_status": "draft", "import_id": "doclift-test", "metadata": {"source_kind": "doclift_bundle"}, "path": "manifest.json", "sha256": "3810d72b9858e1eb69e981759a3901defb75776744ba50f73f426860f05b9b5a", "title": "manifest"} diff --git a/tests/fixtures/doclift_bundle_minimal/imports/doclift-test/claims.jsonl b/tests/fixtures/doclift_bundle_minimal/imports/doclift-test/claims.jsonl deleted file mode 100644 index 76a4201..0000000 --- a/tests/fixtures/doclift_bundle_minimal/imports/doclift-test/claims.jsonl +++ /dev/null @@ -1 +0,0 @@ -{"claim_id": "clm_doclift_1", "claim_kind": "summary", "claim_text": "Lecture 1. Example is a lecture in the imported doclift bundle.", "concept_ids": ["concept::lecture-1"], "confidence_hint": 0.85, "contradicts_claim_ids": [], "current_status": "triaged", "grounding_status": "grounded", "import_id": "doclift-test", "source_observation_ids": ["obs_doclift_1"], "supersedes_claim_ids": [], "supporting_fragment_ids": []} diff --git a/tests/fixtures/doclift_bundle_minimal/imports/doclift-test/concepts.jsonl b/tests/fixtures/doclift_bundle_minimal/imports/doclift-test/concepts.jsonl deleted file mode 100644 index e231be6..0000000 --- a/tests/fixtures/doclift_bundle_minimal/imports/doclift-test/concepts.jsonl +++ /dev/null @@ -1 +0,0 @@ -{"aliases": [], "concept_id": "concept::lecture-1", "current_status": "triaged", "description": "Imported from doclift bundle document kind 'lecture'.", "import_id": "doclift-test", "source_artifact_ids": ["ia_51bdebab22e6"], "title": "Lecture 1. Example"} diff --git a/tests/fixtures/doclift_bundle_minimal/imports/doclift-test/lint_findings.json b/tests/fixtures/doclift_bundle_minimal/imports/doclift-test/lint_findings.json deleted file mode 100644 index 9bb4b85..0000000 --- a/tests/fixtures/doclift_bundle_minimal/imports/doclift-test/lint_findings.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "import_id": "doclift-test", - "import_mode": "quick", - "summary": { - "artifact_count": 5, - "observation_count": 1, - "claim_count": 1, - "concept_count": 1, - "relation_count": 0, - "error_count": 0, - "warning_count": 0 - }, - "findings": [] -} \ No newline at end of file diff --git a/tests/fixtures/doclift_bundle_minimal/imports/doclift-test/manifest.json b/tests/fixtures/doclift_bundle_minimal/imports/doclift-test/manifest.json deleted file mode 100644 index b312441..0000000 --- a/tests/fixtures/doclift_bundle_minimal/imports/doclift-test/manifest.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "import_id": "doclift-test", - "import_mode": "quick", - "machine_id": "nerdanel", - "agent_id": "groundrecall.ingest", - "source_root": "/home/netuser/dev/GroundRecall/tests/fixtures/doclift_bundle_minimal", - "imported_at": "2026-04-23T11:22:39Z", - "source_repo_kind": "llmwiki", - "source_adapter": "doclift_bundle", - "import_intent": "both", - "artifact_count": 5, - "observation_count": 1, - "claim_count": 1, - "concept_count": 1, - "relation_count": 0 -} \ No newline at end of file diff --git a/tests/fixtures/doclift_bundle_minimal/imports/doclift-test/observations.jsonl b/tests/fixtures/doclift_bundle_minimal/imports/doclift-test/observations.jsonl deleted file mode 100644 index f4ff9f3..0000000 --- a/tests/fixtures/doclift_bundle_minimal/imports/doclift-test/observations.jsonl +++ /dev/null @@ -1 +0,0 @@ -{"artifact_id": "ia_51bdebab22e6", "confidence_hint": 0.85, "current_status": "draft", "grounding_status": "grounded", "import_id": "doclift-test", "line_end": 0, "line_start": 0, "observation_id": "obs_doclift_1", "origin_path": "documents/lecture-1/document.md", "origin_section": "Lecture 1. Example", "role": "summary", "source_url": "legacy/lecture-1.doc", "support_kind": "direct_source", "text": "Lecture 1. Example"} diff --git a/tests/fixtures/doclift_bundle_minimal/imports/doclift-test/relations.jsonl b/tests/fixtures/doclift_bundle_minimal/imports/doclift-test/relations.jsonl deleted file mode 100644 index e69de29..0000000 diff --git a/tests/fixtures/doclift_bundle_minimal/imports/doclift-test/review_data.json b/tests/fixtures/doclift_bundle_minimal/imports/doclift-test/review_data.json deleted file mode 100644 index daf96b0..0000000 --- a/tests/fixtures/doclift_bundle_minimal/imports/doclift-test/review_data.json +++ /dev/null @@ -1,316 +0,0 @@ -{ - "reviewer": "GroundRecall Import", - "draft_pack": { - "pack": { - "name": "groundrecall-import-doclift-test", - "display_name": "GroundRecall Import doclift-test", - "version": "0.1.0-draft", - "source_import_id": "doclift-test", - "source_root": "/home/netuser/dev/GroundRecall/tests/fixtures/doclift_bundle_minimal" - }, - "concepts": [ - { - "concept_id": "lecture-1", - "title": "Lecture 1. Example", - "description": "Imported from doclift bundle document kind 'lecture'.", - "prerequisites": [], - "mastery_signals": [], - "status": "provisional", - "notes": [ - "Claim: Lecture 1. Example is a lecture in the imported doclift bundle. [grounded]" - ] - } - ], - "conflicts": [], - "review_flags": [], - "attribution": { - "source_repo_kind": "llmwiki", - "source_root": "/home/netuser/dev/GroundRecall/tests/fixtures/doclift_bundle_minimal", - "imported_at": "2026-04-23T11:22:39Z", - "machine_id": "nerdanel", - "rights_note": "Imported llmwiki-style corpus requires review before promotion." - } - }, - "citation_reviews": [], - "ledger": [], - "import_context": { - "manifest": { - "import_id": "doclift-test", - "import_mode": "quick", - "machine_id": "nerdanel", - "agent_id": "groundrecall.ingest", - "source_root": "/home/netuser/dev/GroundRecall/tests/fixtures/doclift_bundle_minimal", - "imported_at": "2026-04-23T11:22:39Z", - "source_repo_kind": "llmwiki", - "source_adapter": "doclift_bundle", - "import_intent": "both", - "artifact_count": 5, - "observation_count": 1, - "claim_count": 1, - "concept_count": 1, - "relation_count": 0 - }, - "lint_summary": { - "artifact_count": 5, - "observation_count": 1, - "claim_count": 1, - "concept_count": 1, - "relation_count": 0, - "error_count": 0, - "warning_count": 0 - }, - "queue_length": 1, - "source_adapter": "doclift_bundle" - }, - "review_guidance": { - "overview": "Review concepts first, then inspect representative claims and their source observations before promotion.", - "priorities": [ - "Focus reviewer effort on concepts with strong grounded claims and explicit citations first.", - "Downgrade or reject concepts whose claims are fragmented, duplicated, or missing meaningful support.", - "For academic material, citation-bearing claims deserve special scrutiny for fit, contradiction, and fabrication risk." - ], - "citation_guidance": [ - "A citation key or extracted reference is evidence of traceability, not correctness.", - "Check whether the cited work actually supports the claim and whether the claim overstates it.", - "Use the citation track to prioritize claims that can move into a separate citation-ingestion workflow." - ] - }, - "field_specs": [ - { - "field": "status", - "label": "Review status", - "input": "select", - "required": true, - "options": [ - { - "value": "trusted", - "label": "Trusted", - "help": "Promote this concept and its supported claims when the evidence and wording are ready." - }, - { - "value": "provisional", - "label": "Provisional", - "help": "Keep this concept in reviewed state when it is promising but still needs citation or wording cleanup." - }, - { - "value": "needs_review", - "label": "Needs Review", - "help": "Leave undecided when support, scope, or concept boundaries are still unclear." - }, - { - "value": "rejected", - "label": "Rejected", - "help": "Exclude this concept when it is noise, unsupported, duplicated, or misleading." - } - ] - }, - { - "field": "description", - "label": "Concept description", - "input": "text", - "required": false, - "help": "Refine the concept summary to match the strongest supported interpretation." - }, - { - "field": "notes", - "label": "Reviewer notes", - "input": "textarea", - "required": false, - "help": "Record why this concept is trusted, provisional, rejected, or still unclear." - }, - { - "field": "prerequisites", - "label": "Prerequisites", - "input": "textarea", - "required": false, - "help": "List prerequisite concepts only when the manuscript support is explicit or defensible." - } - ], - "citation_field_specs": [ - { - "field": "status", - "label": "Citation review status", - "input": "select", - "required": true, - "options": [ - { - "value": "unreviewed", - "label": "Unreviewed", - "help": "Keep this citation candidate in triage until fit and existence are checked." - }, - { - "value": "verified", - "label": "Verified", - "help": "The cited work exists and materially supports the associated manuscript claim." - }, - { - "value": "needs_source_check", - "label": "Needs Source Check", - "help": "The citation may be useful but still needs direct source inspection or metadata cleanup." - }, - { - "value": "misleading", - "label": "Misleading", - "help": "The citation exists but overstates, contradicts, or poorly fits the claim." - }, - { - "value": "irrelevant", - "label": "Irrelevant", - "help": "The citation does not materially support the concept or claim under review." - }, - { - "value": "fabricated", - "label": "Fabricated", - "help": "The citation appears invented, malformed, or otherwise not real." - } - ] - }, - { - "field": "notes", - "label": "Citation notes", - "input": "textarea", - "required": false, - "help": "Record whether the cited work exists, fits the claim, or should move into a dedicated citation-ingestion lane." - } - ], - "concept_reviews": [ - { - "concept_id": "lecture-1", - "title": "Lecture 1. Example", - "status": "provisional", - "description": "Imported from doclift bundle document kind 'lecture'.", - "review_help": "Prefer `trusted` when claims are coherent and citation-bearing support is appropriate; prefer `provisional` when the concept is plausible but still needs citation or wording cleanup.", - "claim_count": 1, - "grounded_claim_count": 1, - "warning_count": 0, - "has_citation_support": false, - "top_claims": [ - { - "claim_id": "clm_doclift_1", - "claim_text": "Lecture 1. Example is a lecture in the imported doclift bundle.", - "claim_kind": "summary", - "grounding_status": "grounded", - "supporting_observations": [ - { - "observation_id": "obs_doclift_1", - "origin_path": "documents/lecture-1/document.md", - "origin_section": "Lecture 1. Example", - "text": "Lecture 1. Example", - "line_start": 0, - "line_end": 0 - } - ], - "citation_support": [ - { - "citation_key_count": 0, - "extracted_reference_count": 0, - "has_citation_support": false - } - ], - "artifact_paths": [ - "documents/lecture-1/document.md" - ], - "finding_messages": [] - } - ], - "notes": [ - "Claim: Lecture 1. Example is a lecture in the imported doclift bundle. [grounded]" - ] - } - ], - "bibliography": { - "enabled": false, - "entry_count": 0, - "source_files": [] - }, - "citations": { - "enabled": true, - "provider": "citegeist", - "artifacts": [ - { - "artifact_id": "ia_af72cb1641f3", - "path": "documents/lecture-1/document.figures.json", - "title": "document.figures", - "citation_keys": [], - "resolved_entries": [], - "citation_key_count": 0, - "extracted_references": [], - "extracted_reference_count": 0, - "citegeist_backends": [ - "anystyle", - "grobid", - "heuristic" - ] - }, - { - "artifact_id": "ia_6cc5265d52f6", - "path": "documents/lecture-1/document.layout.json", - "title": "document.layout", - "citation_keys": [], - "resolved_entries": [], - "citation_key_count": 0, - "extracted_references": [], - "extracted_reference_count": 0, - "citegeist_backends": [ - "anystyle", - "grobid", - "heuristic" - ] - }, - { - "artifact_id": "ia_51bdebab22e6", - "path": "documents/lecture-1/document.md", - "title": "document", - "citation_keys": [], - "resolved_entries": [], - "citation_key_count": 0, - "extracted_references": [], - "extracted_reference_count": 0, - "citegeist_backends": [ - "anystyle", - "grobid", - "heuristic" - ] - }, - { - "artifact_id": "ia_893c59d73929", - "path": "documents/lecture-1/document.tables.json", - "title": "document.tables", - "citation_keys": [], - "resolved_entries": [], - "citation_key_count": 0, - "extracted_references": [], - "extracted_reference_count": 0, - "citegeist_backends": [ - "anystyle", - "grobid", - "heuristic" - ] - }, - { - "artifact_id": "ia_ffa5b716b5a5", - "path": "manifest.json", - "title": "manifest", - "citation_keys": [], - "resolved_entries": [], - "citation_key_count": 0, - "extracted_references": [], - "extracted_reference_count": 0, - "citegeist_backends": [ - "anystyle", - "grobid", - "heuristic" - ] - } - ], - "summary": { - "artifact_count_with_citations": 0, - "citation_key_total": 0, - "extracted_reference_total": 0 - }, - "next_actions": [ - "Promote citation-bearing claims into a dedicated citation review lane.", - "Use CiteGeist extraction as a first pass, then verify support and metadata before trusting the citation." - ] - } -} \ No newline at end of file diff --git a/tests/fixtures/doclift_bundle_minimal/imports/doclift-test/review_queue.json b/tests/fixtures/doclift_bundle_minimal/imports/doclift-test/review_queue.json deleted file mode 100644 index 02fa3cf..0000000 --- a/tests/fixtures/doclift_bundle_minimal/imports/doclift-test/review_queue.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "import_id": "doclift-test", - "queue_length": 1, - "items": [ - { - "queue_id": "rq_clm_doclift_1", - "candidate_type": "claim", - "candidate_id": "clm_doclift_1", - "title": "Lecture 1. Example is a lecture in the imported doclift bundle.", - "triage_lane": "knowledge_capture", - "priority": 35, - "grounding_status": "grounded", - "status": "needs_review", - "finding_codes": [], - "concept_ids": [ - "concept::lecture-1" - ] - } - ] -} \ No newline at end of file diff --git a/tests/fixtures/doclift_bundle_minimal/imports/doclift-test/review_session.json b/tests/fixtures/doclift_bundle_minimal/imports/doclift-test/review_session.json deleted file mode 100644 index 6355ad6..0000000 --- a/tests/fixtures/doclift_bundle_minimal/imports/doclift-test/review_session.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "reviewer": "GroundRecall Import", - "draft_pack": { - "pack": { - "name": "groundrecall-import-doclift-test", - "display_name": "GroundRecall Import doclift-test", - "version": "0.1.0-draft", - "source_import_id": "doclift-test", - "source_root": "/home/netuser/dev/GroundRecall/tests/fixtures/doclift_bundle_minimal" - }, - "concepts": [ - { - "concept_id": "lecture-1", - "title": "Lecture 1. Example", - "description": "Imported from doclift bundle document kind 'lecture'.", - "prerequisites": [], - "mastery_signals": [], - "status": "provisional", - "notes": [ - "Claim: Lecture 1. Example is a lecture in the imported doclift bundle. [grounded]" - ] - } - ], - "conflicts": [], - "review_flags": [], - "attribution": { - "source_repo_kind": "llmwiki", - "source_root": "/home/netuser/dev/GroundRecall/tests/fixtures/doclift_bundle_minimal", - "imported_at": "2026-04-23T11:22:39Z", - "machine_id": "nerdanel", - "rights_note": "Imported llmwiki-style corpus requires review before promotion." - } - }, - "citation_reviews": [], - "ledger": [] -} \ No newline at end of file diff --git a/tests/test_groundrecall_source_adapters.py b/tests/test_groundrecall_source_adapters.py index a9ef892..b00f3c4 100644 --- a/tests/test_groundrecall_source_adapters.py +++ b/tests/test_groundrecall_source_adapters.py @@ -1,6 +1,7 @@ from __future__ import annotations from pathlib import Path +import shutil import groundrecall.ingest as ingest_module import groundrecall.source_adapters # noqa: F401 @@ -12,6 +13,12 @@ def _fixture_doclift_bundle() -> Path: return Path(__file__).parent / "fixtures" / "doclift_bundle_minimal" +def _copied_fixture_doclift_bundle(tmp_path: Path) -> Path: + target = tmp_path / "doclift_bundle_minimal" + shutil.copytree(_fixture_doclift_bundle(), target) + return target + + def test_groundrecall_source_adapter_registry_lists_expected_adapters() -> None: names = set(list_source_adapters()) assert "llmwiki" in names @@ -203,8 +210,8 @@ def test_didactopus_pack_import_generates_structured_concepts_and_relations(tmp_ assert "clm_stage_stage1_basics" in claim_ids -def test_doclift_bundle_import_generates_structured_concepts() -> None: - result = run_groundrecall_import(_fixture_doclift_bundle(), mode="quick", import_id="doclift-test") +def test_doclift_bundle_import_generates_structured_concepts(tmp_path: Path) -> None: + result = run_groundrecall_import(_copied_fixture_doclift_bundle(tmp_path), mode="quick", import_id="doclift-test") assert result.manifest["source_adapter"] == "doclift_bundle" assert result.manifest["import_intent"] == "both" concept_ids = {item["concept_id"] for item in result.concepts}