Harden doclift bundle workflow and fixtures
This commit is contained in:
parent
e819f17607
commit
76ca54327a
|
|
@ -1,3 +1,8 @@
|
|||
# GroundRecall
|
||||
|
||||
GroundRecall is a human-reviewable/AI usable knowledge layer with capabilities to meet or exceed 'llmwiki' with 'v2' specifications, plus an import path for existing llmwiki instances, and integration with Didactopus for review workflows and knowledge merging.
|
||||
GroundRecall is a human-reviewable, AI-usable knowledge layer designed to meet or exceed the 'llmwiki' 'v2' specification. It also provides an import path for existing llmwiki instances and integrates with Didactopus for review workflows and knowledge merging.
|
||||
|
||||
`GroundRecall` can also import normalized `doclift` bundles directly when the
|
||||
source material began as legacy office documents and you want a provenance-aware
|
||||
knowledge import without going through a learner pack first. See
|
||||
`docs/quickstart.md` for the minimal `doclift -> GroundRecall` flow.
|
||||
|
|
|
|||
|
|
@ -33,6 +33,21 @@ groundrecall import /path/to/llmwiki --mode grounded
|
|||
|
||||
The importer writes normalized artifacts under `imports/<import-id>/`.
|
||||
|
||||
Import from a normalized `doclift` bundle:
|
||||
|
||||
```bash
|
||||
groundrecall import /path/to/doclift-bundle --mode quick
|
||||
```
|
||||
|
||||
This path is intended for legacy-document corpora that were first normalized by
|
||||
`doclift`. If you also want a learner-facing pack, generate it with Didactopus from the same bundle before running the GroundRecall import:
|
||||
|
||||
```bash
|
||||
doclift convert-dir /path/to/legacy-course /tmp/doclift-bundle --asset-root /path/to/legacy-course
|
||||
didactopus doclift-bundle /tmp/doclift-bundle /tmp/didactopus-pack --course-title "Example Course"
|
||||
groundrecall import /tmp/doclift-bundle --mode quick
|
||||
```
|
||||
|
||||
## Review And Promote
|
||||
|
||||
Inspect the import outputs:
|
||||
|
|
|
|||
|
|
@ -10,6 +10,14 @@ from .base import DiscoveredImportSource, StructuredImportRows, register_source_
|
|||
class DocliftBundleSourceAdapter:
|
||||
name = "doclift_bundle"
|
||||
|
||||
def _resolve_bundle_path(self, base: Path, value: str | Path | None) -> Path:
|
||||
if value is None:
|
||||
return Path()
|
||||
path = Path(value)
|
||||
if path.is_absolute():
|
||||
return path
|
||||
return base / path
|
||||
|
||||
def detect(self, root: str | Path) -> bool:
|
||||
base = Path(root)
|
||||
return (base / "manifest.json").exists() and (base / "documents").exists()
|
||||
|
|
@ -69,10 +77,13 @@ class DocliftBundleSourceAdapter:
|
|||
for index, document in enumerate(documents, start=1):
|
||||
title = str(document.get("title") or f"Document {index}")
|
||||
concept_id = f"concept::{document.get('document_id') or title.lower().replace(' ', '-')}"
|
||||
markdown_path = Path(document.get("markdown_path", ""))
|
||||
relative_markdown = markdown_path.relative_to(base).as_posix() if markdown_path.is_absolute() and markdown_path.exists() and markdown_path.is_relative_to(base) else document.get("markdown_path", "")
|
||||
markdown_path = self._resolve_bundle_path(base, document.get("markdown_path", ""))
|
||||
if markdown_path.exists():
|
||||
relative_markdown = markdown_path.relative_to(base).as_posix()
|
||||
else:
|
||||
relative_markdown = str(document.get("markdown_path", ""))
|
||||
artifact_id = artifact_by_path.get(str(relative_markdown), "")
|
||||
figures_path = Path(document.get("figures_path", ""))
|
||||
figures_path = self._resolve_bundle_path(base, document.get("figures_path", ""))
|
||||
figure_payload = {}
|
||||
if figures_path.exists():
|
||||
figure_payload = json.loads(figures_path.read_text(encoding="utf-8"))
|
||||
|
|
|
|||
9
tests/fixtures/doclift_bundle_minimal/documents/lecture-1/document.figures.json
vendored
Executable file
9
tests/fixtures/doclift_bundle_minimal/documents/lecture-1/document.figures.json
vendored
Executable file
|
|
@ -0,0 +1,9 @@
|
|||
{
|
||||
"source_path": "legacy/lecture-1.doc",
|
||||
"figure_references": [
|
||||
{
|
||||
"label": "Figure 1",
|
||||
"caption": "Example figure caption"
|
||||
}
|
||||
]
|
||||
}
|
||||
8
tests/fixtures/doclift_bundle_minimal/documents/lecture-1/document.layout.json
vendored
Executable file
8
tests/fixtures/doclift_bundle_minimal/documents/lecture-1/document.layout.json
vendored
Executable file
|
|
@ -0,0 +1,8 @@
|
|||
[
|
||||
{
|
||||
"line_index": 0,
|
||||
"text": "Lecture 1. Example",
|
||||
"kind": "heading",
|
||||
"indent": 0
|
||||
}
|
||||
]
|
||||
|
|
@ -0,0 +1,9 @@
|
|||
# Lecture 1. Example
|
||||
|
||||
## Module A
|
||||
|
||||
### Lesson A
|
||||
|
||||
- Objective: Explain lesson A.
|
||||
|
||||
Body text that grounds the example lesson.
|
||||
13
tests/fixtures/doclift_bundle_minimal/documents/lecture-1/document.tables.json
vendored
Executable file
13
tests/fixtures/doclift_bundle_minimal/documents/lecture-1/document.tables.json
vendored
Executable file
|
|
@ -0,0 +1,13 @@
|
|||
{
|
||||
"source_path": "legacy/lecture-1.doc",
|
||||
"tables": [
|
||||
{
|
||||
"table_id": "table-1",
|
||||
"caption": "Example table",
|
||||
"rows": [
|
||||
["Column A", "Column B"],
|
||||
["1", "2"]
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -0,0 +1,5 @@
|
|||
{"artifact_id": "ia_af72cb1641f3", "artifact_kind": "doclift_bundle_artifact", "created_at": "2026-04-23T11:22:39Z", "current_status": "draft", "import_id": "doclift-test", "metadata": {"source_kind": "doclift_bundle"}, "path": "documents/lecture-1/document.figures.json", "sha256": "f1c6970942981c53761360effdb5e5b590dcf7f0172839d37b636af96c19dadd", "title": "document.figures"}
|
||||
{"artifact_id": "ia_6cc5265d52f6", "artifact_kind": "doclift_bundle_artifact", "created_at": "2026-04-23T11:22:39Z", "current_status": "draft", "import_id": "doclift-test", "metadata": {"source_kind": "doclift_bundle"}, "path": "documents/lecture-1/document.layout.json", "sha256": "9883a8c3bb6acae5295eaf51ae3308f83c8ec4452bb4279b7d370e0ebd5706b3", "title": "document.layout"}
|
||||
{"artifact_id": "ia_51bdebab22e6", "artifact_kind": "doclift_bundle_artifact", "created_at": "2026-04-23T11:22:39Z", "current_status": "draft", "import_id": "doclift-test", "metadata": {"source_kind": "doclift_bundle"}, "path": "documents/lecture-1/document.md", "sha256": "bac0c576c657e5a79a484aa7ec1aee193742ff2627f8f7b100f62530ee1c991d", "title": "document"}
|
||||
{"artifact_id": "ia_893c59d73929", "artifact_kind": "doclift_bundle_artifact", "created_at": "2026-04-23T11:22:39Z", "current_status": "draft", "import_id": "doclift-test", "metadata": {"source_kind": "doclift_bundle"}, "path": "documents/lecture-1/document.tables.json", "sha256": "a1eda53d353a7be08b3a1d55571c80f29be8fa157ab770fccc22fe3db6053fde", "title": "document.tables"}
|
||||
{"artifact_id": "ia_ffa5b716b5a5", "artifact_kind": "doclift_bundle_artifact", "created_at": "2026-04-23T11:22:39Z", "current_status": "draft", "import_id": "doclift-test", "metadata": {"source_kind": "doclift_bundle"}, "path": "manifest.json", "sha256": "3810d72b9858e1eb69e981759a3901defb75776744ba50f73f426860f05b9b5a", "title": "manifest"}
|
||||
|
|
@ -0,0 +1 @@
|
|||
{"claim_id": "clm_doclift_1", "claim_kind": "summary", "claim_text": "Lecture 1. Example is a lecture in the imported doclift bundle.", "concept_ids": ["concept::lecture-1"], "confidence_hint": 0.85, "contradicts_claim_ids": [], "current_status": "triaged", "grounding_status": "grounded", "import_id": "doclift-test", "source_observation_ids": ["obs_doclift_1"], "supersedes_claim_ids": [], "supporting_fragment_ids": []}
|
||||
|
|
@ -0,0 +1 @@
|
|||
{"aliases": [], "concept_id": "concept::lecture-1", "current_status": "triaged", "description": "Imported from doclift bundle document kind 'lecture'.", "import_id": "doclift-test", "source_artifact_ids": ["ia_51bdebab22e6"], "title": "Lecture 1. Example"}
|
||||
14
tests/fixtures/doclift_bundle_minimal/imports/doclift-test/lint_findings.json
vendored
Normal file
14
tests/fixtures/doclift_bundle_minimal/imports/doclift-test/lint_findings.json
vendored
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
{
|
||||
"import_id": "doclift-test",
|
||||
"import_mode": "quick",
|
||||
"summary": {
|
||||
"artifact_count": 5,
|
||||
"observation_count": 1,
|
||||
"claim_count": 1,
|
||||
"concept_count": 1,
|
||||
"relation_count": 0,
|
||||
"error_count": 0,
|
||||
"warning_count": 0
|
||||
},
|
||||
"findings": []
|
||||
}
|
||||
|
|
@ -0,0 +1,16 @@
|
|||
{
|
||||
"import_id": "doclift-test",
|
||||
"import_mode": "quick",
|
||||
"machine_id": "nerdanel",
|
||||
"agent_id": "groundrecall.ingest",
|
||||
"source_root": "/home/netuser/dev/GroundRecall/tests/fixtures/doclift_bundle_minimal",
|
||||
"imported_at": "2026-04-23T11:22:39Z",
|
||||
"source_repo_kind": "llmwiki",
|
||||
"source_adapter": "doclift_bundle",
|
||||
"import_intent": "both",
|
||||
"artifact_count": 5,
|
||||
"observation_count": 1,
|
||||
"claim_count": 1,
|
||||
"concept_count": 1,
|
||||
"relation_count": 0
|
||||
}
|
||||
1
tests/fixtures/doclift_bundle_minimal/imports/doclift-test/observations.jsonl
vendored
Normal file
1
tests/fixtures/doclift_bundle_minimal/imports/doclift-test/observations.jsonl
vendored
Normal file
|
|
@ -0,0 +1 @@
|
|||
{"artifact_id": "ia_51bdebab22e6", "confidence_hint": 0.85, "current_status": "draft", "grounding_status": "grounded", "import_id": "doclift-test", "line_end": 0, "line_start": 0, "observation_id": "obs_doclift_1", "origin_path": "documents/lecture-1/document.md", "origin_section": "Lecture 1. Example", "role": "summary", "source_url": "legacy/lecture-1.doc", "support_kind": "direct_source", "text": "Lecture 1. Example"}
|
||||
316
tests/fixtures/doclift_bundle_minimal/imports/doclift-test/review_data.json
vendored
Normal file
316
tests/fixtures/doclift_bundle_minimal/imports/doclift-test/review_data.json
vendored
Normal file
|
|
@ -0,0 +1,316 @@
|
|||
{
|
||||
"reviewer": "GroundRecall Import",
|
||||
"draft_pack": {
|
||||
"pack": {
|
||||
"name": "groundrecall-import-doclift-test",
|
||||
"display_name": "GroundRecall Import doclift-test",
|
||||
"version": "0.1.0-draft",
|
||||
"source_import_id": "doclift-test",
|
||||
"source_root": "/home/netuser/dev/GroundRecall/tests/fixtures/doclift_bundle_minimal"
|
||||
},
|
||||
"concepts": [
|
||||
{
|
||||
"concept_id": "lecture-1",
|
||||
"title": "Lecture 1. Example",
|
||||
"description": "Imported from doclift bundle document kind 'lecture'.",
|
||||
"prerequisites": [],
|
||||
"mastery_signals": [],
|
||||
"status": "provisional",
|
||||
"notes": [
|
||||
"Claim: Lecture 1. Example is a lecture in the imported doclift bundle. [grounded]"
|
||||
]
|
||||
}
|
||||
],
|
||||
"conflicts": [],
|
||||
"review_flags": [],
|
||||
"attribution": {
|
||||
"source_repo_kind": "llmwiki",
|
||||
"source_root": "/home/netuser/dev/GroundRecall/tests/fixtures/doclift_bundle_minimal",
|
||||
"imported_at": "2026-04-23T11:22:39Z",
|
||||
"machine_id": "nerdanel",
|
||||
"rights_note": "Imported llmwiki-style corpus requires review before promotion."
|
||||
}
|
||||
},
|
||||
"citation_reviews": [],
|
||||
"ledger": [],
|
||||
"import_context": {
|
||||
"manifest": {
|
||||
"import_id": "doclift-test",
|
||||
"import_mode": "quick",
|
||||
"machine_id": "nerdanel",
|
||||
"agent_id": "groundrecall.ingest",
|
||||
"source_root": "/home/netuser/dev/GroundRecall/tests/fixtures/doclift_bundle_minimal",
|
||||
"imported_at": "2026-04-23T11:22:39Z",
|
||||
"source_repo_kind": "llmwiki",
|
||||
"source_adapter": "doclift_bundle",
|
||||
"import_intent": "both",
|
||||
"artifact_count": 5,
|
||||
"observation_count": 1,
|
||||
"claim_count": 1,
|
||||
"concept_count": 1,
|
||||
"relation_count": 0
|
||||
},
|
||||
"lint_summary": {
|
||||
"artifact_count": 5,
|
||||
"observation_count": 1,
|
||||
"claim_count": 1,
|
||||
"concept_count": 1,
|
||||
"relation_count": 0,
|
||||
"error_count": 0,
|
||||
"warning_count": 0
|
||||
},
|
||||
"queue_length": 1,
|
||||
"source_adapter": "doclift_bundle"
|
||||
},
|
||||
"review_guidance": {
|
||||
"overview": "Review concepts first, then inspect representative claims and their source observations before promotion.",
|
||||
"priorities": [
|
||||
"Focus reviewer effort on concepts with strong grounded claims and explicit citations first.",
|
||||
"Downgrade or reject concepts whose claims are fragmented, duplicated, or missing meaningful support.",
|
||||
"For academic material, citation-bearing claims deserve special scrutiny for fit, contradiction, and fabrication risk."
|
||||
],
|
||||
"citation_guidance": [
|
||||
"A citation key or extracted reference is evidence of traceability, not correctness.",
|
||||
"Check whether the cited work actually supports the claim and whether the claim overstates it.",
|
||||
"Use the citation track to prioritize claims that can move into a separate citation-ingestion workflow."
|
||||
]
|
||||
},
|
||||
"field_specs": [
|
||||
{
|
||||
"field": "status",
|
||||
"label": "Review status",
|
||||
"input": "select",
|
||||
"required": true,
|
||||
"options": [
|
||||
{
|
||||
"value": "trusted",
|
||||
"label": "Trusted",
|
||||
"help": "Promote this concept and its supported claims when the evidence and wording are ready."
|
||||
},
|
||||
{
|
||||
"value": "provisional",
|
||||
"label": "Provisional",
|
||||
"help": "Keep this concept in reviewed state when it is promising but still needs citation or wording cleanup."
|
||||
},
|
||||
{
|
||||
"value": "needs_review",
|
||||
"label": "Needs Review",
|
||||
"help": "Leave undecided when support, scope, or concept boundaries are still unclear."
|
||||
},
|
||||
{
|
||||
"value": "rejected",
|
||||
"label": "Rejected",
|
||||
"help": "Exclude this concept when it is noise, unsupported, duplicated, or misleading."
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"field": "description",
|
||||
"label": "Concept description",
|
||||
"input": "text",
|
||||
"required": false,
|
||||
"help": "Refine the concept summary to match the strongest supported interpretation."
|
||||
},
|
||||
{
|
||||
"field": "notes",
|
||||
"label": "Reviewer notes",
|
||||
"input": "textarea",
|
||||
"required": false,
|
||||
"help": "Record why this concept is trusted, provisional, rejected, or still unclear."
|
||||
},
|
||||
{
|
||||
"field": "prerequisites",
|
||||
"label": "Prerequisites",
|
||||
"input": "textarea",
|
||||
"required": false,
|
||||
"help": "List prerequisite concepts only when the manuscript support is explicit or defensible."
|
||||
}
|
||||
],
|
||||
"citation_field_specs": [
|
||||
{
|
||||
"field": "status",
|
||||
"label": "Citation review status",
|
||||
"input": "select",
|
||||
"required": true,
|
||||
"options": [
|
||||
{
|
||||
"value": "unreviewed",
|
||||
"label": "Unreviewed",
|
||||
"help": "Keep this citation candidate in triage until fit and existence are checked."
|
||||
},
|
||||
{
|
||||
"value": "verified",
|
||||
"label": "Verified",
|
||||
"help": "The cited work exists and materially supports the associated manuscript claim."
|
||||
},
|
||||
{
|
||||
"value": "needs_source_check",
|
||||
"label": "Needs Source Check",
|
||||
"help": "The citation may be useful but still needs direct source inspection or metadata cleanup."
|
||||
},
|
||||
{
|
||||
"value": "misleading",
|
||||
"label": "Misleading",
|
||||
"help": "The citation exists but overstates, contradicts, or poorly fits the claim."
|
||||
},
|
||||
{
|
||||
"value": "irrelevant",
|
||||
"label": "Irrelevant",
|
||||
"help": "The citation does not materially support the concept or claim under review."
|
||||
},
|
||||
{
|
||||
"value": "fabricated",
|
||||
"label": "Fabricated",
|
||||
"help": "The citation appears invented, malformed, or otherwise not real."
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"field": "notes",
|
||||
"label": "Citation notes",
|
||||
"input": "textarea",
|
||||
"required": false,
|
||||
"help": "Record whether the cited work exists, fits the claim, or should move into a dedicated citation-ingestion lane."
|
||||
}
|
||||
],
|
||||
"concept_reviews": [
|
||||
{
|
||||
"concept_id": "lecture-1",
|
||||
"title": "Lecture 1. Example",
|
||||
"status": "provisional",
|
||||
"description": "Imported from doclift bundle document kind 'lecture'.",
|
||||
"review_help": "Prefer `trusted` when claims are coherent and citation-bearing support is appropriate; prefer `provisional` when the concept is plausible but still needs citation or wording cleanup.",
|
||||
"claim_count": 1,
|
||||
"grounded_claim_count": 1,
|
||||
"warning_count": 0,
|
||||
"has_citation_support": false,
|
||||
"top_claims": [
|
||||
{
|
||||
"claim_id": "clm_doclift_1",
|
||||
"claim_text": "Lecture 1. Example is a lecture in the imported doclift bundle.",
|
||||
"claim_kind": "summary",
|
||||
"grounding_status": "grounded",
|
||||
"supporting_observations": [
|
||||
{
|
||||
"observation_id": "obs_doclift_1",
|
||||
"origin_path": "documents/lecture-1/document.md",
|
||||
"origin_section": "Lecture 1. Example",
|
||||
"text": "Lecture 1. Example",
|
||||
"line_start": 0,
|
||||
"line_end": 0
|
||||
}
|
||||
],
|
||||
"citation_support": [
|
||||
{
|
||||
"citation_key_count": 0,
|
||||
"extracted_reference_count": 0,
|
||||
"has_citation_support": false
|
||||
}
|
||||
],
|
||||
"artifact_paths": [
|
||||
"documents/lecture-1/document.md"
|
||||
],
|
||||
"finding_messages": []
|
||||
}
|
||||
],
|
||||
"notes": [
|
||||
"Claim: Lecture 1. Example is a lecture in the imported doclift bundle. [grounded]"
|
||||
]
|
||||
}
|
||||
],
|
||||
"bibliography": {
|
||||
"enabled": false,
|
||||
"entry_count": 0,
|
||||
"source_files": []
|
||||
},
|
||||
"citations": {
|
||||
"enabled": true,
|
||||
"provider": "citegeist",
|
||||
"artifacts": [
|
||||
{
|
||||
"artifact_id": "ia_af72cb1641f3",
|
||||
"path": "documents/lecture-1/document.figures.json",
|
||||
"title": "document.figures",
|
||||
"citation_keys": [],
|
||||
"resolved_entries": [],
|
||||
"citation_key_count": 0,
|
||||
"extracted_references": [],
|
||||
"extracted_reference_count": 0,
|
||||
"citegeist_backends": [
|
||||
"anystyle",
|
||||
"grobid",
|
||||
"heuristic"
|
||||
]
|
||||
},
|
||||
{
|
||||
"artifact_id": "ia_6cc5265d52f6",
|
||||
"path": "documents/lecture-1/document.layout.json",
|
||||
"title": "document.layout",
|
||||
"citation_keys": [],
|
||||
"resolved_entries": [],
|
||||
"citation_key_count": 0,
|
||||
"extracted_references": [],
|
||||
"extracted_reference_count": 0,
|
||||
"citegeist_backends": [
|
||||
"anystyle",
|
||||
"grobid",
|
||||
"heuristic"
|
||||
]
|
||||
},
|
||||
{
|
||||
"artifact_id": "ia_51bdebab22e6",
|
||||
"path": "documents/lecture-1/document.md",
|
||||
"title": "document",
|
||||
"citation_keys": [],
|
||||
"resolved_entries": [],
|
||||
"citation_key_count": 0,
|
||||
"extracted_references": [],
|
||||
"extracted_reference_count": 0,
|
||||
"citegeist_backends": [
|
||||
"anystyle",
|
||||
"grobid",
|
||||
"heuristic"
|
||||
]
|
||||
},
|
||||
{
|
||||
"artifact_id": "ia_893c59d73929",
|
||||
"path": "documents/lecture-1/document.tables.json",
|
||||
"title": "document.tables",
|
||||
"citation_keys": [],
|
||||
"resolved_entries": [],
|
||||
"citation_key_count": 0,
|
||||
"extracted_references": [],
|
||||
"extracted_reference_count": 0,
|
||||
"citegeist_backends": [
|
||||
"anystyle",
|
||||
"grobid",
|
||||
"heuristic"
|
||||
]
|
||||
},
|
||||
{
|
||||
"artifact_id": "ia_ffa5b716b5a5",
|
||||
"path": "manifest.json",
|
||||
"title": "manifest",
|
||||
"citation_keys": [],
|
||||
"resolved_entries": [],
|
||||
"citation_key_count": 0,
|
||||
"extracted_references": [],
|
||||
"extracted_reference_count": 0,
|
||||
"citegeist_backends": [
|
||||
"anystyle",
|
||||
"grobid",
|
||||
"heuristic"
|
||||
]
|
||||
}
|
||||
],
|
||||
"summary": {
|
||||
"artifact_count_with_citations": 0,
|
||||
"citation_key_total": 0,
|
||||
"extracted_reference_total": 0
|
||||
},
|
||||
"next_actions": [
|
||||
"Promote citation-bearing claims into a dedicated citation review lane.",
|
||||
"Use CiteGeist extraction as a first pass, then verify support and metadata before trusting the citation."
|
||||
]
|
||||
}
|
||||
}
|
||||
20
tests/fixtures/doclift_bundle_minimal/imports/doclift-test/review_queue.json
vendored
Normal file
20
tests/fixtures/doclift_bundle_minimal/imports/doclift-test/review_queue.json
vendored
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
{
|
||||
"import_id": "doclift-test",
|
||||
"queue_length": 1,
|
||||
"items": [
|
||||
{
|
||||
"queue_id": "rq_clm_doclift_1",
|
||||
"candidate_type": "claim",
|
||||
"candidate_id": "clm_doclift_1",
|
||||
"title": "Lecture 1. Example is a lecture in the imported doclift bundle.",
|
||||
"triage_lane": "knowledge_capture",
|
||||
"priority": 35,
|
||||
"grounding_status": "grounded",
|
||||
"status": "needs_review",
|
||||
"finding_codes": [],
|
||||
"concept_ids": [
|
||||
"concept::lecture-1"
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
36
tests/fixtures/doclift_bundle_minimal/imports/doclift-test/review_session.json
vendored
Normal file
36
tests/fixtures/doclift_bundle_minimal/imports/doclift-test/review_session.json
vendored
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
{
|
||||
"reviewer": "GroundRecall Import",
|
||||
"draft_pack": {
|
||||
"pack": {
|
||||
"name": "groundrecall-import-doclift-test",
|
||||
"display_name": "GroundRecall Import doclift-test",
|
||||
"version": "0.1.0-draft",
|
||||
"source_import_id": "doclift-test",
|
||||
"source_root": "/home/netuser/dev/GroundRecall/tests/fixtures/doclift_bundle_minimal"
|
||||
},
|
||||
"concepts": [
|
||||
{
|
||||
"concept_id": "lecture-1",
|
||||
"title": "Lecture 1. Example",
|
||||
"description": "Imported from doclift bundle document kind 'lecture'.",
|
||||
"prerequisites": [],
|
||||
"mastery_signals": [],
|
||||
"status": "provisional",
|
||||
"notes": [
|
||||
"Claim: Lecture 1. Example is a lecture in the imported doclift bundle. [grounded]"
|
||||
]
|
||||
}
|
||||
],
|
||||
"conflicts": [],
|
||||
"review_flags": [],
|
||||
"attribution": {
|
||||
"source_repo_kind": "llmwiki",
|
||||
"source_root": "/home/netuser/dev/GroundRecall/tests/fixtures/doclift_bundle_minimal",
|
||||
"imported_at": "2026-04-23T11:22:39Z",
|
||||
"machine_id": "nerdanel",
|
||||
"rights_note": "Imported llmwiki-style corpus requires review before promotion."
|
||||
}
|
||||
},
|
||||
"citation_reviews": [],
|
||||
"ledger": []
|
||||
}
|
||||
|
|
@ -0,0 +1,16 @@
|
|||
{
|
||||
"documents": [
|
||||
{
|
||||
"document_id": "lecture-1",
|
||||
"title": "Lecture 1. Example",
|
||||
"document_kind": "lecture",
|
||||
"output_dir": "documents/lecture-1",
|
||||
"markdown_path": "documents/lecture-1/document.md",
|
||||
"layout_path": "documents/lecture-1/document.layout.json",
|
||||
"tables_path": "documents/lecture-1/document.tables.json",
|
||||
"figures_path": "documents/lecture-1/document.figures.json",
|
||||
"table_count": 1,
|
||||
"figure_reference_count": 1
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -8,6 +8,10 @@ from groundrecall.source_adapters.base import detect_source_adapter, list_source
|
|||
from groundrecall.ingest import run_groundrecall_import
|
||||
|
||||
|
||||
def _fixture_doclift_bundle() -> Path:
|
||||
return Path(__file__).parent / "fixtures" / "doclift_bundle_minimal"
|
||||
|
||||
|
||||
def test_groundrecall_source_adapter_registry_lists_expected_adapters() -> None:
|
||||
names = set(list_source_adapters())
|
||||
assert "llmwiki" in names
|
||||
|
|
@ -33,10 +37,8 @@ def test_detect_didactopus_pack_adapter(tmp_path: Path) -> None:
|
|||
assert adapter.import_intent() == "both"
|
||||
|
||||
|
||||
def test_detect_doclift_bundle_adapter(tmp_path: Path) -> None:
|
||||
(tmp_path / "documents").mkdir()
|
||||
(tmp_path / "manifest.json").write_text('{"documents": []}\n', encoding="utf-8")
|
||||
adapter = detect_source_adapter(tmp_path)
|
||||
def test_detect_doclift_bundle_adapter() -> None:
|
||||
adapter = detect_source_adapter(_fixture_doclift_bundle())
|
||||
assert adapter.name == "doclift_bundle"
|
||||
assert adapter.import_intent() == "both"
|
||||
|
||||
|
|
@ -201,35 +203,11 @@ def test_didactopus_pack_import_generates_structured_concepts_and_relations(tmp_
|
|||
assert "clm_stage_stage1_basics" in claim_ids
|
||||
|
||||
|
||||
def test_doclift_bundle_import_generates_structured_concepts(tmp_path: Path) -> None:
|
||||
doc_dir = tmp_path / "documents" / "lesson-a"
|
||||
doc_dir.mkdir(parents=True)
|
||||
(tmp_path / "manifest.json").write_text(
|
||||
'\n'.join(
|
||||
[
|
||||
"{",
|
||||
' "documents": [',
|
||||
" {",
|
||||
' "document_id": "lesson-a",',
|
||||
' "title": "Lecture 1. Example",',
|
||||
' "document_kind": "lecture",',
|
||||
f' "output_dir": "{doc_dir}",',
|
||||
f' "markdown_path": "{doc_dir / "document.md"}",',
|
||||
f' "figures_path": "{doc_dir / "document.figures.json"}"',
|
||||
" }",
|
||||
" ]",
|
||||
"}",
|
||||
]
|
||||
),
|
||||
encoding="utf-8",
|
||||
)
|
||||
(doc_dir / "document.md").write_text("# Lecture 1. Example\n\nBody.\n", encoding="utf-8")
|
||||
(doc_dir / "document.figures.json").write_text('{"source_path": "/tmp/source.doc"}\n', encoding="utf-8")
|
||||
|
||||
result = run_groundrecall_import(tmp_path, mode="quick", import_id="doclift-test")
|
||||
def test_doclift_bundle_import_generates_structured_concepts() -> None:
|
||||
result = run_groundrecall_import(_fixture_doclift_bundle(), mode="quick", import_id="doclift-test")
|
||||
assert result.manifest["source_adapter"] == "doclift_bundle"
|
||||
assert result.manifest["import_intent"] == "both"
|
||||
concept_ids = {item["concept_id"] for item in result.concepts}
|
||||
assert "concept::lesson-a" in concept_ids
|
||||
assert "concept::lecture-1" in concept_ids
|
||||
claim_ids = {item["claim_id"] for item in result.claims}
|
||||
assert "clm_doclift_1" in claim_ids
|
||||
|
|
|
|||
Loading…
Reference in New Issue