Compare commits
2 Commits
e819f17607
...
1731e0006a
| Author | SHA1 | Date |
|---|---|---|
|
|
1731e0006a | |
|
|
76ca54327a |
|
|
@ -1,3 +1,8 @@
|
|||
# GroundRecall
|
||||
|
||||
GroundRecall is a human-reviewable, AI-usable knowledge layer built to meet or exceed the 'llmwiki' 'v2' specification. It also provides an import path for existing llmwiki instances and integrates with Didactopus for review workflows and knowledge merging.
|
||||
GroundRecall is a human-reviewable, AI-usable knowledge layer built to meet or exceed the 'llmwiki' 'v2' specification. It also provides an import path for existing llmwiki instances and integrates with Didactopus for review workflows and knowledge merging.
|
||||
|
||||
`GroundRecall` can also import normalized `doclift` bundles directly when the
|
||||
source material began as legacy office documents and you want a provenance-aware
|
||||
knowledge import without going through a learner pack first. See
|
||||
`docs/quickstart.md` for the minimal `doclift -> GroundRecall` flow.
|
||||
|
|
|
|||
|
|
@ -33,6 +33,21 @@ groundrecall import /path/to/llmwiki --mode grounded
|
|||
|
||||
The importer writes normalized artifacts under `imports/<import-id>/`.
|
||||
|
||||
Import from a normalized `doclift` bundle:
|
||||
|
||||
```bash
|
||||
groundrecall import /path/to/doclift-bundle --mode quick
|
||||
```
|
||||
|
||||
This path is intended for legacy-document corpora that were first normalized by
|
||||
`doclift`. If you want a learner-facing pack first, use Didactopus in between:
|
||||
|
||||
```bash
|
||||
doclift convert-dir /path/to/legacy-course /tmp/doclift-bundle --asset-root /path/to/legacy-course
|
||||
didactopus doclift-bundle /tmp/doclift-bundle /tmp/didactopus-pack --course-title "Example Course"
|
||||
groundrecall import /tmp/didactopus-pack --mode quick
|
||||
```
|
||||
|
||||
## Review And Promote
|
||||
|
||||
Inspect the import outputs:
|
||||
|
|
|
|||
|
|
@ -10,6 +10,14 @@ from .base import DiscoveredImportSource, StructuredImportRows, register_source_
|
|||
class DocliftBundleSourceAdapter:
|
||||
name = "doclift_bundle"
|
||||
|
||||
def _resolve_bundle_path(self, base: Path, value: str | Path | None) -> Path:
|
||||
if value is None:
|
||||
return Path()
|
||||
path = Path(value)
|
||||
if path.is_absolute():
|
||||
return path
|
||||
return base / path
|
||||
|
||||
def detect(self, root: str | Path) -> bool:
|
||||
base = Path(root)
|
||||
return (base / "manifest.json").exists() and (base / "documents").exists()
|
||||
|
|
@ -69,10 +77,13 @@ class DocliftBundleSourceAdapter:
|
|||
for index, document in enumerate(documents, start=1):
|
||||
title = str(document.get("title") or f"Document {index}")
|
||||
concept_id = f"concept::{document.get('document_id') or title.lower().replace(' ', '-')}"
|
||||
markdown_path = Path(document.get("markdown_path", ""))
|
||||
relative_markdown = markdown_path.relative_to(base).as_posix() if markdown_path.is_absolute() and markdown_path.exists() and markdown_path.is_relative_to(base) else document.get("markdown_path", "")
|
||||
markdown_path = self._resolve_bundle_path(base, document.get("markdown_path", ""))
|
||||
if markdown_path.exists():
|
||||
relative_markdown = markdown_path.relative_to(base).as_posix()
|
||||
else:
|
||||
relative_markdown = str(document.get("markdown_path", ""))
|
||||
artifact_id = artifact_by_path.get(str(relative_markdown), "")
|
||||
figures_path = Path(document.get("figures_path", ""))
|
||||
figures_path = self._resolve_bundle_path(base, document.get("figures_path", ""))
|
||||
figure_payload = {}
|
||||
if figures_path.exists():
|
||||
figure_payload = json.loads(figures_path.read_text(encoding="utf-8"))
|
||||
|
|
|
|||
9
tests/fixtures/doclift_bundle_minimal/documents/lecture-1/document.figures.json
vendored
Executable file
9
tests/fixtures/doclift_bundle_minimal/documents/lecture-1/document.figures.json
vendored
Executable file
|
|
@ -0,0 +1,9 @@
|
|||
{
|
||||
"source_path": "legacy/lecture-1.doc",
|
||||
"figure_references": [
|
||||
{
|
||||
"label": "Figure 1",
|
||||
"caption": "Example figure caption"
|
||||
}
|
||||
]
|
||||
}
|
||||
8
tests/fixtures/doclift_bundle_minimal/documents/lecture-1/document.layout.json
vendored
Executable file
8
tests/fixtures/doclift_bundle_minimal/documents/lecture-1/document.layout.json
vendored
Executable file
|
|
@ -0,0 +1,8 @@
|
|||
[
|
||||
{
|
||||
"line_index": 0,
|
||||
"text": "Lecture 1. Example",
|
||||
"kind": "heading",
|
||||
"indent": 0
|
||||
}
|
||||
]
|
||||
|
|
@ -0,0 +1,9 @@
|
|||
# Lecture 1. Example
|
||||
|
||||
## Module A
|
||||
|
||||
### Lesson A
|
||||
|
||||
- Objective: Explain lesson A.
|
||||
|
||||
Body text that grounds the example lesson.
|
||||
13
tests/fixtures/doclift_bundle_minimal/documents/lecture-1/document.tables.json
vendored
Executable file
13
tests/fixtures/doclift_bundle_minimal/documents/lecture-1/document.tables.json
vendored
Executable file
|
|
@ -0,0 +1,13 @@
|
|||
{
|
||||
"source_path": "legacy/lecture-1.doc",
|
||||
"tables": [
|
||||
{
|
||||
"table_id": "table-1",
|
||||
"caption": "Example table",
|
||||
"rows": [
|
||||
["Column A", "Column B"],
|
||||
["1", "2"]
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -0,0 +1,16 @@
|
|||
{
|
||||
"documents": [
|
||||
{
|
||||
"document_id": "lecture-1",
|
||||
"title": "Lecture 1. Example",
|
||||
"document_kind": "lecture",
|
||||
"output_dir": "documents/lecture-1",
|
||||
"markdown_path": "documents/lecture-1/document.md",
|
||||
"layout_path": "documents/lecture-1/document.layout.json",
|
||||
"tables_path": "documents/lecture-1/document.tables.json",
|
||||
"figures_path": "documents/lecture-1/document.figures.json",
|
||||
"table_count": 1,
|
||||
"figure_reference_count": 1
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -1,6 +1,7 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
import shutil
|
||||
|
||||
import groundrecall.ingest as ingest_module
|
||||
import groundrecall.source_adapters # noqa: F401
|
||||
|
|
@ -8,6 +9,16 @@ from groundrecall.source_adapters.base import detect_source_adapter, list_source
|
|||
from groundrecall.ingest import run_groundrecall_import
|
||||
|
||||
|
||||
def _fixture_doclift_bundle() -> Path:
|
||||
return Path(__file__).parent / "fixtures" / "doclift_bundle_minimal"
|
||||
|
||||
|
||||
def _copied_fixture_doclift_bundle(tmp_path: Path) -> Path:
|
||||
target = tmp_path / "doclift_bundle_minimal"
|
||||
shutil.copytree(_fixture_doclift_bundle(), target)
|
||||
return target
|
||||
|
||||
|
||||
def test_groundrecall_source_adapter_registry_lists_expected_adapters() -> None:
|
||||
names = set(list_source_adapters())
|
||||
assert "llmwiki" in names
|
||||
|
|
@ -33,10 +44,8 @@ def test_detect_didactopus_pack_adapter(tmp_path: Path) -> None:
|
|||
assert adapter.import_intent() == "both"
|
||||
|
||||
|
||||
def test_detect_doclift_bundle_adapter(tmp_path: Path) -> None:
|
||||
(tmp_path / "documents").mkdir()
|
||||
(tmp_path / "manifest.json").write_text('{"documents": []}\n', encoding="utf-8")
|
||||
adapter = detect_source_adapter(tmp_path)
|
||||
def test_detect_doclift_bundle_adapter() -> None:
|
||||
adapter = detect_source_adapter(_fixture_doclift_bundle())
|
||||
assert adapter.name == "doclift_bundle"
|
||||
assert adapter.import_intent() == "both"
|
||||
|
||||
|
|
@ -202,34 +211,10 @@ def test_didactopus_pack_import_generates_structured_concepts_and_relations(tmp_
|
|||
|
||||
|
||||
def test_doclift_bundle_import_generates_structured_concepts(tmp_path: Path) -> None:
|
||||
doc_dir = tmp_path / "documents" / "lesson-a"
|
||||
doc_dir.mkdir(parents=True)
|
||||
(tmp_path / "manifest.json").write_text(
|
||||
'\n'.join(
|
||||
[
|
||||
"{",
|
||||
' "documents": [',
|
||||
" {",
|
||||
' "document_id": "lesson-a",',
|
||||
' "title": "Lecture 1. Example",',
|
||||
' "document_kind": "lecture",',
|
||||
f' "output_dir": "{doc_dir}",',
|
||||
f' "markdown_path": "{doc_dir / "document.md"}",',
|
||||
f' "figures_path": "{doc_dir / "document.figures.json"}"',
|
||||
" }",
|
||||
" ]",
|
||||
"}",
|
||||
]
|
||||
),
|
||||
encoding="utf-8",
|
||||
)
|
||||
(doc_dir / "document.md").write_text("# Lecture 1. Example\n\nBody.\n", encoding="utf-8")
|
||||
(doc_dir / "document.figures.json").write_text('{"source_path": "/tmp/source.doc"}\n', encoding="utf-8")
|
||||
|
||||
result = run_groundrecall_import(tmp_path, mode="quick", import_id="doclift-test")
|
||||
result = run_groundrecall_import(_copied_fixture_doclift_bundle(tmp_path), mode="quick", import_id="doclift-test")
|
||||
assert result.manifest["source_adapter"] == "doclift_bundle"
|
||||
assert result.manifest["import_intent"] == "both"
|
||||
concept_ids = {item["concept_id"] for item in result.concepts}
|
||||
assert "concept::lesson-a" in concept_ids
|
||||
assert "concept::lecture-1" in concept_ids
|
||||
claim_ids = {item["claim_id"] for item in result.claims}
|
||||
assert "clm_doclift_1" in claim_ids
|
||||
|
|
|
|||
Loading…
Reference in New Issue