Preserve portable doclift source paths
This commit is contained in:
parent
b7e2f9f540
commit
3837bd2316
|
|
@ -38,6 +38,15 @@ def _safe_read_json(path: Path) -> dict:
|
|||
return json.loads(path.read_text(encoding="utf-8"))
|
||||
|
||||
|
||||
def _resolve_bundle_path(base: Path, value: str | Path | None, fallback: Path) -> Path:
    """Resolve a path entry against the bundle root directory.

    Args:
        base: Directory that relative entries are anchored to.
        value: Path entry to resolve; may be ``None`` or an empty string.
        fallback: Path returned when ``value`` is ``None`` or ``""``.

    Returns:
        ``fallback`` when no usable value is given, ``value`` unchanged (as a
        ``Path``) when it is absolute, otherwise ``base / value``.
    """
    if value is not None and value != "":
        candidate = Path(value)
        # Absolute entries are kept verbatim; relative ones hang off ``base``.
        return candidate if candidate.is_absolute() else base / candidate
    return fallback
|
||||
|
||||
|
||||
def adapt_markdown(path: str | Path) -> NormalizedDocument:
|
||||
text = read_textish(path)
|
||||
return NormalizedDocument(
|
||||
|
|
@ -140,9 +149,14 @@ def adapt_doclift_bundle(path: str | Path) -> list[NormalizedDocument]:
|
|||
text = markdown_path.read_text(encoding="utf-8")
|
||||
sections = _simple_section_split(text)
|
||||
bundle_meta = by_output_dir.get(doc_dir.name, {})
|
||||
figures_payload = _safe_read_json(doc_dir / "document.figures.json")
|
||||
tables_payload = _safe_read_json(doc_dir / "document.tables.json")
|
||||
source_path = figures_payload.get("source_path") or tables_payload.get("source_path") or str(markdown_path)
|
||||
layout_path = _resolve_bundle_path(base, bundle_meta.get("layout_path"), doc_dir / "document.layout.json")
|
||||
tables_path = _resolve_bundle_path(base, bundle_meta.get("tables_path"), doc_dir / "document.tables.json")
|
||||
figures_path = _resolve_bundle_path(base, bundle_meta.get("figures_path"), doc_dir / "document.figures.json")
|
||||
figures_payload = _safe_read_json(figures_path)
|
||||
tables_payload = _safe_read_json(tables_path)
|
||||
source_path = figures_payload.get("source_path") or tables_payload.get("source_path") or markdown_path.relative_to(base).as_posix()
|
||||
relative_doc_dir = doc_dir.relative_to(base).as_posix()
|
||||
relative_markdown_path = markdown_path.relative_to(base).as_posix()
|
||||
docs.append(
|
||||
NormalizedDocument(
|
||||
source_path=str(source_path),
|
||||
|
|
@ -152,13 +166,14 @@ def adapt_doclift_bundle(path: str | Path) -> list[NormalizedDocument]:
|
|||
sections=sections,
|
||||
metadata={
|
||||
"doclift_bundle": True,
|
||||
"bundle_root": str(base),
|
||||
"bundle_document_dir": str(doc_dir),
|
||||
"bundle_markdown_path": str(markdown_path),
|
||||
"bundle_root": ".",
|
||||
"bundle_document_dir": relative_doc_dir,
|
||||
"bundle_markdown_path": relative_markdown_path,
|
||||
"document_kind": bundle_meta.get("document_kind", "document"),
|
||||
"layout_path": bundle_meta.get("layout_path", str(doc_dir / "document.layout.json")),
|
||||
"tables_path": bundle_meta.get("tables_path", str(doc_dir / "document.tables.json")),
|
||||
"figures_path": bundle_meta.get("figures_path", str(doc_dir / "document.figures.json")),
|
||||
"source_path_kind": figures_payload.get("source_path_kind") or tables_payload.get("source_path_kind") or bundle_meta.get("source_path_kind", "source_root_relative"),
|
||||
"layout_path": bundle_meta.get("layout_path", layout_path.relative_to(base).as_posix()),
|
||||
"tables_path": bundle_meta.get("tables_path", tables_path.relative_to(base).as_posix()),
|
||||
"figures_path": bundle_meta.get("figures_path", figures_path.relative_to(base).as_posix()),
|
||||
"table_count": bundle_meta.get("table_count", 0),
|
||||
"figure_reference_count": bundle_meta.get("figure_reference_count", 0),
|
||||
},
|
||||
|
|
|
|||
|
|
@ -75,10 +75,10 @@ def test_adapt_documents_from_doclift_bundle(tmp_path: Path) -> None:
|
|||
{
|
||||
"title": "Lecture 1. Example",
|
||||
"document_kind": "lecture",
|
||||
"output_dir": str(doc_dir),
|
||||
"layout_path": str(doc_dir / "document.layout.json"),
|
||||
"tables_path": str(doc_dir / "document.tables.json"),
|
||||
"figures_path": str(doc_dir / "document.figures.json"),
|
||||
"output_dir": "documents/lesson-a",
|
||||
"layout_path": "documents/lesson-a/document.layout.json",
|
||||
"tables_path": "documents/lesson-a/document.tables.json",
|
||||
"figures_path": "documents/lesson-a/document.figures.json",
|
||||
"table_count": 1,
|
||||
"figure_reference_count": 0,
|
||||
}
|
||||
|
|
@ -89,8 +89,8 @@ def test_adapt_documents_from_doclift_bundle(tmp_path: Path) -> None:
|
|||
)
|
||||
(doc_dir / "document.md").write_text("# Lecture 1. Example\n\n## Module\n### Lesson A\nBody.", encoding="utf-8")
|
||||
(doc_dir / "document.layout.json").write_text("[]", encoding="utf-8")
|
||||
(doc_dir / "document.tables.json").write_text(json.dumps({"source_path": "/tmp/source.doc", "tables": []}), encoding="utf-8")
|
||||
(doc_dir / "document.figures.json").write_text(json.dumps({"source_path": "/tmp/source.doc", "figure_references": []}), encoding="utf-8")
|
||||
(doc_dir / "document.tables.json").write_text(json.dumps({"source_path": "raw/source.doc", "source_path_kind": "source_root_relative", "tables": []}), encoding="utf-8")
|
||||
(doc_dir / "document.figures.json").write_text(json.dumps({"source_path": "raw/source.doc", "source_path_kind": "source_root_relative", "figure_references": []}), encoding="utf-8")
|
||||
|
||||
docs = adapt_documents(bundle)
|
||||
|
||||
|
|
@ -99,4 +99,6 @@ def test_adapt_documents_from_doclift_bundle(tmp_path: Path) -> None:
|
|||
assert docs[0].title == "Lecture 1. Example"
|
||||
assert docs[0].metadata["document_kind"] == "lecture"
|
||||
assert docs[0].metadata["doclift_bundle"] is True
|
||||
assert docs[0].source_path == "/tmp/source.doc"
|
||||
assert docs[0].source_path == "raw/source.doc"
|
||||
assert docs[0].metadata["bundle_markdown_path"] == "documents/lesson-a/document.md"
|
||||
assert docs[0].metadata["source_path_kind"] == "source_root_relative"
|
||||
|
|
|
|||
Loading…
Reference in New Issue