55 lines
2.1 KiB
Python
Executable File
55 lines
2.1 KiB
Python
Executable File
from __future__ import annotations
|
|
|
|
import json
|
|
from pathlib import Path
|
|
|
|
from doclift import convert as convert_module
|
|
|
|
|
|
def test_convert_directory_writes_manifest_and_conversion_report(tmp_path: Path, monkeypatch) -> None:
    """Check the JSON artifacts ``convert_directory`` writes for one stubbed document.

    The test builds a source directory holding a single ``sample.doc`` and an
    asset directory holding ``Fig. 5.1.bmp``, patches ``run_catdoc`` on the
    convert module so it returns canned text, runs the conversion, and then
    inspects ``manifest.json``, ``conversion_report.json`` and the document's
    ``document.figures.json`` under the output directory.
    """
    out_root = tmp_path / "out"

    # Input tree: the .doc content is a placeholder because the patched
    # run_catdoc below ignores its argument and always returns the canned text.
    source_root = tmp_path / "src"
    source_root.mkdir()
    asset_root = tmp_path / "assets"
    asset_root.mkdir()
    source_root.joinpath("sample.doc").write_text("stub", encoding="utf-8")
    asset_root.joinpath("Fig. 5.1.bmp").write_text("img", encoding="utf-8")

    # Canned text: a heading line, a sentence referencing a figure and a
    # table, a table caption, and two tab-separated rows.
    sample_lines = (
        "Lecture 1. Example legacy document",
        "",
        "See Fig. 5.1 and Table 1.",
        "",
        "Table 1. Example caption",
        "",
        "Metric\tRest\tSwim",
        "O2\t1.0\t2.0",
    )
    sample_text = "\n".join(sample_lines)

    def fake_run_catdoc(path):
        # Stand-in for convert_module.run_catdoc: same single ``path``
        # parameter, always yields the canned text.
        return sample_text

    def read_json(*parts: str):
        # Load a UTF-8 JSON file located at ``out_root / parts...``.
        return json.loads(out_root.joinpath(*parts).read_text(encoding="utf-8"))

    monkeypatch.setattr(convert_module, "run_catdoc", fake_run_catdoc)

    report = convert_module.convert_directory(source_root, out_root, asset_root=asset_root)

    assert report.document_count == 1

    manifest = read_json("manifest.json")
    conversion_report = read_json("conversion_report.json")
    figures_payload = read_json(
        "documents",
        "sample-lecture-1-example-legacy-document",
        "document.figures.json",
    )

    # Manifest: one document, with paths expressed relative to their roots.
    assert manifest["document_count"] == 1
    assert manifest["source_root"] == "src"
    first_document = manifest["documents"][0]
    assert first_document["source_path"] == "sample.doc"
    assert first_document["markdown_path"] == "documents/sample-lecture-1-example-legacy-document/document.md"

    # Conversion report: summary counters for tables and figure references.
    summary = conversion_report["summary"]
    assert summary["documents_with_tables"] == 1
    assert summary["documents_with_figure_references"] == 1

    # Figures payload: the referenced figure and the single matching asset.
    assert figures_payload["source_path"] == "sample.doc"
    assert figures_payload["source_path_kind"] == "source_root_relative"
    assert figures_payload["figure_references"] == ["Fig. 5.1"]
    related_assets = figures_payload["related_assets"]
    assert len(related_assets) == 1
    assert related_assets[0]["path"] == "Fig. 5.1.bmp"
|