diff --git a/src/groundrecall/groundrecall_source_adapters/__init__.py b/src/groundrecall/groundrecall_source_adapters/__init__.py
index b0b9fd3..54b6fbf 100644
--- a/src/groundrecall/groundrecall_source_adapters/__init__.py
+++ b/src/groundrecall/groundrecall_source_adapters/__init__.py
@@ -8,8 +8,10 @@ from .base import get_source_adapter, list_source_adapters
 from . import llmwiki  # noqa: F401
 from . import polypaper  # noqa: F401
 from . import doclift_bundle  # noqa: F401
+from . import indexcc  # noqa: F401
 from . import markdown_notes  # noqa: F401
 from . import transcript  # noqa: F401
 from . import didactopus_pack  # noqa: F401
+from . import pandasthumb_mt  # noqa: F401
 
 __all__ = ["get_source_adapter", "list_source_adapters"]
diff --git a/src/groundrecall/groundrecall_source_adapters/indexcc.py b/src/groundrecall/groundrecall_source_adapters/indexcc.py
new file mode 100644
index 0000000..fe5b0a1
--- /dev/null
+++ b/src/groundrecall/groundrecall_source_adapters/indexcc.py
@@ -0,0 +1,199 @@
+from __future__ import annotations
+
+from hashlib import sha256
+import json
+import re
+from pathlib import Path
+from typing import Any
+
+from .base import DiscoveredImportSource, StructuredImportRows, register_source_adapter
+
+
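+# An entry is a markdown file split on "## Section" headings (Claim, Response,
+# References, Further Reading), with an optional "<name>.meta.json" sidecar
+# carrying title, page_kind, author, section_label, and legacy_source.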
+SECTION_RE = re.compile(r"^##\s+(.*)$", re.M)
+
+
+def _site_root(root: Path) -> Path:
+    candidate = root / "site2_src" / "content" / "indexcc"
+    if candidate.is_dir():
+        return candidate
+    if (root / "content" / "indexcc").is_dir():
+        return root / "content" / "indexcc"
+    if root.name == "indexcc" and root.is_dir():
+        return root
+    return root
+
+
+def _discover_md_files(base: Path) -> list[Path]:
+    if not base.exists():
+        return []
+    if base.is_file():
+        return [base] if base.suffix.lower() == ".md" else []
+    return sorted(path for path in base.rglob("*.md") if path.is_file())
+
+
+def _read_meta(md_path: Path) -> dict[str, Any]:
+    meta_path = md_path.with_suffix(".meta.json")
+    if not meta_path.exists():
+        return {}
+    return json.loads(meta_path.read_text(encoding="utf-8"))
+
+
+def _split_sections(text: str) -> dict[str, str]:
+    lines = text.splitlines()
+    current = "Body"
+    sections: dict[str, list[str]] = {current: []}
+    for line in lines:
+        match = SECTION_RE.match(line)
+        if match:
+            current = match.group(1).strip()
+            sections.setdefault(current, [])
+            continue
+        sections.setdefault(current, []).append(line)
+    return {key: "\n".join(value).strip() for key, value in sections.items() if "\n".join(value).strip()}
+
+
+class IndexCcSourceAdapter:
+    name = "indexcc"
+
+    def detect(self, root: str | Path) -> bool:
+        base = _site_root(Path(root))
+        if not base.is_dir():
+            return False
+        md_files = _discover_md_files(base)
+        if not md_files:
+            return False
+        return any(str(_read_meta(path).get("page_kind", "")) == "claim_entry" for path in md_files)
+
+    def discover(self, root: str | Path) -> list[DiscoveredImportSource]:
+        base = _site_root(Path(root))
+        rows: list[DiscoveredImportSource] = []
+        for path in _discover_md_files(base):
+            rows.append(
+                DiscoveredImportSource(
+                    path=path,
+                    relative_path=path.relative_to(base).as_posix(),
+                    source_kind="indexcc",
+                    artifact_kind="indexcc_entry",
+                    is_text=True,
+                    metadata={"corpus": "indexcc"},
+                )
+            )
+        return rows
+
+    def import_intent(self) -> str:
+        return "grounded_knowledge"
+
+    def build_rows(self, context, sources: list[DiscoveredImportSource]) -> StructuredImportRows | None:
+        artifact_rows: list[dict[str, Any]] = []
+        observation_rows: list[dict[str, Any]] = []
+        claim_rows: list[dict[str, Any]] = []
+        concept_rows: list[dict[str, Any]] = []
+        relation_rows: list[dict[str, Any]] = []
+
+        for source in sources:
+            meta = _read_meta(source.path)
+            text = source.path.read_text(encoding="utf-8")
+            sections = _split_sections(text)
+            title = str(meta.get("title") or source.path.stem)
+            claim_text = sections.get("Claim", "")
+            response_text = sections.get("Response", "")
+            references_text = sections.get("References", "")
+            further_text = sections.get("Further Reading", "")
+            artifact_id = f"ia_{sha256(source.relative_path.encode('utf-8')).hexdigest()[:12]}"
+            artifact_rows.append(
+                {
+                    "artifact_id": artifact_id,
+                    "import_id": context.import_id,
+                    "artifact_kind": source.artifact_kind,
+                    "path": source.relative_path,
+                    "title": title,
+                    "sha256": sha256(source.path.read_bytes()).hexdigest(),
+                    "created_at": context.imported_at,
+                    "metadata": {
+                        "corpus": "indexcc",
+                        "document_kind": meta.get("page_kind", "claim_entry"),
+                        "author": meta.get("author", ""),
+                        "legacy_source": meta.get("legacy_source", ""),
+                        "section_label": meta.get("section_label", ""),
+                        "page_kind": meta.get("page_kind", ""),
+                    },
+                    "current_status": "draft",
+                }
+            )
+
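+            # Every non-empty section becomes one observation; "Claim" always
+            # takes sec_index 1, so the claim row can reference obs_<artifact_id>_1.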
+            body_sections = [
+                ("Claim", claim_text),
+                ("Response", response_text),
+                ("References", references_text),
+                ("Further Reading", further_text),
+            ]
+            for sec_index, (section_name, section_text) in enumerate(body_sections, start=1):
+                if not section_text:
+                    continue
+                observation_rows.append(
+                    {
+                        "observation_id": f"obs_{artifact_id}_{sec_index}",
+                        "import_id": context.import_id,
+                        "artifact_id": artifact_id,
+                        "role": "claim" if section_name == "Claim" else "summary",
+                        "text": section_text,
+                        "origin_path": source.relative_path,
+                        "origin_section": section_name,
+                        "line_start": 0,
+                        "line_end": 0,
+                        "source_url": str(meta.get("legacy_source") or ""),
+                        "metadata": {
+                            "corpus": "indexcc",
+                            "document_kind": meta.get("page_kind", "claim_entry"),
+                            "section_name": section_name,
+                            "author": meta.get("author", ""),
+                        },
+                        "grounding_status": "grounded",
+                        "support_kind": "direct_source",
+                        "confidence_hint": 0.88 if section_name == "Claim" else 0.8,
+                        "current_status": "draft",
+                    }
+                )
+
+            if claim_text:
+                claim_rows.append(
+                    {
+                        "claim_id": f"clm_{artifact_id}",
+                        "import_id": context.import_id,
+                        "claim_text": claim_text,
+                        "claim_kind": "claim_entry",
+                        "source_observation_ids": [f"obs_{artifact_id}_1"],
+                        "supporting_fragment_ids": [],
+                        "concept_ids": [f"concept::{source.path.stem.lower()}"],
+                        "contradicts_claim_ids": [],
+                        "supersedes_claim_ids": [],
+                        "confidence_hint": 0.88,
+                        "grounding_status": "grounded",
+                        "current_status": "triaged",
+                    }
+                )
+
+            concept_rows.append(
+                {
+                    "concept_id": f"concept::{source.path.stem.lower()}",
+                    "import_id": context.import_id,
+                    "title": title,
+                    "aliases": [source.path.stem.upper()],
+                    "description": meta.get("description", "Imported Index to Creationist Claims entry."),
+                    "source_artifact_ids": [artifact_id],
+                    "current_status": "triaged",
+                }
+            )
+
+        return StructuredImportRows(
+            artifact_rows=artifact_rows,
+            fragment_rows=[],
+            observation_rows=observation_rows,
+            claim_rows=claim_rows,
+            concept_rows=concept_rows,
+            relation_rows=relation_rows,
+        )
+
+
+register_source_adapter(IndexCcSourceAdapter())
diff --git a/src/groundrecall/groundrecall_source_adapters/pandasthumb_mt.py b/src/groundrecall/groundrecall_source_adapters/pandasthumb_mt.py
new file mode 100644
index 0000000..83078ad
--- /dev/null
+++ b/src/groundrecall/groundrecall_source_adapters/pandasthumb_mt.py
@@ -0,0 +1,291 @@
+from __future__ import annotations
+
+from hashlib import sha256
+from pathlib import Path
+import re
+from typing import Any
+
+from .base import DiscoveredImportSource, StructuredImportRows, register_source_adapter
+
+
+ARTIFACT_SUFFIXES = {".html", ".htm"}
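+# The patterns below assume the archived Movable Type markup (h1.title for the
+# headline, p.byline for "Posted <date> by <author>", div.entry-body for the
+# article text, and an ol#commentlist of li.comment entries); adjust them if
+# the dump's templates use different class names.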
+ARTICLE_TITLE_RE = re.compile(r'<h1 class="title">(.*?)</h1>', re.I | re.S)
+BYLINE_RE = re.compile(
+    r'<p class="byline">\s*Posted\s+(?P<date>.*?)\s+by\s+(?P<author>.*?)</p>',
+    re.I | re.S,
+)
+COMMENT_META_RE = re.compile(
+    r'<p class="comment-meta">\s*(?P<author>.*?)\s*·\s*(?P<date>.*?)</p>',
+    re.I | re.S,
+)
+COMMENTS_SECTION_RE = re.compile(r'<ol id="commentlist">(.*?)</ol>', re.I | re.S)
+
+
+def _strip_tags(text: str) -> str:
+    return re.sub(r"(?s)<[^>]+>", " ", text)
+
+
+def _normalize_space(text: str) -> str:
+    text = text.replace("\r", "\n")
+    text = re.sub(r"[ \t]+\n", "\n", text)
+    text = re.sub(r"\n{3,}", "\n\n", text)
+    text = re.sub(r"[ \t]{2,}", " ", text)
+    return text.strip()
+
+
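+# Flatten an HTML fragment to plain text: anchors become "label (href)", block
+# tags become line breaks, list items become "- " bullets, blockquotes gain a
+# "> " prefix, and whatever tags remain are stripped at the end.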
+def _fragment_to_text(fragment: str) -> str:
+    fragment = re.sub(r"(?is)<script.*?</script>", " ", fragment)
+    fragment = re.sub(r"(?is)<style.*?</style>", " ", fragment)
+    fragment = re.sub(
+        r'(?is)<a[^>]*href=["\']([^"\']+)["\'][^>]*>(.*?)</a>',
+        lambda match: f"{_strip_tags(match.group(2)).strip()} ({match.group(1).strip()})".strip(),
+        fragment,
+    )
+    fragment = re.sub(r"(?i)<br\s*/?>", "\n", fragment)
+    fragment = re.sub(r"(?i)</p>", "\n\n", fragment)
+    fragment = re.sub(r"(?i)<p[^>]*>", "", fragment)
+    fragment = re.sub(r"(?i)</div>", "\n", fragment)
+    fragment = re.sub(r"(?i)<div[^>]*>", "", fragment)
+    fragment = re.sub(r"(?i)</h[1-6]>", "\n", fragment)
+    fragment = re.sub(r"(?i)<h[1-6][^>]*>", "", fragment)
+    fragment = re.sub(r"(?i)</blockquote>", "\n", fragment)
+    fragment = re.sub(r"(?i)<blockquote[^>]*>", "\n> ", fragment)
+    fragment = re.sub(r"(?i)</li>", "\n", fragment)
+    fragment = re.sub(r"(?i)<li[^>]*>", "\n- ", fragment)
+    fragment = re.sub(r"(?i)<ul[^>]*>|</ul>", "\n", fragment)
+    fragment = re.sub(r"(?i)<ol[^>]*>|</ol>", "\n", fragment)
+    fragment = _strip_tags(fragment)
+    fragment = re.sub(r"\s*\n\s*", "\n", fragment)
+    return _normalize_space(fragment.replace("\xa0", " "))
+
+
+def _id_from_path(relative_path: str) -> str:
+    return f"pt_{sha256(relative_path.encode('utf-8')).hexdigest()[:12]}"
+
+
+def _site_root(root: Path) -> Path:
+    candidate = root / "public_html"
+    if (candidate / "archives").is_dir():
+        return candidate
+    return root
+
+
+def _discover_html_files(site_root: Path) -> list[Path]:
+    archives = site_root / "archives"
+    if not archives.is_dir():
+        return []
+    rows = []
+    for path in sorted(archives.rglob("*")):
+        if not path.is_file() or path.suffix.lower() not in ARTIFACT_SUFFIXES:
+            continue
+        if path.name.lower() == "index.html":
+            continue
+        rows.append(path)
+    return rows
+
+
+def _extract_article(html_text: str, relative_path: str) -> dict[str, Any] | None:
+    title_match = ARTICLE_TITLE_RE.search(html_text)
+    body_match = re.search(r'<div class="entry-body">(.*)', html_text, re.I | re.S)
+    if title_match is None or body_match is None:
+        return None
+
+    meta_match = BYLINE_RE.search(html_text)
+    comments_match = COMMENTS_SECTION_RE.search(html_text)
+    body_html = body_match.group(1)
+    if comments_match is not None:
+        body_html = body_html[: comments_match.start() - body_match.end()]
+    body_text = _fragment_to_text(body_html)
+    title = _fragment_to_text(title_match.group(1))
+    author = _fragment_to_text(meta_match.group("author")) if meta_match else ""
+    published_at = _fragment_to_text(meta_match.group("date")) if meta_match else ""
+    canonical_url = "/" + relative_path.lstrip("/")
+    return {
+        "document_id": _id_from_path(relative_path),
+        "title": title,
+        "author": author,
+        "published_at": published_at,
+        "canonical_url": canonical_url,
+        "body_text": body_text,
+    }
+
+
+def _extract_comments(html_text: str, relative_path: str, parent_document_id: str) -> list[dict[str, Any]]:
+    comments_match = COMMENTS_SECTION_RE.search(html_text)
+    if comments_match is None:
+        return []
+    comments_html = comments_match.group(1)
+    rows: list[dict[str, Any]] = []
+    starts = list(re.finditer(r'<li class="comment" id="comment-(?P<comment_id>\d+)">', comments_html, re.I))
+    for index, match in enumerate(starts):
+        start = match.end()
+        end = starts[index + 1].start() if index + 1 < len(starts) else len(comments_html)
+        chunk = comments_html[start:end]
+        meta_match = COMMENT_META_RE.search(chunk)
+        body_match = re.search(r'<div class="comment-body">(.*)</div>', chunk, re.I | re.S)
+        if body_match is None:
+            continue
+        rows.append(
+            {
+                "document_id": f"{parent_document_id}__comment_{match.group('comment_id')}",
+                "parent_document_id": parent_document_id,
+                "comment_id": match.group("comment_id"),
+                "document_kind": "comment",
+                "comment_author": _fragment_to_text(meta_match.group("author")) if meta_match else "",
+                "comment_date": _fragment_to_text(meta_match.group("date")) if meta_match else "",
+                "canonical_url": "/" + relative_path.lstrip("/"),
+                "body_text": _fragment_to_text(body_match.group(1)),
+            }
+        )
+    return rows
+
+
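+# The adapter mirrors the contract of the other source adapters: detect()
+# claims a tree, discover() lists importable pages, and build_rows() emits one
+# artifact plus a body observation per article, and a separate
+# artifact/observation pair per extracted comment.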
"created_at": context.imported_at, + "metadata": { + "corpus": "pandasthumb_mt", + "document_kind": "comment", + "parent_document_id": artifact_id, + "comment_id": comment["comment_id"], + "comment_author": comment["comment_author"], + "comment_date": comment["comment_date"], + "canonical_url": comment["canonical_url"], + }, + "current_status": "draft", + } + ) + observation_rows.append( + { + "observation_id": f"obs_{comment_artifact_id}_body", + "import_id": context.import_id, + "artifact_id": comment_artifact_id, + "role": "summary", + "text": comment["body_text"], + "origin_path": source.relative_path, + "origin_section": f"comment {comment['comment_id']}", + "line_start": 0, + "line_end": 0, + "source_url": comment["canonical_url"], + "metadata": { + "corpus": "pandasthumb_mt", + "document_kind": "comment", + "parent_document_id": artifact_id, + "comment_id": comment["comment_id"], + "comment_author": comment["comment_author"], + "comment_date": comment["comment_date"], + }, + "grounding_status": "grounded", + "support_kind": "direct_source", + "confidence_hint": 0.7, + "current_status": "draft", + } + ) + + return StructuredImportRows( + artifact_rows=artifact_rows, + fragment_rows=[], + observation_rows=observation_rows, + claim_rows=claim_rows, + concept_rows=concept_rows, + relation_rows=relation_rows, + ) + + +register_source_adapter(PandasThumbMtSourceAdapter()) diff --git a/src/groundrecall/query.py b/src/groundrecall/query.py index 98b78cf..0949a9b 100644 --- a/src/groundrecall/query.py +++ b/src/groundrecall/query.py @@ -121,6 +121,66 @@ def search_claims( } +def _artifact_corpus(artifact) -> str: + corpus = artifact.metadata.get("corpus") if isinstance(getattr(artifact, "metadata", None), dict) else "" + return str(corpus or "") + + +def search_documents( + store_dir: str | Path, + text: str, + corpora: list[str] | None = None, + include_rejected: bool = False, + limit: int = 20, +) -> dict[str, Any]: + store = GroundRecallStore(store_dir) + artifacts = {item.artifact_id: item for item in store.list_artifacts()} + observations_by_artifact: dict[str, list[Any]] = {} + for observation in store.list_observations(): + observations_by_artifact.setdefault(observation.artifact_id, []).append(observation) + + active_corpora = {value for value in (corpora or []) if value} + matches: list[dict[str, Any]] = [] + + for artifact in artifacts.values(): + corpus = _artifact_corpus(artifact) + if active_corpora and corpus not in active_corpora: + continue + if not include_rejected and artifact.current_status == "rejected": + continue + + artifact_observations = observations_by_artifact.get(artifact.artifact_id, []) + haystack_parts = [ + artifact.title, + artifact.path, + corpus, + str(artifact.metadata.get("document_kind", "")), + str(artifact.metadata.get("author", "")), + str(artifact.metadata.get("canonical_url", "")), + str(artifact.metadata.get("published_at", "")), + ] + haystack_parts.extend(observation.text for observation in artifact_observations) + haystack = " ".join(part for part in haystack_parts if part) + if _matches(text, haystack): + matches.append( + { + "artifact": artifact.model_dump(), + "corpus": corpus, + "observation_count": len(artifact_observations), + "matching_text": haystack[:800], + } + ) + if len(matches) >= limit: + break + + return { + "query_type": "document_search", + "query": text, + "active_corpora": sorted(active_corpora), + "matches": matches, + } + + def query_provenance( store_dir: str | Path, origin_path: str | None = None, @@ -178,12 +238,34 @@ 
+    for artifact in artifacts.values():
+        corpus = _artifact_corpus(artifact)
+        if active_corpora and corpus not in active_corpora:
+            continue
+        if not include_rejected and artifact.current_status == "rejected":
+            continue
+
+        artifact_observations = observations_by_artifact.get(artifact.artifact_id, [])
+        haystack_parts = [
+            artifact.title,
+            artifact.path,
+            corpus,
+            str(artifact.metadata.get("document_kind", "")),
+            str(artifact.metadata.get("author", "")),
+            str(artifact.metadata.get("canonical_url", "")),
+            str(artifact.metadata.get("published_at", "")),
+        ]
+        haystack_parts.extend(observation.text for observation in artifact_observations)
+        haystack = " ".join(part for part in haystack_parts if part)
+        if _matches(text, haystack):
+            matches.append(
+                {
+                    "artifact": artifact.model_dump(),
+                    "corpus": corpus,
+                    "observation_count": len(artifact_observations),
+                    "matching_text": haystack[:800],
+                }
+            )
+            if len(matches) >= limit:
+                break
+
+    return {
+        "query_type": "document_search",
+        "query": text,
+        "active_corpora": sorted(active_corpora),
+        "matches": matches,
+    }
+
+
 def query_provenance(
     store_dir: str | Path,
     origin_path: str | None = None,
@@ -178,12 +238,34 @@ def build_query_bundle_for_concept(store_dir: str | Path, concept_ref: str) -> d
     }
 
 
+def build_search_bundle(
+    store_dir: str | Path,
+    text: str,
+    corpora: list[str] | None = None,
+    limit: int = 20,
+) -> dict[str, Any]:
+    payload = search_documents(store_dir, text=text, corpora=corpora, limit=limit)
+    return {
+        "bundle_kind": "groundrecall_search_bundle",
+        "query_type": "document_search",
+        "query": text,
+        "active_corpora": payload["active_corpora"],
+        "matches": payload["matches"],
+        "suggested_next_actions": [
+            "Open the matching documents and review the artifact metadata.",
+            "Tighten the corpus filter when the result set is too broad.",
+            "Use the corpus defaults as a site-specific search preset, adding other corpora only when needed.",
+        ],
+    }
+
+
 def build_parser() -> argparse.ArgumentParser:
     parser = argparse.ArgumentParser(description="Query canonical GroundRecall objects.")
     parser.add_argument("store_dir")
     parser.add_argument("query")
-    parser.add_argument("--kind", choices=["concept", "claim", "provenance", "bundle"], default="concept")
+    parser.add_argument("--kind", choices=["concept", "claim", "provenance", "bundle", "search"], default="concept")
     parser.add_argument("--source-url", default=None)
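+    # --corpus is repeatable (e.g. "--corpus indexcc --corpus pandasthumb_mt");
+    # when no corpus is given, search_documents scans every corpus.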
+    parser.add_argument("--corpus", action="append", default=[])
     return parser
 
 
@@ -195,6 +277,8 @@ def main() -> None:
         payload = search_claims(args.store_dir, args.query)
     elif args.kind == "provenance":
         payload = query_provenance(args.store_dir, origin_path=args.query, source_url=args.source_url)
+    elif args.kind == "search":
+        payload = build_search_bundle(args.store_dir, args.query, corpora=list(args.corpus or []))
     else:
         payload = build_query_bundle_for_concept(args.store_dir, args.query)
     print(json.dumps(payload, indent=2))
diff --git a/src/groundrecall/source_adapters/indexcc.py b/src/groundrecall/source_adapters/indexcc.py
new file mode 100644
index 0000000..77ecf70
--- /dev/null
+++ b/src/groundrecall/source_adapters/indexcc.py
@@ -0,0 +1,3 @@
+from __future__ import annotations
+
+from ..groundrecall_source_adapters.indexcc import *  # noqa: F403
diff --git a/src/groundrecall/source_adapters/pandasthumb_mt.py b/src/groundrecall/source_adapters/pandasthumb_mt.py
new file mode 100644
index 0000000..5ec9fa1
--- /dev/null
+++ b/src/groundrecall/source_adapters/pandasthumb_mt.py
@@ -0,0 +1,3 @@
+from __future__ import annotations
+
+from ..groundrecall_source_adapters.pandasthumb_mt import *  # noqa: F403
diff --git a/tests/test_groundrecall_source_adapters.py b/tests/test_groundrecall_source_adapters.py
index a3df40c..5bbd04c 100644
--- a/tests/test_groundrecall_source_adapters.py
+++ b/tests/test_groundrecall_source_adapters.py
@@ -27,6 +27,8 @@ def test_groundrecall_source_adapter_registry_lists_expected_adapters() -> None:
     assert "transcript" in names
     assert "didactopus_pack" in names
     assert "doclift_bundle" in names
+    assert "indexcc" in names
+    assert "pandasthumb_mt" in names
 
 
 def test_detect_llmwiki_adapter(tmp_path: Path) -> None:
@@ -75,6 +77,70 @@ def test_markdown_notes_adapter_ingests_tex_files(tmp_path: Path) -> None:
     assert result.claims
 
 
+def test_plain_markdown_directory_uses_markdown_notes_adapter(tmp_path: Path) -> None:
+    (tmp_path / "note.md").write_text("# Operational Note\n\nA plain note.\n", encoding="utf-8")
+
+    adapter = detect_source_adapter(tmp_path)
+
+    assert adapter.name == "markdown_notes"
+
+
+def test_indexcc_adapter_import_generates_rows(tmp_path: Path) -> None:
+    indexcc_dir = tmp_path / "site2_src" / "content" / "indexcc"
+    indexcc_dir.mkdir(parents=True)
+    (indexcc_dir / "CA100.md").write_text(
+        "\n".join(
+            [
+                "## Claim",
+                "",
+                "Argument from incredulity claim.",
+                "",
+                "## Response",
+                "",
+                "A lack of imagination is not evidence of impossibility.",
+            ]
+        ),
+        encoding="utf-8",
+    )
+    (indexcc_dir / "CA100.meta.json").write_text(
+        '{"title": "CA100: Argument from Incredulity", "page_kind": "claim_entry", "legacy_source": "/indexcc/CA/CA100.html"}\n',
+        encoding="utf-8",
+    )
+
+    result = run_groundrecall_import(tmp_path, mode="quick", import_id="indexcc-test")
+
+    assert result.manifest["source_adapter"] == "indexcc"
+    assert result.manifest["import_intent"] == "grounded_knowledge"
+    assert result.manifest["fragment_count"] == 0
+    assert result.artifacts[0]["metadata"]["corpus"] == "indexcc"
+    assert result.claims[0]["claim_kind"] == "claim_entry"
+
+
+def test_pandasthumb_mt_adapter_import_generates_article_rows(tmp_path: Path) -> None:
+    public_html = tmp_path / "public_html"
+    archive_dir = public_html / "archives" / "2016" / "01"
+    archive_dir.mkdir(parents=True)
+    (public_html / "index.html").write_text("PT\n", encoding="utf-8")
+    (archive_dir / "sample.html").write_text(
+        "\n".join(
+            [
+                '<h1 class="title">Sample Article</h1>',
+                '<div class="entry-body">',
+                '<p>Article body text.</p>',
+            ]
+        ),
+        encoding="utf-8",
+    )
+
+    result = run_groundrecall_import(tmp_path, mode="quick", import_id="ptmt-test")
+
+    assert result.manifest["source_adapter"] == "pandasthumb_mt"
+    assert result.manifest["import_intent"] == "grounded_knowledge"
+    assert result.manifest["fragment_count"] == 0
+    assert result.artifacts[0]["metadata"]["corpus"] == "pandasthumb_mt"
+    assert result.observations[0]["text"] == "Article body text."
+
+
 def test_tex_import_uses_pandoc_markdown_when_available(tmp_path: Path, monkeypatch) -> None:
     (tmp_path / "draft.tex").write_text(
         "\\section{Ignored by fallback}\n"