diff --git a/README.md b/README.md index 62ded23..fe33179 100644 --- a/README.md +++ b/README.md @@ -136,6 +136,11 @@ PYTHONPATH=src .venv/bin/python -m citegeist --db library.sqlite3 topics PYTHONPATH=src .venv/bin/python -m citegeist --db library.sqlite3 topic-entries abiogenesis PYTHONPATH=src .venv/bin/python -m citegeist --db library.sqlite3 export-topic abiogenesis --output abiogenesis.bib PYTHONPATH=src .venv/bin/python -m citegeist --db library.sqlite3 graph smith2024graphs --relation cites --depth 2 --missing-only +PYTHONPATH=src .venv/bin/python -m citegeist --db library.sqlite3 graph smith2024graphs --relation cites --depth 2 --format json-graph +PYTHONPATH=src .venv/bin/python -m citegeist --db library.sqlite3 graph smith2024graphs --relation cites --depth 2 --format dot +PYTHONPATH=src .venv/bin/python -m citegeist --db library.sqlite3 graph smith2024graphs --relation cites --depth 2 --format dot --output graph.dot +PYTHONPATH=src .venv/bin/python -m citegeist --db library.sqlite3 graph smith2024graphs --relation cites --depth 2 --format json-graph --output graph.json +PYTHONPATH=src .venv/bin/python -m citegeist graph-view graph.json --output graph.html --title "CiteGeist Graph" PYTHONPATH=src .venv/bin/python -m citegeist --db library.sqlite3 expand smith2024graphs --source crossref PYTHONPATH=src .venv/bin/python -m citegeist --db library.sqlite3 expand smith2024graphs --source openalex --relation cited_by --limit 10 PYTHONPATH=src .venv/bin/python -m citegeist --db library.sqlite3 expand-topic abiogenesis --topic-phrase "abiogenesis origin chemistry" --source openalex --relation cites --seed-key seed2024 --min-relevance 0.3 --preview diff --git a/src/citegeist/cli.py b/src/citegeist/cli.py index 7d50f22..6c5a85b 100644 --- a/src/citegeist/cli.py +++ b/src/citegeist/cli.py @@ -2,6 +2,7 @@ from __future__ import annotations import argparse from dataclasses import asdict +from html import escape as html_escape import json import sys from pathlib import Path @@ -82,6 +83,24 @@ def build_parser() -> argparse.ArgumentParser: action="store_true", help="Show only unresolved target nodes that are not yet present in the database", ) + graph_parser.add_argument( + "--format", + choices=["json", "dot", "json-graph"], + default="json", + help="Output format for traversed graph results", + ) + graph_parser.add_argument( + "--output", + help="Write graph output to a file instead of stdout", + ) + + graph_view_parser = subparsers.add_parser( + "graph-view", + help="Render a self-contained HTML viewer from a json-graph export", + ) + graph_view_parser.add_argument("input", help="Path to a graph JSON file exported with --format json-graph") + graph_view_parser.add_argument("--output", required=True, help="Path to write the HTML viewer") + graph_view_parser.add_argument("--title", default="CiteGeist Graph View", help="HTML page title") expand_parser = subparsers.add_parser("expand", help="Expand graph edges from external metadata sources") expand_parser.add_argument("citation_keys", nargs="+", help="Seed citation keys to expand") @@ -491,7 +510,11 @@ def main(argv: list[str] | None = None) -> int: args.depth, args.review_status, args.missing_only, + args.format, + args.output, ) + if args.command == "graph-view": + return _run_graph_view(Path(args.input), Path(args.output), args.title) if args.command == "expand": return _run_expand(store, args.citation_keys, args.source, args.relation, args.limit) if args.command == "expand-topic": @@ -763,6 +786,8 @@ def _run_graph( depth: int, review_status: str | None, missing_only: bool, + output_format: str, + output: str | None, ) -> int: rows = store.traverse_graph( citation_keys, @@ -773,10 +798,357 @@ def _run_graph( ) if missing_only: rows = [row for row in rows if not row["target_exists"]] - print(json.dumps(rows, indent=2)) + rendered: str + if output_format == "dot": + rendered = _render_graph_dot(store, citation_keys, rows) + elif output_format == "json-graph": + rendered = json.dumps(_render_graph_json(store, citation_keys, rows), indent=2) + else: + rendered = json.dumps(rows, indent=2) + if output: + Path(output).write_text(rendered + ("\n" if rendered and not rendered.endswith("\n") else ""), encoding="utf-8") + else: + print(rendered) return 0 +def _run_graph_view(input_path: Path, output_path: Path, title: str) -> int: + payload = json.loads(input_path.read_text(encoding="utf-8")) + if not isinstance(payload, dict) or not isinstance(payload.get("nodes"), list) or not isinstance(payload.get("edges"), list): + print("graph-view expects a json-graph payload with 'nodes' and 'edges'", file=sys.stderr) + return 1 + output_path.write_text(_render_graph_html(payload, title), encoding="utf-8") + return 0 + + +def _render_graph_dot( + store: BibliographyStore, + seed_keys: list[str], + rows: list[dict[str, object]], +) -> str: + node_payloads = _collect_graph_nodes(store, seed_keys, rows) + + lines = ["digraph citegeist {", " rankdir=LR;"] + for citation_key, payload in sorted(node_payloads.items()): + attributes = { + "label": _graph_node_label(payload), + "shape": "doublecircle" if payload.get("is_seed") else "ellipse", + } + if not payload.get("target_exists"): + attributes["style"] = "dashed" + attributes["color"] = "gray50" + elif payload.get("review_status") == "reviewed": + attributes["color"] = "forestgreen" + elif payload.get("review_status") == "draft": + attributes["color"] = "goldenrod" + attr_string = ", ".join(f'{key}="{_dot_escape(str(value))}"' for key, value in attributes.items()) + lines.append(f' "{_dot_escape(citation_key)}" [{attr_string}];') + + for row in rows: + source_key = _dot_escape(str(row["source_citation_key"])) + target_key = _dot_escape(str(row["target_citation_key"])) + relation_type = _dot_escape(str(row["relation_type"])) + depth_value = _dot_escape(str(row["depth"])) + lines.append( + f' "{source_key}" -> "{target_key}" [label="{relation_type} d={depth_value}"];' + ) + lines.append("}") + return "\n".join(lines) + + +def _render_graph_json( + store: BibliographyStore, + seed_keys: list[str], + rows: list[dict[str, object]], +) -> dict[str, object]: + node_payloads = _collect_graph_nodes(store, seed_keys, rows) + nodes = [] + for citation_key, payload in sorted(node_payloads.items()): + nodes.append( + { + "id": citation_key, + "label": citation_key, + "title": payload.get("title"), + "review_status": payload.get("review_status"), + "target_exists": payload.get("target_exists"), + "is_seed": payload.get("is_seed"), + } + ) + edges = [] + for index, row in enumerate(rows, start=1): + edges.append( + { + "id": f"edge-{index}", + "source": str(row["source_citation_key"]), + "target": str(row["target_citation_key"]), + "relation_type": str(row["relation_type"]), + "depth": int(row["depth"]), + "target_exists": bool(row["target_exists"]), + } + ) + return {"nodes": nodes, "edges": edges} + + +def _render_graph_html(payload: dict[str, object], title: str) -> str: + graph_json = json.dumps(payload) + safe_title = html_escape(title) + return """ + +
+ + +