Prepare public-safe repo update

welsberr 2026-04-10 04:44:45 +00:00
parent a6b04a995a
commit 1143f9bfcc
40 changed files with 9099 additions and 175 deletions

.gitignore

@@ -7,3 +7,12 @@ __pycache__/
node_modules/
test-results/
playwright-report/
*~
*.env
secrets*
codex*
restart.sh
*lock.json
input-data/
legacy-data
var/logs/


@@ -21,7 +21,7 @@ Docker Compose owns all runtime dependencies:
- Python services run in `python:3.12-slim`
- the Python virtual environment is created in a Docker-managed volume mounted at `/workspace/.docker/venv`
- dependencies are installed from `apps/api/requirements.txt` inside that virtual environment
- the legacy corpus is mounted read-only from `../01-legacy-code-and-data`
- the legacy corpus is mounted read-only from a sibling directory, defaulting to `../legacy-corpus`
No host Python packages are required for the Compose workflow.
@@ -48,6 +48,13 @@ Endpoints:
- editor section detail/update: `/api/editor/species/<slug>/sections/<position>` (requires `editor` or `admin`)
- editor audit history: `/api/editor/species/<slug>/audit` (requires `editor` or `admin`)
The app can also be published under a URL prefix. A reverse-proxy deployment can serve it at a host and path such as:
- `ECOSPECIES_HOSTNAME=example.org`
- `ECOSPECIES_BASE_PATH=/apps/ecospecies`
When the site is served below a path prefix, the frontend derives its API base from the current page URL and nginx serves both the UI and proxied API under that same prefix.
If those host ports are already in use, override them when starting Compose, for example:
```bash
@@ -87,6 +94,14 @@ Run the browser-level smoke test against the real Compose stack with:
./scripts/check-ui-stack-smoke.sh
```
Run a bounded citation backfill pass with:
```bash
./scripts/run-citation-backfill.sh
```
The wrapper runs inside `ecospecies-api`, keeps a rotating cursor in `var/citation-backfill.cursor`, and skips a run if another backfill is already active.
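As a rough illustration of the rotating-cursor idea (this is not the script itself; the batching logic and batch size are assumptions):
```python
from pathlib import Path

CURSOR = Path("var/citation-backfill.cursor")

def next_slice(slugs: list[str], batch_size: int = 50) -> list[str]:
    # Resume after the slug recorded by the previous run, wrapping at the end.
    last = CURSOR.read_text().strip() if CURSOR.exists() else ""
    start = slugs.index(last) + 1 if last in slugs else 0
    batch = (slugs + slugs)[start : start + batch_size][: len(slugs)]
    if batch:
        CURSOR.write_text(batch[-1])
    return batch
```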
## Notes
- The importer seeds PostgreSQL from the legacy text corpus before the API starts and now synchronizes by slug instead of truncating the full dataset.
@@ -98,6 +113,8 @@ Run the browser-level smoke test against the real Compose stack with:
- Initial editor auth uses `ECOSPECIES_AUTH_TOKENS` in the format `token:username:role[,token2:username2:role2]`, where `role` is `viewer`, `editor`, or `admin`.
- Editorial workflow state is persisted per species with `draft`, `review`, and `published` statuses. Public endpoints return only `published` records; editor endpoints can inspect and update all records.
- Editors can curate top-level metadata and section content from the web UI, and every editorial or section change is recorded in per-species audit history.
- Citation backfill can be scheduled externally, such as with a nightly cron job that runs `./scripts/run-citation-backfill.sh`. Use `ECOSPECIES_BACKFILL_LOG_DIR` if logs should go somewhere other than `var/logs`.
- Citation enrichment now refreshes the locally parsed BibTeX and normalized citation text even when a citation remains unresolved, so parser improvements propagate without a remote metadata match.
- Summary authoring guidance for future FLELMR-compatible records is in `docs/flelmr-authoring.md`.
- Legacy survey and roadmap artifacts are in `docs/`.


@@ -15,17 +15,36 @@ from ecospecies_api.auth import (
)
from ecospecies_api.parser import get_default_data_dir, load_species_records
from ecospecies_api.repository import (
add_species_citation_from_candidate,
apply_species_citation_candidate_selection,
create_contributor_species,
get_contributor_species_citations,
get_contributor_species_detail,
get_contributor_species_document,
get_contributor_species_list,
get_species_citation_candidates,
get_editor_species_citations,
get_editor_species_detail,
get_species_document,
get_editor_species_list,
get_editor_species_workflow,
get_minimum_contributor_age,
get_species_by_slug,
list_species_audit,
list_public_bibliography,
get_readiness_status,
get_summary_metrics,
has_species_data,
import_species_payload,
list_diagnostics,
list_species,
register_contributor,
update_species_citation_enrichment,
backfill_species_citations,
update_species_citations_enrichment_batch,
update_species_citation_review,
update_contributor_species_document_markdown,
update_species_document_markdown,
update_species_section,
update_species_editorial,
)
@@ -99,6 +118,7 @@ class EcoSpeciesHandler(BaseHTTPRequestHandler):
{
"authenticated": session is not None,
"auth_configured": auth_is_configured(),
"minimum_contributor_age": get_minimum_contributor_age(),
"user": (
{"username": session.username, "role": session.role}
if session is not None
@@ -108,6 +128,23 @@ class EcoSpeciesHandler(BaseHTTPRequestHandler):
)
return
if path == "/api/contributor/status":
if not self.require_role(session, "contributor"):
return
self.write_json(
{
"status": "ok",
"contributor_access": True,
"user": {"username": session.username, "role": session.role},
"minimum_age": get_minimum_contributor_age(),
"capabilities": [
"create_species_draft",
"edit_owned_drafts",
],
}
)
return
if path == "/api/editor/status":
if not self.require_role(session, "editor"):
return
@@ -135,10 +172,42 @@ class EcoSpeciesHandler(BaseHTTPRequestHandler):
"slug": item["slug"],
"title": item["title"],
"common_name": item["common_name"],
"scientific_name": item["scientific_name"],
"legacy_identifiers": item["legacy_identifiers"],
"taxon_identifiers": item["taxon_identifiers"],
"primary_taxon_authority": item["primary_taxon_authority"],
"primary_taxon_identifier": item["primary_taxon_identifier"],
"publication_status": item["publication_status"],
"is_archived": item["is_archived"],
"last_modified_by": item["last_modified_by"],
"diagnostic_count": len(item["diagnostics"]),
"summary": item["summary"],
}
for item in items
]
self.write_json({"items": compact, "count": len(compact)})
return
if path == "/api/contributor/species":
if not self.require_role(session, "contributor"):
return
search = query.get("search", [""])[0].strip().lower()
items = get_contributor_species_list(session.username, search)
compact = [
{
"slug": item["slug"],
"title": item["title"],
"common_name": item["common_name"],
"scientific_name": item["scientific_name"],
"legacy_identifiers": item["legacy_identifiers"],
"taxon_identifiers": item["taxon_identifiers"],
"primary_taxon_authority": item["primary_taxon_authority"],
"primary_taxon_identifier": item["primary_taxon_identifier"],
"publication_status": item["publication_status"],
"is_archived": item["is_archived"],
"last_modified_by": item["last_modified_by"],
"diagnostic_count": len(item["diagnostics"]),
"summary": item["summary"],
}
for item in items
]
@@ -176,7 +245,68 @@ class EcoSpeciesHandler(BaseHTTPRequestHandler):
self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND)
return
if path.startswith("/api/editor/species/") and not path.endswith("/workflow") and not path.endswith("/editorial") and not path.endswith("/audit"):
if path.startswith("/api/editor/species/") and path.endswith("/document"):
if not self.require_role(session, "editor"):
return
slug = path[len("/api/editor/species/") : -len("/document")].strip("/")
item = get_species_document(slug)
if item is None:
self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND)
return
self.write_json(item)
return
if path.startswith("/api/editor/species/") and path.endswith("/citations"):
if not self.require_role(session, "editor"):
return
slug = path[len("/api/editor/species/") : -len("/citations")].strip("/")
item = get_editor_species_citations(slug)
if item is None:
self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND)
return
self.write_json(item)
return
if path.startswith("/api/editor/species/") and "/citations/" in path and path.endswith("/candidates"):
if not self.require_role(session, "editor"):
return
slug, _, tail = path[len("/api/editor/species/") :].partition("/citations/")
citation_tail = tail[: -len("/candidates")].strip("/")
try:
citation_id = int(citation_tail)
except ValueError:
self.write_json({"error": "Invalid citation id"}, status=HTTPStatus.BAD_REQUEST)
return
item = get_species_citation_candidates(slug.strip("/"), citation_id)
if item is None:
self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND)
return
self.write_json(item)
return
if path.startswith("/api/contributor/species/") and path.endswith("/document"):
if not self.require_role(session, "contributor"):
return
slug = path[len("/api/contributor/species/") : -len("/document")].strip("/")
item = get_contributor_species_document(slug, session.username)
if item is None:
self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND)
return
self.write_json(item)
return
if path.startswith("/api/contributor/species/") and path.endswith("/citations"):
if not self.require_role(session, "contributor"):
return
slug = path[len("/api/contributor/species/") : -len("/citations")].strip("/")
item = get_contributor_species_citations(slug, session.username)
if item is None:
self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND)
return
self.write_json(item)
return
if path.startswith("/api/editor/species/") and not path.endswith("/workflow") and not path.endswith("/editorial") and not path.endswith("/audit") and not path.endswith("/document"):
if not self.require_role(session, "editor"):
return
slug = path[len("/api/editor/species/") :].strip("/")
@@ -187,6 +317,17 @@ class EcoSpeciesHandler(BaseHTTPRequestHandler):
self.write_json(item)
return
if path.startswith("/api/contributor/species/") and not path.endswith("/document"):
if not self.require_role(session, "contributor"):
return
slug = path[len("/api/contributor/species/") :].strip("/")
item = get_contributor_species_detail(slug, session.username)
if item is None:
self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND)
return
self.write_json(item)
return
if path.startswith("/api/editor/species/") and path.endswith("/workflow"):
if not self.require_role(session, "editor"):
return
@@ -215,6 +356,12 @@ class EcoSpeciesHandler(BaseHTTPRequestHandler):
self.write_json({"items": flagged, "count": len(flagged)})
return
if path == "/api/bibliography":
search = query.get("search", [""])[0].strip()
items = list_public_bibliography(search=search)
self.write_json({"items": items, "count": len(items)})
return
if path == "/api/species":
search = query.get("search", [""])[0].strip().lower()
species = list_species(search)
@@ -225,6 +372,10 @@ class EcoSpeciesHandler(BaseHTTPRequestHandler):
"common_name": item["common_name"],
"scientific_name": item["scientific_name"],
"flelmr_code": item["flelmr_code"],
"legacy_identifiers": item["legacy_identifiers"],
"taxon_identifiers": item["taxon_identifiers"],
"primary_taxon_authority": item["primary_taxon_authority"],
"primary_taxon_identifier": item["primary_taxon_identifier"],
"summary": item["summary"],
"section_count": item["section_count"],
"diagnostic_count": len(item["diagnostics"]),
@@ -250,6 +401,47 @@ class EcoSpeciesHandler(BaseHTTPRequestHandler):
path = parsed.path
session = resolve_auth_session(self.headers)
if path == "/api/contributor/register":
payload = self.read_json_body()
if payload is None:
return
email = payload.get("email")
age_gate_confirmed = payload.get("age_gate_confirmed")
if not isinstance(email, str):
self.write_json({"error": "email must be a string"}, status=HTTPStatus.BAD_REQUEST)
return
if not isinstance(age_gate_confirmed, bool):
self.write_json(
{"error": "age_gate_confirmed must be a boolean"},
status=HTTPStatus.BAD_REQUEST,
)
return
try:
result = register_contributor(email=email, age_gate_confirmed=age_gate_confirmed)
except ValueError as exc:
self.write_json({"error": str(exc)}, status=HTTPStatus.BAD_REQUEST)
return
self.write_json({"status": "ok", **result}, status=HTTPStatus.CREATED)
return
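# Illustration (not part of this diff): a hypothetical client call against the
# registration endpoint above, assuming the API listens on localhost:8000.
#
#     import json, urllib.request
#     req = urllib.request.Request(
#         "http://localhost:8000/api/contributor/register",
#         data=json.dumps({"email": "contributor@example.org",
#                          "age_gate_confirmed": True}).encode("utf-8"),
#         headers={"Content-Type": "application/json"},
#         method="POST",
#     )
#     with urllib.request.urlopen(req) as resp:
#         print(resp.status)  # 201 Created on success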
if path == "/api/contributor/species":
if not self.require_role(session, "contributor"):
return
payload = self.read_json_body()
if payload is None:
return
markdown = payload.get("markdown")
if markdown is not None and not isinstance(markdown, str):
self.write_json({"error": "markdown must be a string"}, status=HTTPStatus.BAD_REQUEST)
return
try:
result = create_contributor_species(session.username, markdown)
except ValueError as exc:
self.write_json({"error": str(exc)}, status=HTTPStatus.BAD_REQUEST)
return
self.write_json({"status": "ok", **result}, status=HTTPStatus.CREATED)
return
if path.startswith("/api/editor/species/") and path.endswith("/workflow"):
if not self.require_role(session, "editor"):
return
@@ -341,6 +533,229 @@ class EcoSpeciesHandler(BaseHTTPRequestHandler):
self.write_json({"status": "ok", **result})
return
if path.startswith("/api/editor/species/") and path.endswith("/document"):
if not self.require_role(session, "editor"):
return
payload = self.read_json_body()
if payload is None:
return
markdown = payload.get("markdown")
if not isinstance(markdown, str):
self.write_json({"error": "markdown must be a string"}, status=HTTPStatus.BAD_REQUEST)
return
slug = path[len("/api/editor/species/") : -len("/document")].strip("/")
try:
result = update_species_document_markdown(
slug=slug,
markdown=markdown,
username=session.username,
)
except ValueError as exc:
self.write_json({"error": str(exc)}, status=HTTPStatus.BAD_REQUEST)
return
if result is None:
self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND)
return
self.write_json({"status": "ok", **result})
return
if (
path.startswith("/api/editor/species/")
and "/citations/" in path
and not path.endswith("/citations/enrich")
and not path.endswith("/citations/backfill")
):
if not self.require_role(session, "editor"):
return
payload = self.read_json_body()
if payload is None:
return
slug, _, tail = path[len("/api/editor/species/") :].partition("/citations/")
if tail.endswith("/enrich"):
citation_tail = tail[: -len("/enrich")].strip("/")
try:
citation_id = int(citation_tail)
except ValueError:
self.write_json({"error": "Invalid citation id"}, status=HTTPStatus.BAD_REQUEST)
return
result = update_species_citation_enrichment(
slug=slug.strip("/"),
citation_id=citation_id,
username=session.username,
)
if result is None:
self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND)
return
self.write_json({"status": "ok", **result})
return
if tail.endswith("/apply-match"):
citation_tail = tail[: -len("/apply-match")].strip("/")
try:
citation_id = int(citation_tail)
except ValueError:
self.write_json({"error": "Invalid citation id"}, status=HTTPStatus.BAD_REQUEST)
return
candidate = payload.get("candidate")
if not isinstance(candidate, dict):
self.write_json({"error": "candidate must be an object"}, status=HTTPStatus.BAD_REQUEST)
return
result = apply_species_citation_candidate_selection(
slug=slug.strip("/"),
citation_id=citation_id,
candidate=candidate,
username=session.username,
)
if result is None:
self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND)
return
self.write_json({"status": "ok", **result})
return
if tail.endswith("/add-match"):
citation_tail = tail[: -len("/add-match")].strip("/")
try:
citation_id = int(citation_tail)
except ValueError:
self.write_json({"error": "Invalid citation id"}, status=HTTPStatus.BAD_REQUEST)
return
candidate = payload.get("candidate")
if not isinstance(candidate, dict):
self.write_json({"error": "candidate must be an object"}, status=HTTPStatus.BAD_REQUEST)
return
result = add_species_citation_from_candidate(
slug=slug.strip("/"),
citation_id=citation_id,
candidate=candidate,
username=session.username,
)
if result is None:
self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND)
return
self.write_json({"status": "ok", **result})
return
try:
citation_id = int(tail.strip("/"))
except ValueError:
self.write_json({"error": "Invalid citation id"}, status=HTTPStatus.BAD_REQUEST)
return
for field in ("review_status", "normalized_text", "abstract_text", "doi", "citation_key", "entry_type", "draft_bibtex"):
value = payload.get(field)
if value is not None and not isinstance(value, str):
self.write_json(
{"error": f"{field} must be a string"},
status=HTTPStatus.BAD_REQUEST,
)
return
try:
result = update_species_citation_review(
slug=slug.strip("/"),
citation_id=citation_id,
review_status=payload.get("review_status"),
normalized_text=payload.get("normalized_text"),
doi=payload.get("doi"),
citation_key=payload.get("citation_key"),
entry_type=payload.get("entry_type"),
draft_bibtex=payload.get("draft_bibtex"),
abstract_text=payload.get("abstract_text"),
username=session.username,
)
except ValueError as exc:
self.write_json({"error": str(exc)}, status=HTTPStatus.BAD_REQUEST)
return
if result is None:
self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND)
return
self.write_json({"status": "ok", **result})
return
if path.startswith("/api/editor/species/") and path.endswith("/citations/enrich"):
if not self.require_role(session, "editor"):
return
payload = self.read_json_body()
if payload is None:
return
slug = path[len("/api/editor/species/") : -len("/citations/enrich")].strip("/")
result = update_species_citations_enrichment_batch(
slug=slug,
username=session.username,
)
if result is None:
self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND)
return
self.write_json({"status": "ok", **result})
return
if path.startswith("/api/editor/species/") and path.endswith("/citations/backfill"):
if not self.require_role(session, "editor"):
return
payload = self.read_json_body()
if payload is None:
return
slug = path[len("/api/editor/species/") : -len("/citations/backfill")].strip("/")
include_accepted = bool(payload.get("include_accepted", False))
result = backfill_species_citations(
slug=slug,
username=session.username,
include_accepted=include_accepted,
)
if result is None:
self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND)
return
self.write_json({"status": "ok", **result})
return
if path.startswith("/api/contributor/species/") and path.endswith("/document"):
if not self.require_role(session, "contributor"):
return
payload = self.read_json_body()
if payload is None:
return
markdown = payload.get("markdown")
if not isinstance(markdown, str):
self.write_json({"error": "markdown must be a string"}, status=HTTPStatus.BAD_REQUEST)
return
slug = path[len("/api/contributor/species/") : -len("/document")].strip("/")
try:
result = update_contributor_species_document_markdown(
slug=slug,
markdown=markdown,
username=session.username,
)
except ValueError as exc:
self.write_json({"error": str(exc)}, status=HTTPStatus.BAD_REQUEST)
return
if result is None:
self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND)
return
self.write_json({"status": "ok", **result})
return
self.write_json({"error": "Not found"}, status=HTTPStatus.NOT_FOUND)
def log_message(self, format: str, *args: object) -> None:


@@ -1,14 +1,21 @@
from __future__ import annotations
import hashlib
import os
from dataclasses import dataclass
from typing import Mapping
from sqlalchemy import select
from ecospecies_api.db import SessionLocal, create_db_engine
from ecospecies_api.models import Base, ContributorAccount
ROLE_ORDER = {
"viewer": 1,
"editor": 2,
"admin": 3,
"contributor": 2,
"editor": 3,
"admin": 4,
}
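# Illustration (not part of this diff): the handler's require_role checks can be
# read as a rank comparison on ROLE_ORDER; the actual implementation is not shown
# in this hunk, so this equivalent is an assumption.
#
#     def role_satisfies(session_role: str, required_role: str) -> bool:
#         return ROLE_ORDER.get(session_role, 0) >= ROLE_ORDER.get(required_role, 0)
#
#     # role_satisfies("admin", "editor") -> True
#     # role_satisfies("viewer", "contributor") -> False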
@@ -41,17 +48,27 @@ def _parse_token_entry(entry: str) -> tuple[str, AuthSession]:
def get_token_registry() -> dict[str, AuthSession]:
configured = os.environ.get("ECOSPECIES_AUTH_TOKENS", "").strip()
if not configured:
return {}
registry: dict[str, AuthSession] = {}
for raw_entry in configured.split(","):
entry = raw_entry.strip()
if not entry:
continue
token, session = _parse_token_entry(entry)
registry[token] = session
configured = os.environ.get("ECOSPECIES_AUTH_TOKENS", "").strip()
if configured:
for raw_entry in configured.split(","):
entry = raw_entry.strip()
if not entry:
continue
token, session = _parse_token_entry(entry)
registry[token] = session
engine = create_db_engine()
Base.metadata.create_all(engine)
with SessionLocal() as session:
for account in session.scalars(
select(ContributorAccount).where(ContributorAccount.is_active.is_(True))
):
registry[account.token_hash] = AuthSession(
token=account.token_hash,
username=account.email,
role="contributor",
)
return registry
@@ -70,7 +87,11 @@ def resolve_auth_session(headers: Mapping[str, str]) -> AuthSession | None:
token = get_bearer_token(headers)
if not token:
return None
return registry.get(token)
direct = registry.get(token)
if direct is not None:
return direct
token_hash = hashlib.sha256(token.encode("utf-8")).hexdigest()
return registry.get(token_hash)
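# Illustration (not part of this diff): contributor accounts are keyed by the
# SHA-256 digest of their token, so a raw bearer token resolves like this
# (the token value is hypothetical):
#
#     import hashlib
#     raw_token = "example-contributor-token"
#     token_hash = hashlib.sha256(raw_token.encode("utf-8")).hexdigest()
#     # registry[token_hash] holds the contributor's AuthSession, so the raw
#     # token never needs to be stored server-side.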
def auth_is_configured() -> bool:

File diff suppressed because it is too large.


@@ -0,0 +1,387 @@
from __future__ import annotations
from dataclasses import dataclass
from pathlib import Path
import re
import sys
def _load_citegeist_extract():
citegeist_src = Path(__file__).resolve().parents[5] / "CiteGeist" / "src"
if citegeist_src.exists() and str(citegeist_src) not in sys.path:
sys.path.insert(0, str(citegeist_src))
try:
from citegeist.extract import extract_references # type: ignore
except ImportError:
return None
return extract_references
@dataclass
class DraftCitation:
citation_key: str
entry_type: str
fields: dict[str, str]
draft_bibtex: str
STOPWORD_TOKENS = {
"a",
"an",
"and",
"for",
"from",
"in",
"of",
"on",
"the",
"to",
"with",
}
HISTORICAL_YEAR_PATTERN = r"(1\d{3}|20\d{2})"
def build_standard_citation_key(
authors: str = "",
year: str = "",
title: str = "",
fallback_text: str = "",
) -> str:
family_name = _family_name_stem(authors or fallback_text)
year_stem = re.sub(r"[^0-9]+", "", year)[:4]
topic_stem = _topic_stem(title or fallback_text)
key = f"{family_name}{year_stem}{topic_stem}"
return key or "reference"
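# For example (illustration, not part of this diff):
#     build_standard_citation_key(
#         authors="Smith, J.", year="1998", title="On the ecology of manatees"
#     )
#     # -> "smith1998ecologymanatees"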
def extract_draft_citation(raw_text: str, legacy_reference_number: str = "") -> DraftCitation | None:
extractor = _load_citegeist_extract()
if extractor is None:
return _fallback_citation(raw_text, legacy_reference_number)
entries = extractor(raw_text)
if not entries:
return _fallback_citation(raw_text, legacy_reference_number)
entry = entries[0]
fields = dict(entry.fields)
fields = _repair_reference_fields(raw_text, fields)
citation_key = build_standard_citation_key(
authors=str(fields.get("author", "")),
year=str(fields.get("year", "")),
title=str(fields.get("title", "")),
fallback_text=raw_text,
)
note_parts = [fields.get("note", "").strip()] if fields.get("note") else []
if legacy_reference_number:
note_parts.append(f"ecospecies_reference_number = {{{legacy_reference_number}}}")
fields["note"] = "; ".join(part for part in note_parts if part)
draft_bibtex = render_single_bibtex(entry.entry_type, citation_key, fields)
return DraftCitation(
citation_key=citation_key,
entry_type=entry.entry_type,
fields=fields,
draft_bibtex=draft_bibtex,
)
def _fallback_citation(raw_text: str, legacy_reference_number: str) -> DraftCitation:
year_match = re.search(rf"\b{HISTORICAL_YEAR_PATTERN}\b", raw_text)
year = year_match.group(0) if year_match else ""
fields = _repair_reference_fields(
raw_text,
{
"title": raw_text.strip(),
"year": year,
},
)
title = str(fields.get("title", "")).strip() or raw_text.strip()
citation_key = build_standard_citation_key(year=year, title=title, fallback_text=raw_text)
fields["note"] = f"raw_reference = {{{raw_text}}}"
if legacy_reference_number:
fields["note"] += f"; ecospecies_reference_number = {{{legacy_reference_number}}}"
draft_bibtex = render_single_bibtex("misc", citation_key, fields)
return DraftCitation(
citation_key=citation_key,
entry_type="misc",
fields=fields,
draft_bibtex=draft_bibtex,
)
def _family_name_stem(raw_text: str) -> str:
compact = raw_text.strip()
if not compact:
return "ref"
if "," in compact:
compact = compact.split(",", 1)[0]
else:
compact = compact.split()[0]
compact = re.sub(r"[^A-Za-z0-9]+", "", compact).lower()
return compact or "ref"
def _topic_stem(raw_text: str) -> str:
tokens = [
token
for token in re.findall(r"[A-Za-z0-9]+", raw_text.lower())
if token not in STOPWORD_TOKENS and not token.isdigit()
]
topic_tokens = tokens[:3] or ["topic"]
return "".join(topic_tokens)
def _repair_reference_fields(raw_text: str, fields: dict[str, str]) -> dict[str, str]:
repaired = dict(fields)
title = str(repaired.get("title", "")).strip()
raw = raw_text.strip()
if not raw:
return repaired
parsed = _parse_report_style_reference(raw)
if parsed is None:
return repaired
current_venue = (
str(repaired.get("journal", "")).strip()
or str(repaired.get("howpublished", "")).strip()
or str(repaired.get("booktitle", "")).strip()
or str(repaired.get("publisher", "")).strip()
)
parsed_venue = str(parsed.get("venue", "")).strip()
needs_structural_repair = bool(
parsed_venue
and (
not current_venue
or len(current_venue) < max(8, len(parsed_venue) // 2)
or current_venue.lower() not in parsed_venue.lower()
or (parsed.get("volume") and not str(repaired.get("volume", "")).strip())
or (parsed.get("number") and not str(repaired.get("number", "")).strip())
or (parsed.get("pages") and not str(repaired.get("pages", "")).strip())
)
)
if title and not _title_looks_like_raw_reference(title) and not needs_structural_repair:
return repaired
if parsed.get("author"):
repaired["author"] = parsed["author"]
if parsed.get("year"):
repaired["year"] = parsed["year"]
if parsed.get("title"):
repaired["title"] = parsed["title"]
venue = parsed.get("venue", "")
if venue:
repaired.pop("howpublished", None)
if _venue_looks_journal_like(venue):
repaired["journal"] = venue
else:
repaired["howpublished"] = venue
if parsed.get("volume"):
repaired["volume"] = parsed["volume"]
if parsed.get("number"):
repaired["number"] = parsed["number"]
if parsed.get("pages"):
repaired["pages"] = parsed["pages"]
return repaired
def _title_looks_like_raw_reference(title: str) -> bool:
compact = " ".join(title.split()).strip()
if not compact:
return True
if len(compact) > 120:
return True
return bool(re.match(rf"^[^,]+,\s+.+\b{HISTORICAL_YEAR_PATTERN}\.\s+", compact))
def _parse_report_style_reference(raw_text: str) -> dict[str, str] | None:
match = re.match(
rf"^(?P<author>.+?)\s+(?P<year>{HISTORICAL_YEAR_PATTERN})\.\s+(?P<remainder>.+)$",
raw_text.strip(),
)
if match is None:
return None
author = match.group("author").strip(" .")
year = match.group("year").strip()
remainder = match.group("remainder").strip()
if not author or not remainder:
return None
venue_start = _find_venue_start(remainder)
if venue_start is None:
return {
"author": author,
"year": year,
"title": remainder.strip(" ."),
"venue": "",
}
title = remainder[:venue_start].strip(" .")
venue_part = remainder[venue_start:].strip(" .")
venue, volume, number, pages = _split_venue_and_locator(venue_part)
return {
"author": author,
"year": year,
"title": title,
"venue": venue,
"volume": volume,
"number": number,
"pages": pages,
}
def _split_venue_and_locator(venue_part: str) -> tuple[str, str, str, str]:
compact = venue_part.strip(" .")
if not compact:
return "", "", "", ""
match = re.search(
r"(?P<venue>.+?)\.\s+(?P<volume>\d+)(?:\((?P<number>[^)]+)\))?\s*:\s*(?P<pages>\d+(?:-\d+)?)\.?$",
compact,
)
if match is None:
match = re.search(
r"(?P<venue>.+?)\s+(?P<volume>\d+)(?:\((?P<number>[^)]+)\))?\s*:\s*(?P<pages>\d+(?:-\d+)?)\.?$",
compact,
)
if match is None:
return compact, "", "", ""
return (
match.group("venue").strip(" ."),
(match.group("volume") or "").strip(),
(match.group("number") or "").strip(),
(match.group("pages") or "").strip(),
)
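# For example (illustration, not part of this diff):
#     _split_venue_and_locator("Journal of Mammalogy. 79(2):251-262.")
#     # -> ("Journal of Mammalogy", "79", "2", "251-262")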
def _find_venue_start(remainder: str) -> int | None:
for match in re.finditer(r"\.\s+", remainder):
candidate_start = match.end()
candidate = remainder[candidate_start:].strip()
if _looks_like_publication_segment(candidate):
return candidate_start
lowered = remainder.lower()
markers = (
"comm. rept.",
"rept.",
"proc.",
"procs.",
"journal",
"transactions",
"proceedings",
"bulletin",
"bull.",
"occas. pap.",
"pap.",
"memoir",
"memorandum",
"memo.",
"tech. memo.",
"tech memo",
"technical memorandum",
"technical report",
"noaa",
)
positions = [lowered.find(marker) for marker in markers if lowered.find(marker) > 0]
if positions:
return min(positions)
return None
def _looks_like_publication_segment(candidate: str) -> bool:
compact = candidate.strip(" .")
if not compact:
return False
venue, volume, number, pages = _split_venue_and_locator(compact)
if venue and (volume or number or pages) and _starts_with_publication_marker(compact):
return True
return _starts_with_publication_marker(compact)
def _starts_with_publication_marker(text: str) -> bool:
lowered = text.lower()
publication_starts = (
"comm. rept.",
"rept.",
"proc.",
"procs.",
"journal",
"transactions",
"proceedings",
"bulletin",
"bull.",
"occas. pap.",
"pap.",
"memoir",
"memorandum",
"memo.",
"tech. memo.",
"tech memo",
"technical memorandum",
"technical report",
"noaa",
"u.s.",
)
return lowered.startswith(publication_starts)
def _venue_looks_journal_like(venue: str) -> bool:
lowered = venue.lower()
return any(
token in lowered
for token in (
"journal",
"transactions",
"review",
"letters",
"comm. rept.",
"rept.",
"proc.",
"proceedings",
"occas. pap.",
"pap.",
)
)
def render_single_bibtex(entry_type: str, citation_key: str, fields: dict[str, str]) -> str:
lines = [f"@{entry_type}{{{citation_key},"]
for key in sorted(fields):
value = _sanitize_bibtex_value(fields[key])
lines.append(f" {key} = {{{value}}},")
lines.append("}")
return "\n".join(lines)
def _sanitize_bibtex_value(value: str) -> str:
depth = 0
parts: list[str] = []
for char in value:
if char == "{":
depth += 1
parts.append(char)
continue
if char == "}":
if depth == 0:
parts.append(")")
else:
depth -= 1
parts.append(char)
continue
parts.append(char)
if depth > 0:
open_count = depth
normalized: list[str] = []
for char in parts:
if char == "{" and open_count > 0:
normalized.append("(")
open_count -= 1
else:
normalized.append(char)
return "".join(normalized)
return "".join(parts)


@@ -0,0 +1,480 @@
from __future__ import annotations
import json
import re
from dataclasses import asdict, dataclass, field
HEADING_PATTERN = re.compile(r"^(#{2,6})\s+(?P<title>.+?)\s*$")
INDENTED_ITEM_PATTERN = re.compile(r"^\s*-\s*(?P<body>.+?)\s*$")
DOI_PATTERN = re.compile(r"\b10\.\d{4,9}/[-._;()/:A-Za-z0-9]+\b")
@dataclass
class DocumentNode:
node_type: str
title: str
body: str
depth: int
children: list["DocumentNode"] = field(default_factory=list)
@dataclass
class StructuredDocument:
metadata: dict[str, object]
nodes: list[DocumentNode]
def _parse_scalar_value(value: str) -> object:
stripped = value.strip()
if not stripped:
return ""
if stripped.lower() == "true":
return True
if stripped.lower() == "false":
return False
if stripped.startswith("{") or stripped.startswith("["):
try:
return json.loads(stripped)
except json.JSONDecodeError:
return stripped
return stripped
def _normalize_whitespace(value: str) -> str:
return re.sub(r"\s+", " ", value).strip()
def _parse_front_matter(front_matter: str) -> dict[str, object]:
metadata: dict[str, object] = {}
lines = front_matter.splitlines()
index = 0
while index < len(lines):
raw_line = lines[index]
if not raw_line.strip() or raw_line.lstrip().startswith("#"):
index += 1
continue
if ":" not in raw_line:
index += 1
continue
key, value = raw_line.split(":", 1)
normalized_key = key.strip()
stripped_value = value.strip()
if stripped_value:
metadata[normalized_key] = _parse_scalar_value(stripped_value)
index += 1
continue
items: list[dict[str, object]] = []
index += 1
while index < len(lines):
item_line = lines[index]
if not item_line.strip():
index += 1
continue
if not item_line.startswith(" - "):
break
match = INDENTED_ITEM_PATTERN.match(item_line)
if not match:
break
item: dict[str, object] = {}
first_body = match.group("body")
if ":" in first_body:
item_key, item_value = first_body.split(":", 1)
item[item_key.strip()] = _parse_scalar_value(item_value.strip())
index += 1
while index < len(lines):
nested_line = lines[index]
if nested_line.startswith(" ") and ":" in nested_line.strip():
nested_key, nested_value = nested_line.strip().split(":", 1)
item[nested_key.strip()] = _parse_scalar_value(nested_value.strip())
index += 1
continue
break
items.append(item)
metadata[normalized_key] = items
return metadata
def _split_front_matter(text: str) -> tuple[dict[str, object], str]:
stripped = text.lstrip()
if not stripped.startswith("---\n"):
return {}, text
_, _, remainder = stripped.partition("---\n")
front_matter, separator, body = remainder.partition("\n---\n")
if not separator:
return {}, text
return _parse_front_matter(front_matter), body
def parse_markdown_document(text: str) -> StructuredDocument:
metadata, body = _split_front_matter(text)
root_nodes: list[DocumentNode] = []
stack: list[DocumentNode] = []
body_lines: list[str] = []
def flush_body() -> None:
if not stack:
body_lines.clear()
return
stack[-1].body = "\n".join(body_lines).strip()
body_lines.clear()
for raw_line in body.splitlines():
match = HEADING_PATTERN.match(raw_line)
if not match:
body_lines.append(raw_line)
continue
flush_body()
depth = len(match.group(1))
node = DocumentNode(
node_type="section",
title=match.group("title").strip(),
body="",
depth=depth,
)
while stack and stack[-1].depth >= depth:
stack.pop()
if stack:
stack[-1].children.append(node)
else:
root_nodes.append(node)
stack.append(node)
flush_body()
return StructuredDocument(metadata=metadata, nodes=root_nodes)
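# For example (illustration, not part of this diff):
#     doc = parse_markdown_document(
#         "---\n"
#         "title: Example Species\n"
#         "---\n"
#         "## Summary\n"
#         "A short summary paragraph.\n"
#     )
#     # doc.metadata == {"title": "Example Species"}
#     # doc.nodes[0].title == "Summary", doc.nodes[0].depth == 2,
#     # doc.nodes[0].body == "A short summary paragraph."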
def validate_markdown_document(text: str) -> list[str]:
errors: list[str] = []
metadata, body = _split_front_matter(text)
if not metadata:
errors.append("Markdown document must include YAML front matter.")
last_depth: int | None = None
for raw_line in body.splitlines():
match = HEADING_PATTERN.match(raw_line)
if not match:
continue
depth = len(match.group(1))
if last_depth is not None and depth > last_depth + 1:
errors.append(
f"Heading depth jumps from level {last_depth} to level {depth}: {match.group('title').strip()}"
)
last_depth = depth
return errors
def _append_metadata_lines(lines: list[str], key: str, value: object) -> None:
if isinstance(value, list):
lines.append(f"{key}:")
for item in value:
if isinstance(item, dict) and item:
first = True
for item_key, item_value in item.items():
rendered = "true" if item_value is True else "false" if item_value is False else str(item_value)
prefix = " - " if first else " "
lines.append(f"{prefix}{item_key}: {rendered}")
first = False
else:
lines.append(f" - {item}")
return
rendered = "true" if value is True else "false" if value is False else str(value)
lines.append(f"{key}: {rendered}")
def export_markdown_document(document: StructuredDocument) -> str:
lines: list[str] = ["---"]
for key, value in document.metadata.items():
_append_metadata_lines(lines, key, value)
lines.append("---")
lines.append("")
def append_nodes(nodes: list[DocumentNode]) -> None:
for node in nodes:
lines.append(f"{'#' * node.depth} {node.title}")
if node.body:
lines.append(node.body)
lines.append("")
append_nodes(node.children)
append_nodes(document.nodes)
return "\n".join(lines).rstrip() + "\n"
def flatten_document_nodes(document: StructuredDocument) -> list[dict[str, object]]:
flattened: list[dict[str, object]] = []
def visit(nodes: list[DocumentNode], parent_id: str | None) -> None:
for index, node in enumerate(nodes, start=1):
node_id = f"node-{len(flattened) + 1}"
flattened.append(
{
"node_id": node_id,
"parent_id": parent_id,
"position": index,
"depth": node.depth,
"node_type": node.node_type,
"title": node.title,
"body_markdown": node.body,
"body_plaintext": node.body,
}
)
visit(node.children, node_id)
visit(document.nodes, None)
return flattened
def document_to_json(document: StructuredDocument) -> str:
return json.dumps(asdict(document), ensure_ascii=True)
def build_document_from_species_payload(item: dict[str, object]) -> StructuredDocument:
legacy_identifiers: list[dict[str, object]] = []
if item.get("flelmr_code"):
legacy_identifiers.append(
{
"authority": "legacy-ecospecies",
"identifier": str(item.get("flelmr_code", "")),
"label": "FLELMR",
}
)
metadata = {
"title": str(item.get("title", "")),
"common_name": str(item.get("common_name", "")),
"scientific_name": str(item.get("scientific_name", "")),
"legacy_identifiers": legacy_identifiers,
"taxon_identifiers": list(item.get("taxon_identifiers", [])),
"primary_taxon_authority": str(item.get("primary_taxon_authority", "")),
"source_file": str(item.get("source_file", "")),
"publication_status": str(item.get("publication_status", "published")),
"source_format": "ecospecies-markdown-v1",
}
nodes: list[DocumentNode] = []
summary = str(item.get("summary", "")).strip()
if summary:
nodes.append(
DocumentNode(
node_type="section",
title="Summary",
body=summary,
depth=2,
)
)
for section in item.get("sections", []):
heading = str(section.get("heading", "")).strip()
if not heading or heading == "HEADER":
continue
nodes.append(
DocumentNode(
node_type="section",
title=heading,
body=str(section.get("content", "")).strip(),
depth=2,
)
)
return StructuredDocument(metadata=metadata, nodes=nodes)
def extract_species_projection(document: StructuredDocument) -> dict[str, object]:
metadata = document.metadata
summary = ""
sections: list[dict[str, object]] = []
legacy_identifiers = metadata.get("legacy_identifiers", [])
taxon_identifiers = metadata.get("taxon_identifiers", [])
flelmr_code = ""
if isinstance(legacy_identifiers, list):
for item in legacy_identifiers:
if not isinstance(item, dict):
continue
authority = str(item.get("authority", "")).strip().lower()
label = str(item.get("label", "")).strip().lower()
if authority == "legacy-ecospecies" or label == "flelmr":
flelmr_code = str(item.get("identifier", "")).strip()
if flelmr_code:
break
if not flelmr_code:
flelmr_code = str(metadata.get("species_code", "")).strip()
def visit(nodes: list[DocumentNode], path: list[str]) -> None:
nonlocal summary
for node in nodes:
current_path = [*path, node.title]
if node.title.lower() == "summary" and not summary:
summary = node.body.strip()
else:
sections.append(
{
"heading": " / ".join(current_path),
"content": node.body.strip(),
}
)
visit(node.children, current_path)
visit(document.nodes, [])
return {
"title": metadata.get("title", ""),
"common_name": metadata.get("common_name", ""),
"scientific_name": metadata.get("scientific_name", ""),
"flelmr_code": flelmr_code,
"legacy_identifiers": legacy_identifiers if isinstance(legacy_identifiers, list) else [],
"taxon_identifiers": taxon_identifiers if isinstance(taxon_identifiers, list) else [],
"primary_taxon_authority": str(metadata.get("primary_taxon_authority", "")),
"summary": summary,
"sections": sections,
}
def _is_citation_heading(title: str) -> bool:
lowered = title.strip().rstrip(":").lower()
return lowered in {
"references",
"reference",
"citations",
"citation",
"bibliography",
"related references",
"related citations",
}
def _split_citation_lines(body: str) -> list[str]:
entries: list[dict[str, str]] = []
current: list[str] = []
current_number = ""
def flush() -> None:
nonlocal current_number
if not current:
return
compact = " ".join(part.strip() for part in current if part.strip()).strip()
if compact:
entries.append(
{
"legacy_reference_number": current_number,
"raw_text": compact,
}
)
current.clear()
current_number = ""
for raw_line in body.splitlines():
stripped = raw_line.strip()
if not stripped:
flush()
continue
leading_number_match = re.match(r"^(?P<num>\d+)\s*,\s*(?P<text>.+)$", stripped)
if leading_number_match:
flush()
current_number = leading_number_match.group("num")
current.append(leading_number_match.group("text"))
continue
bare_number_match = re.match(r"^(?P<num>\d+)\s+(?P<text>[A-Z].+)$", stripped)
if bare_number_match:
flush()
current_number = bare_number_match.group("num")
current.append(bare_number_match.group("text"))
continue
bullet_match = re.match(
r"^(?:[-*]|\[(?P<bracket_num>\d+)\]|(?P<plain_num>\d+)[\.,])\s+(?P<text>.+)$",
stripped,
)
if bullet_match:
flush()
current_number = bullet_match.group("bracket_num") or bullet_match.group("plain_num") or ""
bullet_text = bullet_match.group("text")
if not current_number:
nested_number_match = re.match(r"^\[(?P<num>\d+)\]\s+(?P<text>.+)$", bullet_text)
if nested_number_match:
current_number = nested_number_match.group("num")
bullet_text = nested_number_match.group("text")
else:
nested_comma_match = re.match(r"^(?P<num>\d+)\s*,\s*(?P<text>.+)$", bullet_text)
if nested_comma_match:
current_number = nested_comma_match.group("num")
bullet_text = nested_comma_match.group("text")
current.append(bullet_text)
continue
current.append(stripped)
flush()
return entries
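# For example (illustration, not part of this diff), both legacy numbering
# styles are recognized:
#     _split_citation_lines(
#         "1, Smith, J. 1998. Manatee ecology. J. Mammal. 79(2):251-262.\n"
#         "[2] Doe, A. 2001. Seagrass surveys.\n"
#     )
#     # -> [{"legacy_reference_number": "1",
#     #      "raw_text": "Smith, J. 1998. Manatee ecology. J. Mammal. 79(2):251-262."},
#     #     {"legacy_reference_number": "2",
#     #      "raw_text": "Doe, A. 2001. Seagrass surveys."}]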
def extract_citation_entries(document: StructuredDocument) -> list[dict[str, object]]:
entries: list[dict[str, object]] = []
def visit(nodes: list[DocumentNode], path: list[str]) -> None:
for node in nodes:
current_path = [*path, node.title]
if _is_citation_heading(node.title):
section_heading = " / ".join(current_path)
for item in _split_citation_lines(node.body):
raw_text = item["raw_text"]
doi_match = DOI_PATTERN.search(raw_text)
entries.append(
{
"section_heading": section_heading,
"legacy_reference_number": item["legacy_reference_number"],
"raw_text": raw_text,
"normalized_text": _normalize_whitespace(raw_text),
"doi": doi_match.group(0) if doi_match else "",
}
)
visit(node.children, current_path)
visit(document.nodes, [])
return entries
def add_citation_to_document(
document: StructuredDocument,
citation_text: str,
heading_title: str = "Related References",
) -> bool:
normalized_citation = _normalize_whitespace(citation_text)
if not normalized_citation:
return False
for node in document.nodes:
if _is_citation_heading(node.title):
existing = {_normalize_whitespace(item["raw_text"]) for item in _split_citation_lines(node.body)}
if normalized_citation in existing:
return False
body = node.body.rstrip()
node.body = f"{body}\n- {citation_text}".strip() if body else f"- {citation_text}"
return True
document.nodes.append(
DocumentNode(
node_type="section",
title=heading_title,
body=f"- {citation_text}",
depth=2,
)
)
return True


@@ -0,0 +1,267 @@
from __future__ import annotations
from sqlalchemy import select
from ecospecies_api.citegeist_bridge import extract_draft_citation
from ecospecies_api.document_format import (
build_document_from_species_payload,
document_to_json,
extract_citation_entries,
extract_species_projection,
export_markdown_document,
flatten_document_nodes,
parse_markdown_document,
validate_markdown_document,
)
from ecospecies_api.models import (
DocumentSection,
Species,
SpeciesCitation,
SpeciesDocument,
SpeciesDocumentNode,
SpeciesTaxonIdentifier,
)
def _persist_taxon_identifiers(session, species: Species, taxon_identifiers: list[dict[str, object]]) -> None:
for identifier in list(species.taxon_identifiers):
session.delete(identifier)
session.flush()
for position, item in enumerate(taxon_identifiers, start=1):
authority = str(item.get("authority", "")).strip()
identifier = str(item.get("identifier", "")).strip()
if not authority or not identifier:
continue
session.add(
SpeciesTaxonIdentifier(
species_id=species.id,
position=position,
authority=authority,
identifier=identifier,
label=str(item.get("label", "")).strip(),
is_primary=bool(item.get("primary") or item.get("is_primary")),
source_url=str(item.get("source_url", "")).strip(),
)
)
def _existing_taxon_identifier_payload(species: Species) -> list[dict[str, object]]:
return [
{
"authority": item.authority,
"identifier": item.identifier,
"label": item.label,
"primary": item.is_primary,
"source_url": item.source_url,
}
for item in species.taxon_identifiers
]
def _citation_match_key(item: dict[str, object]) -> tuple[str, str, str]:
return (
str(item.get("section_heading", "")).strip(),
str(item.get("legacy_reference_number", "")).strip(),
str(item.get("raw_text", "")).strip(),
)
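# Illustration (not part of this diff): citations are matched across re-imports
# by this composite key, e.g.
#     _citation_match_key({"section_heading": "References",
#                          "legacy_reference_number": "3",
#                          "raw_text": "Smith, J. 1998. Manatee ecology."})
#     # -> ("References", "3", "Smith, J. 1998. Manatee ecology.")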
def _persist_citations(session, species: Species, citations: list[dict[str, object]]) -> None:
existing_by_key = {
_citation_match_key(
{
"section_heading": citation.section_heading,
"legacy_reference_number": citation.legacy_reference_number,
"raw_text": citation.raw_text,
}
): citation
for citation in species.citations
}
retained_ids: set[int] = set()
for position, item in enumerate(citations, start=1):
raw_text = str(item.get("raw_text", "")).strip()
if not raw_text:
continue
key = _citation_match_key(item)
legacy_reference_number = str(item.get("legacy_reference_number", "")).strip()
existing = existing_by_key.get(key)
extracted_normalized = str(item.get("normalized_text", "")).strip()
extracted_doi = str(item.get("doi", "")).strip()
draft = extract_draft_citation(raw_text, legacy_reference_number)
if existing is None:
session.add(
SpeciesCitation(
species_id=species.id,
position=position,
section_heading=str(item.get("section_heading", "")).strip(),
legacy_reference_number=legacy_reference_number,
citation_key=draft.citation_key if draft is not None else "",
entry_type=draft.entry_type if draft is not None else "misc",
raw_text=raw_text,
normalized_text=extracted_normalized,
abstract_text="",
draft_bibtex=draft.draft_bibtex if draft is not None else "",
doi=extracted_doi,
source_url="",
openalex_id="",
resolver_source_label="",
enrichment_status="pending",
enrichment_error="",
source_type="document_extract",
review_status="draft",
)
)
continue
existing.position = position
existing.section_heading = str(item.get("section_heading", "")).strip()
existing.legacy_reference_number = legacy_reference_number
existing.raw_text = raw_text
if existing.review_status == "draft":
existing.normalized_text = extracted_normalized
existing.abstract_text = ""
existing.doi = extracted_doi
existing.citation_key = draft.citation_key if draft is not None else ""
existing.entry_type = draft.entry_type if draft is not None else "misc"
existing.draft_bibtex = draft.draft_bibtex if draft is not None else ""
existing.source_type = "document_extract"
existing.enrichment_status = "pending"
existing.enrichment_error = ""
existing.resolver_source_label = ""
existing.source_url = ""
existing.openalex_id = ""
retained_ids.add(existing.id)
session.add(existing)
for citation in list(species.citations):
if citation.id not in retained_ids and citation.source_type in {"document_extract", "editor_review"}:
session.delete(citation)
def _persist_document_model(session, species: Species, document_model, markdown_content: str, updated_by: str) -> None:
ast_json = document_to_json(document_model)
document = session.scalar(
select(SpeciesDocument).where(SpeciesDocument.species_id == species.id)
)
if document is None:
document = SpeciesDocument(
species_id=species.id,
source_format="ecospecies-markdown-v1",
markdown_content=markdown_content,
ast_json=ast_json,
updated_by=updated_by,
)
session.add(document)
session.flush()
else:
document.source_format = "ecospecies-markdown-v1"
document.markdown_content = markdown_content
document.ast_json = ast_json
document.updated_by = updated_by
session.add(document)
for node in list(document.nodes):
session.delete(node)
session.flush()
for node in flatten_document_nodes(document_model):
session.add(
SpeciesDocumentNode(
document_id=document.id,
parent_node_ref=node["parent_id"],
node_ref=node["node_id"],
position=node["position"],
depth=node["depth"],
node_type=node["node_type"],
title=node["title"],
body_markdown=node["body_markdown"],
body_plaintext=node["body_plaintext"],
)
)
def sync_species_document(session, species: Species, item: dict[str, object]) -> None:
payload = dict(item)
if "taxon_identifiers" not in payload or not payload.get("taxon_identifiers"):
payload["taxon_identifiers"] = _existing_taxon_identifier_payload(species)
if "primary_taxon_authority" not in payload or not payload.get("primary_taxon_authority"):
for identifier in payload["taxon_identifiers"]:
if bool(identifier.get("primary")):
payload["primary_taxon_authority"] = str(identifier.get("authority", "")).strip()
break
document_model = build_document_from_species_payload(payload)
markdown_content = export_markdown_document(document_model)
_persist_document_model(
session,
species,
document_model,
markdown_content,
str(item.get("last_modified_by", "system-import")),
)
_persist_citations(session, species, extract_citation_entries(document_model))
def get_species_document_payload(session, slug: str) -> dict[str, object] | None:
species = session.scalar(select(Species).where(Species.slug == slug))
if species is None or species.document is None:
return None
document = species.document
return {
"slug": species.slug,
"source_format": document.source_format,
"markdown": document.markdown_content,
"ast_json": document.ast_json,
"node_count": len(document.nodes),
"updated_by": document.updated_by,
}
def save_species_document(session, species: Species, markdown: str, username: str) -> dict[str, object]:
errors = validate_markdown_document(markdown)
if errors:
raise ValueError("; ".join(errors))
document_model = parse_markdown_document(markdown)
projection = extract_species_projection(document_model)
_persist_document_model(session, species, document_model, markdown, username)
_persist_citations(session, species, extract_citation_entries(document_model))
if projection["title"]:
species.title = str(projection["title"])
if projection["common_name"]:
species.common_name = str(projection["common_name"])
if projection["scientific_name"]:
species.scientific_name = str(projection["scientific_name"])
if projection["flelmr_code"]:
species.flelmr_code = str(projection["flelmr_code"])
_persist_taxon_identifiers(session, species, list(projection["taxon_identifiers"]))
species.summary = str(projection["summary"])
species.section_count = len(projection["sections"])
species.last_modified_by = username
for section in list(species.sections):
session.delete(section)
session.flush()
for position, section in enumerate(projection["sections"], start=1):
session.add(
DocumentSection(
species_id=species.id,
position=position,
heading=str(section["heading"]),
content=str(section["content"]),
)
)
return {
"slug": species.slug,
"summary": species.summary,
"section_count": species.section_count,
"markdown": markdown,
"updated_by": username,
}


@@ -23,6 +23,9 @@ class Species(Base):
publication_status: Mapped[str] = mapped_column(String(32), default="published", index=True)
is_archived: Mapped[bool] = mapped_column(Boolean, default=False, index=True)
editor_notes: Mapped[str] = mapped_column(Text, default="")
created_by: Mapped[str] = mapped_column(String(255), default="system-import")
owner_username: Mapped[str] = mapped_column(String(255), default="")
owner_role: Mapped[str] = mapped_column(String(32), default="")
last_modified_by: Mapped[str] = mapped_column(String(255), default="system-import")
sections: Mapped[list["DocumentSection"]] = relationship(
@@ -40,6 +43,21 @@ class Species(Base):
cascade="all, delete-orphan",
order_by="SpeciesAuditLog.id.desc()",
)
document: Mapped["SpeciesDocument | None"] = relationship(
back_populates="species",
cascade="all, delete-orphan",
uselist=False,
)
taxon_identifiers: Mapped[list["SpeciesTaxonIdentifier"]] = relationship(
back_populates="species",
cascade="all, delete-orphan",
order_by="SpeciesTaxonIdentifier.position",
)
citations: Mapped[list["SpeciesCitation"]] = relationship(
back_populates="species",
cascade="all, delete-orphan",
order_by="SpeciesCitation.position",
)
class DocumentSection(Base):
@@ -77,3 +95,93 @@ class SpeciesAuditLog(Base):
details_json: Mapped[str] = mapped_column(Text)
species: Mapped[Species] = relationship(back_populates="audit_entries")
class SpeciesDocument(Base):
__tablename__ = "species_document"
id: Mapped[int] = mapped_column(Integer, primary_key=True)
species_id: Mapped[int] = mapped_column(ForeignKey("species.id", ondelete="CASCADE"), unique=True, index=True)
source_format: Mapped[str] = mapped_column(String(64), default="ecospecies-markdown-v1")
markdown_content: Mapped[str] = mapped_column(Text, default="")
ast_json: Mapped[str] = mapped_column(Text, default="")
updated_by: Mapped[str] = mapped_column(String(255), default="system-import")
species: Mapped[Species] = relationship(back_populates="document")
nodes: Mapped[list["SpeciesDocumentNode"]] = relationship(
back_populates="document",
cascade="all, delete-orphan",
order_by="SpeciesDocumentNode.position",
)
class SpeciesDocumentNode(Base):
__tablename__ = "species_document_node"
id: Mapped[int] = mapped_column(Integer, primary_key=True)
document_id: Mapped[int] = mapped_column(ForeignKey("species_document.id", ondelete="CASCADE"), index=True)
parent_node_ref: Mapped[str | None] = mapped_column(String(64), nullable=True, default=None)
node_ref: Mapped[str] = mapped_column(String(64), index=True)
position: Mapped[int] = mapped_column(Integer, default=1)
depth: Mapped[int] = mapped_column(Integer, default=2)
node_type: Mapped[str] = mapped_column(String(32), default="section")
title: Mapped[str] = mapped_column(String(255), default="")
body_markdown: Mapped[str] = mapped_column(Text, default="")
body_plaintext: Mapped[str] = mapped_column(Text, default="")
source_heading: Mapped[str] = mapped_column(String(255), default="")
source_span_start: Mapped[int | None] = mapped_column(Integer, nullable=True, default=None)
source_span_end: Mapped[int | None] = mapped_column(Integer, nullable=True, default=None)
document: Mapped[SpeciesDocument] = relationship(back_populates="nodes")
class ContributorAccount(Base):
__tablename__ = "contributor_account"
id: Mapped[int] = mapped_column(Integer, primary_key=True)
email: Mapped[str] = mapped_column(String(255), unique=True, index=True)
token_hash: Mapped[str] = mapped_column(String(128), unique=True, index=True)
age_gate_confirmed: Mapped[bool] = mapped_column(Boolean, default=False)
created_at: Mapped[str] = mapped_column(String(64), index=True)
is_active: Mapped[bool] = mapped_column(Boolean, default=True, index=True)
class SpeciesTaxonIdentifier(Base):
__tablename__ = "species_taxon_identifier"
id: Mapped[int] = mapped_column(Integer, primary_key=True)
species_id: Mapped[int] = mapped_column(ForeignKey("species.id", ondelete="CASCADE"), index=True)
position: Mapped[int] = mapped_column(Integer, default=1)
authority: Mapped[str] = mapped_column(String(64), default="")
identifier: Mapped[str] = mapped_column(String(255), default="")
label: Mapped[str] = mapped_column(String(128), default="")
is_primary: Mapped[bool] = mapped_column(Boolean, default=False, index=True)
source_url: Mapped[str] = mapped_column(String(500), default="")
species: Mapped[Species] = relationship(back_populates="taxon_identifiers")
class SpeciesCitation(Base):
__tablename__ = "species_citation"
id: Mapped[int] = mapped_column(Integer, primary_key=True)
species_id: Mapped[int] = mapped_column(ForeignKey("species.id", ondelete="CASCADE"), index=True)
position: Mapped[int] = mapped_column(Integer, default=1)
section_heading: Mapped[str] = mapped_column(String(255), default="")
legacy_reference_number: Mapped[str] = mapped_column(String(64), default="", index=True)
citation_key: Mapped[str] = mapped_column(String(255), default="", index=True)
entry_type: Mapped[str] = mapped_column(String(64), default="misc")
raw_text: Mapped[str] = mapped_column(Text, default="")
normalized_text: Mapped[str] = mapped_column(Text, default="")
abstract_text: Mapped[str] = mapped_column(Text, default="")
draft_bibtex: Mapped[str] = mapped_column(Text, default="")
doi: Mapped[str] = mapped_column(String(255), default="", index=True)
source_url: Mapped[str] = mapped_column(String(500), default="")
openalex_id: Mapped[str] = mapped_column(String(64), default="", index=True)
resolver_source_label: Mapped[str] = mapped_column(String(255), default="")
enrichment_status: Mapped[str] = mapped_column(String(32), default="pending", index=True)
enrichment_error: Mapped[str] = mapped_column(Text, default="")
source_type: Mapped[str] = mapped_column(String(64), default="document_extract")
review_status: Mapped[str] = mapped_column(String(32), default="draft", index=True)
species: Mapped[Species] = relationship(back_populates="citations")


@@ -1,14 +1,18 @@
from __future__ import annotations
import hashlib
import os
import re
from collections import Counter
from dataclasses import dataclass
from pathlib import Path
SECTION_PATTERN = re.compile(r"^[A-Z][A-Z\s/&()-]{2,}$")
TITLE_SECTION_PATTERN = re.compile(r"^[A-Z][A-Za-z\s/&()-]{2,}$")
FIELD_PATTERN = re.compile(r"^(?P<key>[A-Za-z/ _-]+):\s*(?P<value>.*)$")
SUMMARY_MARKER_PATTERN = re.compile(r"^(summary(?:/abstract)?|abstract|executive summary):?\s*$", re.IGNORECASE)
SAFE_DIRECTORY_NAME_PATTERN = re.compile(r"^[A-Za-z0-9_-]+$")
@dataclass
@@ -38,6 +42,10 @@ class SpeciesRecord:
diagnostics: list[IngestDiagnostic]
def get_repo_root() -> Path:
return Path(__file__).resolve().parents[4]
def slugify(value: str) -> str:
cleaned = re.sub(r"[^a-z0-9]+", "-", value.lower()).strip("-")
return cleaned or "unknown-species"
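# For example (illustration, not part of this diff):
#     slugify("West Indian Manatee (Trichechus manatus)")
#     # -> "west-indian-manatee-trichechus-manatus"
#     slugify("???")  # -> "unknown-species"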
@@ -53,6 +61,33 @@ def normalize_whitespace(value: str) -> str:
return re.sub(r"\s+", " ", value).strip()
def is_section_heading(line: str) -> bool:
stripped = line.strip()
if not stripped:
return False
normalized = stripped[:-1].strip() if stripped.endswith(":") else stripped
if not normalized:
return False
if ":" in normalized:
return False
if SECTION_PATTERN.fullmatch(normalized):
return True
if not TITLE_SECTION_PATTERN.fullmatch(normalized):
return False
words = normalized.split()
if len(words) > 4:
return False
return all(word[0].isupper() for word in words if word and word[0].isalpha())
def normalize_heading(line: str) -> str:
stripped = line.strip()
if stripped.endswith(":"):
return stripped[:-1].strip()
return stripped
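Taken together, the two patterns and the four-word guard classify lines like this (a sketch of expected behavior, not part of the commit):

```python
# Expected behavior per the rules above; example inputs are hypothetical.
assert is_section_heading("HABITAT")                # all-caps section heading
assert is_section_heading("Ecological Notes:")      # trailing colon is stripped first
assert not is_section_heading("Phylum: Mollusca")   # interior colon means key/value
assert not is_section_heading("A Very Long Title Case Heading Here")  # more than 4 words
assert normalize_heading("Citations:") == "Citations"
```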
def split_sections(lines: list[str]) -> list[Section]:
sections: list[Section] = []
current_heading = "HEADER"
@ -61,7 +96,7 @@ def split_sections(lines: list[str]) -> list[Section]:
for raw_line in lines:
line = raw_line.rstrip()
stripped = line.strip()
- if SECTION_PATTERN.fullmatch(stripped):
+ if is_section_heading(stripped):
if current_lines:
sections.append(
Section(
@ -69,7 +104,7 @@ def split_sections(lines: list[str]) -> list[Section]:
content="\n".join(current_lines).strip(),
)
)
- current_heading = stripped
+ current_heading = normalize_heading(stripped)
current_lines = []
continue
current_lines.append(line)
@ -96,8 +131,9 @@ def extract_metadata(lines: list[str]) -> dict[str, str]:
value = match.group("value").strip()
metadata[key] = value
- # Legacy files vary between "FLELMR", "FLELMR Code", and similar labels.
- if key.startswith("flelmr"):
+ # Legacy files vary between "FLELMR", "FLELMR Code", "EcoSpecies Code",
+ # and similar labels.
+ if key.startswith("flelmr") or key == "ecospecies code":
metadata["flelmr"] = value
return metadata
@ -127,7 +163,7 @@ def extract_summary(lines: list[str], sections: list[Section]) -> str:
if summary_lines:
summary_lines.append("")
continue
- if SECTION_PATTERN.fullmatch(stripped):
+ if is_section_heading(stripped):
break
if stripped.startswith("[") and not summary_lines:
break
@ -202,23 +238,76 @@ def parse_species_file(path: Path) -> SpeciesRecord:
)
def ensure_unique_record_slugs(records: list[SpeciesRecord]) -> list[SpeciesRecord]:
slug_counts = Counter(record.slug for record in records)
used_slugs: set[str] = set()
for record in records:
base_slug = record.slug
if slug_counts[base_slug] == 1 and base_slug not in used_slugs:
used_slugs.add(base_slug)
continue
disambiguator = slugify(Path(record.source_file).stem)
if disambiguator == base_slug:
disambiguator = hashlib.sha1(record.source_file.encode("utf-8")).hexdigest()[:8]
candidate = f"{base_slug}-{disambiguator}"
if candidate in used_slugs:
source_hash = hashlib.sha1(record.source_file.encode("utf-8")).hexdigest()[:8]
candidate = f"{candidate}-{source_hash}"
suffix = 2
while candidate in used_slugs:
candidate = f"{base_slug}-{disambiguator}-{suffix}"
suffix += 1
record.slug = candidate
used_slugs.add(candidate)
return records
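The function only reads each record's `slug` and `source_file`, so a toy stand-in (hypothetical, not the real `SpeciesRecord`) shows the disambiguation:

```python
# Toy stand-in with the two attributes the function reads; assumes
# ensure_unique_record_slugs and slugify are in scope.
from dataclasses import dataclass

@dataclass
class _Rec:
    slug: str
    source_file: str

a = _Rec("red-snapper", "Red Snapper_SLH_Outline2012_0722.txt")
b = _Rec("red-snapper", "RedSnapper_SLH_2012_0830_combined.txt")
ensure_unique_record_slugs([a, b])
print(a.slug)  # red-snapper-red-snapper-slh-outline2012-0722
print(b.slug)  # red-snapper-redsnapper-slh-2012-0830-combined
```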
def load_species_records(data_dir: str) -> list[SpeciesRecord]:
- base = Path(data_dir)
+ base = resolve_data_dir(data_dir)
if not base.exists():
return []
records: list[SpeciesRecord] = []
for path in sorted(base.glob("*.txt")):
records.append(parse_species_file(path))
- return records
+ return ensure_unique_record_slugs(records)
def resolve_data_dir(data_dir: str) -> Path:
repo_root = get_repo_root().resolve()
raw_value = data_dir.strip()
if not raw_value:
raise ValueError("Species data directory cannot be empty.")
candidate = Path(raw_value)
if candidate.is_absolute():
resolved = candidate.resolve()
else:
resolved = (repo_root / candidate).resolve()
try:
relative = resolved.relative_to(repo_root)
except ValueError as exc:
raise ValueError("Species data directory must stay within the codebase directory.") from exc
if not relative.parts:
raise ValueError("Species data directory must be a subdirectory of the codebase.")
for part in relative.parts:
if not SAFE_DIRECTORY_NAME_PATTERN.fullmatch(part):
raise ValueError(
f"Species data directory contains an unsafe directory name: {part!r}."
)
return resolved
def get_default_data_dir() -> str:
- return os.environ.get(
-     "ECOSPECIES_DATA_DIR",
-     str(
-         Path(__file__).resolve().parents[4].parent
-         / "01-legacy-code-and-data"
-         / "InputFiles - TXT"
-     ),
- )
+ configured = os.environ.get("ECOSPECIES_DATA_DIR", "input-data/InputFiles")
+ return str(resolve_data_dir(configured))

File diff suppressed because it is too large

apps/api/test_auth.py (new file, 21 lines)

@ -0,0 +1,21 @@
from __future__ import annotations
import importlib.util
import sys
from pathlib import Path
ROOT = Path(__file__).resolve().parent
SRC = ROOT / "src"
if str(SRC) not in sys.path:
sys.path.insert(0, str(SRC))
TEST_PATH = ROOT / "tests" / "test_auth.py"
SPEC = importlib.util.spec_from_file_location("ecospecies_api_test_auth", TEST_PATH)
MODULE = importlib.util.module_from_spec(SPEC)
assert SPEC is not None and SPEC.loader is not None
SPEC.loader.exec_module(MODULE)
for name in dir(MODULE):
if name.startswith("Test") or name.endswith("Tests"):
globals()[name] = getattr(MODULE, name)

apps/api/test_citation_enrichment.py (new file, 21 lines)

@ -0,0 +1,21 @@
from __future__ import annotations
import importlib.util
import sys
from pathlib import Path
ROOT = Path(__file__).resolve().parent
SRC = ROOT / "src"
if str(SRC) not in sys.path:
sys.path.insert(0, str(SRC))
TEST_PATH = ROOT / "tests" / "test_citation_enrichment.py"
SPEC = importlib.util.spec_from_file_location("ecospecies_api_test_citation_enrichment", TEST_PATH)
MODULE = importlib.util.module_from_spec(SPEC)
assert SPEC is not None and SPEC.loader is not None
SPEC.loader.exec_module(MODULE)
for name in dir(MODULE):
if name.startswith("Test") or name.endswith("Tests"):
globals()[name] = getattr(MODULE, name)

apps/api/test_document_format.py (new file, 21 lines)

@ -0,0 +1,21 @@
from __future__ import annotations
import importlib.util
import sys
from pathlib import Path
ROOT = Path(__file__).resolve().parent
SRC = ROOT / "src"
if str(SRC) not in sys.path:
sys.path.insert(0, str(SRC))
TEST_PATH = ROOT / "tests" / "test_document_format.py"
SPEC = importlib.util.spec_from_file_location("ecospecies_api_test_document_format", TEST_PATH)
MODULE = importlib.util.module_from_spec(SPEC)
assert SPEC is not None and SPEC.loader is not None
SPEC.loader.exec_module(MODULE)
for name in dir(MODULE):
if name.startswith("Test") or name.endswith("Tests"):
globals()[name] = getattr(MODULE, name)

apps/api/test_parser.py (new file, 21 lines)

@ -0,0 +1,21 @@
from __future__ import annotations
import importlib.util
import sys
from pathlib import Path
ROOT = Path(__file__).resolve().parent
SRC = ROOT / "src"
if str(SRC) not in sys.path:
sys.path.insert(0, str(SRC))
TEST_PATH = ROOT / "tests" / "test_parser.py"
SPEC = importlib.util.spec_from_file_location("ecospecies_api_test_parser", TEST_PATH)
MODULE = importlib.util.module_from_spec(SPEC)
assert SPEC is not None and SPEC.loader is not None
SPEC.loader.exec_module(MODULE)
for name in dir(MODULE):
if name.startswith("Test") or name.endswith("Tests"):
globals()[name] = getattr(MODULE, name)

apps/api/tests/test_auth.py (new file, 58 lines)

@ -0,0 +1,58 @@
from __future__ import annotations
import tempfile
import unittest
from pathlib import Path
from unittest.mock import patch
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from ecospecies_api import auth, repository
class ContributorAuthTests(unittest.TestCase):
def setUp(self) -> None:
self.tempdir = tempfile.TemporaryDirectory()
db_path = Path(self.tempdir.name) / "test.db"
self.engine = create_engine(f"sqlite:///{db_path}", future=True)
self.session_local = sessionmaker(
bind=self.engine,
autoflush=False,
autocommit=False,
future=True,
)
self.repository_engine_patch = patch.object(repository, "create_db_engine", return_value=self.engine)
self.repository_session_patch = patch.object(repository, "SessionLocal", self.session_local)
self.auth_engine_patch = patch.object(auth, "create_db_engine", return_value=self.engine)
self.auth_session_patch = patch.object(auth, "SessionLocal", self.session_local)
self.repository_engine_patch.start()
self.repository_session_patch.start()
self.auth_engine_patch.start()
self.auth_session_patch.start()
def tearDown(self) -> None:
self.auth_session_patch.stop()
self.auth_engine_patch.stop()
self.repository_session_patch.stop()
self.repository_engine_patch.stop()
self.engine.dispose()
self.tempdir.cleanup()
def test_contributor_token_resolves_to_contributor_session(self) -> None:
registration = repository.register_contributor("author@example.org", True)
session = auth.resolve_auth_session({"Authorization": f"Bearer {registration['token']}"})
self.assertIsNotNone(session)
assert session is not None
self.assertEqual(session.username, "author@example.org")
self.assertEqual(session.role, "contributor")
def test_contributor_role_does_not_satisfy_editor(self) -> None:
self.assertTrue(auth.role_satisfies("editor", "contributor"))
self.assertFalse(auth.role_satisfies("contributor", "editor"))
if __name__ == "__main__":
unittest.main()

apps/api/tests/test_citation_enrichment.py (new file, 527 lines)

@ -0,0 +1,527 @@
from __future__ import annotations
import unittest
from unittest.mock import patch
from ecospecies_api.citation_enrichment import (
_crossref_message_to_entry,
_datacite_item_to_entry,
_openalex_work_to_entry,
_render_normalized_text,
apply_citation_candidate_selection,
discover_citation_candidates,
enrich_citation_payload,
LocalBibEntry,
LocalMetadataResolver,
LocalResolution,
)
from ecospecies_api.citegeist_bridge import extract_draft_citation, render_single_bibtex
class CitationEnrichmentTests(unittest.TestCase):
def test_render_normalized_text_includes_volume_number_and_pages(self) -> None:
rendered = _render_normalized_text(
"article",
{
"author": "Daniell, W.C.",
"year": "1872",
"title": "Letters referring to experiments of W.C. Daniell",
"journal": "Comm. Rept. U.S. Comm. Fish & Fish.",
"volume": "2",
"number": "4",
"pages": "387-390",
"doi": "10.1000/example",
},
)
self.assertEqual(
rendered,
"Daniell, W.C. (1872). Letters referring to experiments of W.C. Daniell. Comm. Rept. U.S. Comm. Fish & Fish., 2(4): 387-390. DOI:10.1000/example",
)
def test_crossref_mapping_keeps_volume_issue_and_pages(self) -> None:
entry = _crossref_message_to_entry(
{
"type": "journal-article",
"title": ["Example Work"],
"issued": {"date-parts": [[1872]]},
"author": [{"family": "Daniell", "given": "W.C."}],
"container-title": ["Comm. Rept. U.S. Comm. Fish & Fish."],
"DOI": "10.1000/example",
"URL": "https://doi.org/10.1000/example",
"volume": "2",
"issue": "4",
"page": "387-390",
}
)
self.assertEqual(entry.fields["volume"], "2")
self.assertEqual(entry.fields["number"], "4")
self.assertEqual(entry.fields["pages"], "387-390")
def test_openalex_mapping_keeps_biblio_fields(self) -> None:
entry = _openalex_work_to_entry(
{
"id": "https://openalex.org/W12345",
"display_name": "OpenAlex Discovered Work",
"publication_year": 2022,
"type": "article",
"doi": "https://doi.org/10.1000/example-openalex",
"authorships": [{"author": {"display_name": "J S, Smith"}}],
"primary_location": {"source": {"display_name": "Journal of Graph Discovery"}},
"biblio": {"volume": "12", "issue": "3", "first_page": "101", "last_page": "118"},
"abstract_inverted_index": {"Graphs": [0], "support": [1], "learning": [2]},
}
)
self.assertEqual(entry.fields["author"], "Smith, J. S.")
self.assertEqual(entry.fields["volume"], "12")
self.assertEqual(entry.fields["number"], "3")
self.assertEqual(entry.fields["pages"], "101-118")
self.assertEqual(entry.fields["abstract"], "Graphs support learning")
def test_openalex_mapping_handles_null_source(self) -> None:
entry = _openalex_work_to_entry(
{
"id": "https://openalex.org/W54321",
"display_name": "OpenAlex Work Without Source",
"publication_year": 2021,
"type": "article",
"doi": "https://doi.org/10.1000/example-null-source",
"authorships": [{"author": {"display_name": "Jane Smith"}}],
"primary_location": {"source": None},
"biblio": {"volume": "5", "issue": "1", "first_page": "10", "last_page": "20"},
}
)
self.assertEqual(entry.fields["title"], "OpenAlex Work Without Source")
self.assertNotIn("journal", entry.fields)
self.assertEqual(entry.fields["volume"], "5")
self.assertEqual(entry.fields["number"], "1")
self.assertEqual(entry.fields["pages"], "10-20")
def test_datacite_mapping_keeps_container_and_pages(self) -> None:
entry = _datacite_item_to_entry(
{
"attributes": {
"titles": [{"title": "DataCite Work"}],
"creators": [{"name": "J R, Rivera"}],
"publicationYear": "2021",
"doi": "10.1000/datacite-work",
"url": "https://doi.org/10.1000/datacite-work",
"container": "Journal of Metadata",
"volume": "7",
"issue": "2",
"firstPage": "44",
"lastPage": "59",
"descriptions": [
{"descriptionType": "Abstract", "description": "Abstract: Metadata makes reuse easier."}
],
}
}
)
self.assertEqual(entry.fields["author"], "Rivera, J. R.")
self.assertEqual(entry.fields["journal"], "Journal of Metadata")
self.assertEqual(entry.fields["volume"], "7")
self.assertEqual(entry.fields["number"], "2")
self.assertEqual(entry.fields["pages"], "44-59")
self.assertEqual(entry.fields["abstract"], "Metadata makes reuse easier.")
def test_render_single_bibtex_preserves_balanced_braces_and_repairs_unmatched_ones(self) -> None:
rendered = render_single_bibtex(
"misc",
"example",
{
"title": "Alpha_beta {Gamma}",
"note": "raw_reference = {Alpha } beta}",
},
)
self.assertIn("title = {Alpha_beta {Gamma}}", rendered)
self.assertIn("note = {raw_reference = {Alpha } beta)}", rendered)
def test_extract_draft_citation_repairs_report_style_reference_fields(self) -> None:
class MockEntry:
entry_type = "misc"
citation_key = "badkey"
fields = {
"title": "Daniell, W.C. 1872. Letters referring to experiments of W.C. Daniell, M.D., in introducing shad into the Alabama River. Comm. Rept. U.S. Comm. Fish & Fish. 2: 387-390.",
"year": "1872",
"note": "extracted_reference = {true}",
}
with patch(
"ecospecies_api.citegeist_bridge._load_citegeist_extract",
return_value=lambda text: [MockEntry()],
):
draft = extract_draft_citation(
"Daniell, W.C. 1872. Letters referring to experiments of W.C. Daniell, M.D., in introducing shad into the Alabama River. Comm. Rept. U.S. Comm. Fish & Fish. 2: 387-390.",
legacy_reference_number="160",
)
self.assertIsNotNone(draft)
assert draft is not None
self.assertEqual(draft.fields["author"], "Daniell, W.C")
self.assertEqual(
draft.fields["title"],
"Letters referring to experiments of W.C. Daniell, M.D., in introducing shad into the Alabama River",
)
self.assertEqual(draft.fields["journal"], "Comm. Rept. U.S. Comm. Fish & Fish")
self.assertEqual(draft.fields["volume"], "2")
self.assertEqual(draft.fields["pages"], "387-390")
self.assertEqual(draft.citation_key, "daniell1872lettersreferringexperiments")
def test_extract_draft_citation_does_not_split_title_on_report_word(self) -> None:
class MockEntry:
entry_type = "misc"
citation_key = "badkey"
fields = {
"title": "Smith, J. 1999. Habitat report synthesis for Alabama shad. NOAA Tech. Memo. NMFS-SEFSC-200.",
"year": "1999",
"note": "extracted_reference = {true}",
}
with patch(
"ecospecies_api.citegeist_bridge._load_citegeist_extract",
return_value=lambda text: [MockEntry()],
):
draft = extract_draft_citation(
"Smith, J. 1999. Habitat report synthesis for Alabama shad. NOAA Tech. Memo. NMFS-SEFSC-200.",
legacy_reference_number="42",
)
self.assertIsNotNone(draft)
assert draft is not None
self.assertEqual(draft.fields["author"], "Smith, J")
self.assertEqual(draft.fields["title"], "Habitat report synthesis for Alabama shad")
self.assertEqual(draft.fields["howpublished"], "NOAA Tech. Memo. NMFS-SEFSC-200")
self.assertNotIn("journal", draft.fields)
def test_extract_draft_citation_repairs_proc_abbreviation_venue(self) -> None:
class MockEntry:
entry_type = "misc"
citation_key = "badkey"
fields = {
"title": "Bailey, R.M., H.E. Winn and C.L. Smith. 1954. Fishes from the Escambia River, Alabama and Florida, with ecologic and taxonomic notes. Proc. Acad. Sci. Philad. 106: 109-134.",
"year": "1954",
"note": "extracted_reference = {true}",
}
with patch(
"ecospecies_api.citegeist_bridge._load_citegeist_extract",
return_value=lambda text: [MockEntry()],
):
draft = extract_draft_citation(
"Bailey, R.M., H.E. Winn and C.L. Smith. 1954. Fishes from the Escambia River, Alabama and Florida, with ecologic and taxonomic notes. Proc. Acad. Sci. Philad. 106: 109-134.",
legacy_reference_number="26",
)
self.assertIsNotNone(draft)
assert draft is not None
self.assertEqual(
draft.fields["title"],
"Fishes from the Escambia River, Alabama and Florida, with ecologic and taxonomic notes",
)
self.assertEqual(draft.fields["journal"], "Proc. Acad. Sci. Philad")
self.assertEqual(draft.fields["volume"], "106")
self.assertEqual(draft.fields["pages"], "109-134")
def test_extract_draft_citation_repairs_occasional_paper_venue(self) -> None:
class MockEntry:
entry_type = "misc"
citation_key = "badkey"
fields = {
"title": "Behre, E.H. 1950. Annotated list of the fauna of the Grand Isle region, 1928-1946. Occas. Pap. Mar. Lab., LSU 6(6): 1-66.",
"year": "1950",
"note": "extracted_reference = {true}",
}
with patch(
"ecospecies_api.citegeist_bridge._load_citegeist_extract",
return_value=lambda text: [MockEntry()],
):
draft = extract_draft_citation(
"Behre, E.H. 1950. Annotated list of the fauna of the Grand Isle region, 1928-1946. Occas. Pap. Mar. Lab., LSU 6(6): 1-66.",
legacy_reference_number="41",
)
self.assertIsNotNone(draft)
assert draft is not None
self.assertEqual(
draft.fields["title"],
"Annotated list of the fauna of the Grand Isle region, 1928-1946",
)
self.assertEqual(draft.fields["journal"], "Occas. Pap. Mar. Lab., LSU")
self.assertEqual(draft.fields["volume"], "6")
self.assertEqual(draft.fields["number"], "6")
self.assertEqual(draft.fields["pages"], "1-66")
def test_extract_draft_citation_repairs_partial_existing_venue_stub(self) -> None:
class MockEntry:
entry_type = "misc"
citation_key = "badkey"
fields = {
"title": "Annotated list of the fauna of the Grand Isle region, 1928-1946",
"year": "1950",
"howpublished": "Occas",
"note": "extracted_reference = {true}",
}
with patch(
"ecospecies_api.citegeist_bridge._load_citegeist_extract",
return_value=lambda text: [MockEntry()],
):
draft = extract_draft_citation(
"Behre, E.H. 1950. Annotated list of the fauna of the Grand Isle region, 1928-1946. Occas. Pap. Mar. Lab., LSU 6(6): 1-66.",
legacy_reference_number="41",
)
self.assertIsNotNone(draft)
assert draft is not None
self.assertEqual(draft.fields["journal"], "Occas. Pap. Mar. Lab., LSU")
self.assertEqual(draft.fields["volume"], "6")
self.assertEqual(draft.fields["number"], "6")
self.assertEqual(draft.fields["pages"], "1-66")
def test_falls_back_to_internal_resolver_when_citegeist_repo_is_unavailable(self) -> None:
class MockResolver:
def resolve_entry(self, entry):
class Resolution:
source_label = "crossref:doi:10.1000/example"
class Entry:
entry_type = "article"
citation_key = "doi101000example"
fields = {
"author": "Smith, Jane",
"year": "2024",
"title": "Example Work",
"journal": "Journal of Examples",
"doi": "10.1000/example",
"url": "https://doi.org/10.1000/example",
}
entry = Entry()
return Resolution()
with patch(
"ecospecies_api.citation_enrichment._load_citegeist_resolution_components",
return_value=(None, None, None, None),
):
result = enrich_citation_payload(
{
"raw_text": "Smith, Jane. 2024. Example Work.",
"legacy_reference_number": "7",
},
resolver=MockResolver(),
)
self.assertEqual(result["enrichment_status"], "resolved")
self.assertEqual(result["doi"], "10.1000/example")
self.assertEqual(result["source_url"], "https://doi.org/10.1000/example")
self.assertEqual(result["resolver_source_label"], "crossref:doi:10.1000/example")
self.assertIn("ecospecies_reference_number = {7}", result["draft_bibtex"])
def test_enrichment_replaces_raw_reference_title_and_dedupes_legacy_note(self) -> None:
class MockResolver:
def resolve_entry(self, entry):
class Resolution:
source_label = "crossref:search:Letters referring to experiments"
class Entry:
entry_type = "article"
citation_key = "daniell1872lettersshadalabama"
fields = {
"author": "Daniell, W.C.",
"year": "1872",
"title": "Letters referring to experiments of W.C. Daniell, M.D., in introducing shad into the Alabama River",
"journal": "Comm. Rept. U.S. Comm. Fish & Fish.",
"url": "",
}
entry = Entry()
return Resolution()
result = enrich_citation_payload(
{
"raw_text": "Daniell, W.C. 1872. Letters referring to experiments of W.C. Daniell, M.D., in introducing shad into the Alabama River. Comm. Rept. U.S. Comm. Fish & Fish. 2: 387-390.",
"legacy_reference_number": "160",
"citation_key": "daniell1948daniellwc",
},
resolver=MockResolver(),
)
self.assertEqual(result["enrichment_status"], "resolved")
self.assertEqual(result["citation_key"], "daniell1872lettersreferringexperiments")
self.assertIn(
"title = {Letters referring to experiments of W.C. Daniell, M.D., in introducing shad into the Alabama River}",
result["draft_bibtex"],
)
self.assertIn("year = {1872}", result["draft_bibtex"])
self.assertEqual(result["draft_bibtex"].count("ecospecies_reference_number = {160}"), 1)
def test_enrichment_rejects_conflicting_resolved_metadata(self) -> None:
class MockResolver:
def resolve_entry(self, entry):
class Resolution:
source_label = "crossref:search:alabama-shad-false-positive"
class Entry:
entry_type = "article"
citation_key = "daniell2009habitatuseage"
fields = {
"author": "Daniell, W.C.",
"year": "2009",
"title": "Habitat use of age 0 Alabama shad in the Pascagoula River drainage, USA",
"journal": "Transactions of the American Fisheries Society",
"doi": "10.1111/j.1600-0633.2009.00395.x",
"url": "https://doi.org/10.1111/j.1600-0633.2009.00395.x",
"volume": "19",
"number": "1",
"pages": "107-115",
}
entry = Entry()
return Resolution()
result = enrich_citation_payload(
{
"raw_text": "Daniell, W.C. 1872. Letters referring to experiments of W.C. Daniell, M.D., in introducing shad into the Alabama River. Comm. Rept. U.S. Comm. Fish & Fish. 2: 387-390.",
"legacy_reference_number": "160",
},
resolver=MockResolver(),
)
self.assertEqual(result["enrichment_status"], "unresolved")
self.assertIn("conflicts with citation seed fields", result["enrichment_error"])
def test_unresolved_enrichment_still_returns_refreshed_seed_fields(self) -> None:
class MockResolver:
def resolve_entry(self, entry):
return None
result = enrich_citation_payload(
{
"raw_text": "Behre, E.H. 1950. Annotated list of the fauna of the Grand Isle region, 1928-1946. Occas. Pap. Mar. Lab., LSU 6(6): 1-66.",
"legacy_reference_number": "41",
"citation_key": "oldbadkey",
"entry_type": "misc",
},
resolver=MockResolver(),
)
self.assertEqual(result["enrichment_status"], "unresolved")
self.assertEqual(result["citation_key"], "behre1950annotatedlistfauna")
self.assertIn("title = {Annotated list of the fauna of the Grand Isle region, 1928-1946}", result["draft_bibtex"])
self.assertIn("Occas. Pap. Mar. Lab., LSU", result["draft_bibtex"])
def test_discover_citation_candidates_returns_scored_candidates(self) -> None:
class MockResolver:
def search_crossref_candidates(self, title):
return [
LocalResolution(
LocalBibEntry(
"article",
"daniell1872lettersreferringexperiments",
{
"author": "Daniell, W.C.",
"year": "1872",
"title": "Letters referring to experiments of W.C. Daniell, M.D., in introducing shad into the Alabama River",
"journal": "Comm. Rept. U.S. Comm. Fish & Fish.",
"volume": "2",
"pages": "387-390",
},
),
"crossref:search:1:daniell-good",
),
LocalResolution(
LocalBibEntry(
"article",
"daniell2009habitatuseage",
{
"author": "Daniell, W.C.",
"year": "2009",
"title": "Habitat use of age 0 Alabama shad in the Pascagoula River drainage, USA",
"journal": "Transactions of the American Fisheries Society",
"volume": "19",
"number": "1",
"pages": "107-115",
},
),
"crossref:search:2:daniell-bad",
),
]
def search_datacite_candidates(self, title):
return []
def search_openalex_candidates(self, title):
return []
result = discover_citation_candidates(
{
"raw_text": "Daniell, W.C. 1872. Letters referring to experiments of W.C. Daniell, M.D., in introducing shad into the Alabama River. Comm. Rept. U.S. Comm. Fish & Fish. 2: 387-390.",
"legacy_reference_number": "160",
},
resolver=MockResolver(),
)
self.assertEqual(result["candidate_count"], 2)
self.assertGreater(result["candidates"][0]["score"], result["candidates"][1]["score"])
self.assertEqual(result["candidates"][0]["field_matches"]["year"]["status"], "exact")
self.assertEqual(result["candidates"][1]["field_matches"]["year"]["status"], "conflict")
def test_local_crossref_candidate_search_filters_placeholder_stub_entries(self) -> None:
resolver = LocalMetadataResolver()
resolver._safe_get_json = lambda url: {
"message": {
"items": [
{
"type": "journal-article",
"title": ["Referenced work 1"],
"issued": {"date-parts": [[2020]]},
},
{
"type": "journal-article",
"title": ["Useful Paper"],
"issued": {"date-parts": [[2020]]},
"author": [{"family": "Smith", "given": "J S"}],
"container-title": ["Journal of Examples"],
"DOI": "10.1000/useful",
},
]
}
}
results = resolver.search_crossref_candidates("Useful Paper")
self.assertEqual(len(results), 1)
self.assertEqual(results[0].entry.fields["title"], "Useful Paper")
def test_apply_citation_candidate_selection_uses_selected_candidate_fields(self) -> None:
result = apply_citation_candidate_selection(
{
"raw_text": "Daniell, W.C. 1872. Letters referring to experiments of W.C. Daniell, M.D., in introducing shad into the Alabama River. Comm. Rept. U.S. Comm. Fish & Fish. 2: 387-390.",
"legacy_reference_number": "160",
},
{
"source_label": "crossref:search:1:daniell-good",
"entry_type": "article",
"fields": {
"author": "Daniell, W.C.",
"year": "1872",
"title": "Letters referring to experiments of W.C. Daniell, M.D., in introducing shad into the Alabama River",
"journal": "Comm. Rept. U.S. Comm. Fish & Fish.",
"volume": "2",
"pages": "387-390",
},
},
)
self.assertEqual(result["enrichment_status"], "resolved")
self.assertEqual(result["resolver_source_label"], "editor:selected:crossref:search:1:daniell-good")
self.assertIn("Comm. Rept. U.S. Comm. Fish & Fish., 2: 387-390", result["normalized_text"])

apps/api/tests/test_document_format.py (new file, 195 lines)

@ -0,0 +1,195 @@
from __future__ import annotations
import json
import unittest
from ecospecies_api.document_format import (
DocumentNode,
StructuredDocument,
build_document_from_species_payload,
extract_citation_entries,
extract_species_projection,
export_markdown_document,
parse_markdown_document,
validate_markdown_document,
)
class StructuredMarkdownTests(unittest.TestCase):
def test_round_trip_markdown_preserves_metadata_and_hierarchy(self) -> None:
source = """---
title: American Oyster
common_name: American Oyster
scientific_name: Crassostrea virginica
legacy_identifiers:
- authority: legacy-ecospecies
identifier: 5192
label: FLELMR
taxon_identifiers:
- authority: worms
identifier: 159059
label: AphiaID
primary: true
primary_taxon_authority: worms
---
## Summary
Short abstract.
## Habitat
### Type
Estuarine.
"""
document = parse_markdown_document(source)
self.assertEqual(document.metadata["title"], "American Oyster")
self.assertEqual(document.metadata["primary_taxon_authority"], "worms")
self.assertEqual(document.metadata["legacy_identifiers"][0]["identifier"], "5192")
self.assertEqual(document.metadata["taxon_identifiers"][0]["authority"], "worms")
self.assertEqual(document.nodes[0].title, "Summary")
self.assertEqual(document.nodes[1].children[0].title, "Type")
self.assertIn("## Habitat", export_markdown_document(document))
def test_build_document_from_species_payload_creates_markdown_sections(self) -> None:
document = build_document_from_species_payload(
{
"title": "American Oyster",
"common_name": "American Oyster",
"scientific_name": "Crassostrea virginica",
"flelmr_code": "5192",
"source_file": "American Oyster.txt",
"summary": "Short abstract.",
"sections": [
{"heading": "HEADER", "content": "Ignored header"},
{"heading": "Habitat", "content": "Estuarine."},
{"heading": "Reproduction", "content": "Broadcast spawner."},
],
}
)
self.assertEqual(document.metadata["legacy_identifiers"][0]["identifier"], "5192")
self.assertEqual(document.metadata["legacy_identifiers"][0]["authority"], "legacy-ecospecies")
self.assertEqual([node.title for node in document.nodes], ["Summary", "Habitat", "Reproduction"])
self.assertEqual(document.nodes[1].body, "Estuarine.")
def test_extract_species_projection_flattens_nested_headings(self) -> None:
document = parse_markdown_document(
"""---
title: American Oyster
common_name: American Oyster
scientific_name: Crassostrea virginica
legacy_identifiers:
- authority: legacy-ecospecies
identifier: 5192
label: FLELMR
---
## Summary
Short abstract.
## Habitat
General habitat.
### Type
Estuarine.
"""
)
projection = extract_species_projection(document)
self.assertEqual(projection["summary"], "Short abstract.")
self.assertEqual(projection["flelmr_code"], "5192")
self.assertEqual(
[section["heading"] for section in projection["sections"]],
["Habitat", "Habitat / Type"],
)
def test_extract_species_projection_accepts_legacy_species_code_front_matter(self) -> None:
document = parse_markdown_document(
"""---
title: Legacy Fish
common_name: Legacy Fish
scientific_name: Pisces historicus
species_code: 4242
---
## Habitat
Estuarine.
"""
)
projection = extract_species_projection(document)
self.assertEqual(projection["flelmr_code"], "4242")
def test_validate_markdown_document_rejects_missing_front_matter_and_depth_jump(self) -> None:
errors = validate_markdown_document(
"""## Habitat
Text
#### Type
Nested too deeply.
"""
)
self.assertTrue(any("front matter" in error for error in errors))
self.assertTrue(any("Heading depth jumps" in error for error in errors))
def test_extract_citation_entries_strips_legacy_comma_number_prefix(self) -> None:
document = parse_markdown_document(
"""---
title: Alabama Shad
common_name: Alabama Shad
scientific_name: Alosa alabamae
---
## References
160, Daniell, W.C. 1872. Letters referring to experiments of W.C. Daniell, M.D., in introducing shad into the Alabama River. Comm. Rept. U.S. Comm. Fish & Fish. 2: 387-390.
"""
)
citations = extract_citation_entries(document)
self.assertEqual(len(citations), 1)
self.assertEqual(citations[0]["legacy_reference_number"], "160")
self.assertTrue(citations[0]["raw_text"].startswith("Daniell, W.C. 1872."))
self.assertFalse(citations[0]["raw_text"].startswith("160,"))
def test_extract_citation_entries_accepts_colon_terminated_citation_heading(self) -> None:
citations = extract_citation_entries(
StructuredDocument(
metadata={},
nodes=[
DocumentNode(
node_type="section",
title="Citations:",
body="7, Ahmed, M. 1975. Speciation in living oysters.",
depth=2,
)
],
)
)
self.assertEqual(len(citations), 1)
self.assertEqual(citations[0]["legacy_reference_number"], "7")
def test_extract_citation_entries_accepts_bare_number_prefix(self) -> None:
document = parse_markdown_document(
"""---
title: Eastern Mosquitofish
common_name: Eastern Mosquitofish
scientific_name: Gambusia holbrooki
---
## Citations
848 Gilmore, R.G. 1977. Fishes of the Indian River Lagoon and adjacent waters, Florida.
"""
)
citations = extract_citation_entries(document)
self.assertEqual(len(citations), 1)
self.assertEqual(citations[0]["legacy_reference_number"], "848")
self.assertTrue(citations[0]["raw_text"].startswith("Gilmore, R.G. 1977."))

apps/api/tests/test_parser.py (new file, 109 lines)

@ -0,0 +1,109 @@
from __future__ import annotations
import tempfile
import unittest
from pathlib import Path
from unittest.mock import patch
from ecospecies_api import parser
class ParserPathResolutionTests(unittest.TestCase):
def test_ecospecies_code_is_treated_as_flelmr_code(self) -> None:
metadata = parser.extract_metadata(
[
"Title: Test Fish",
"EcoSpecies Code: 4242",
]
)
self.assertEqual(metadata["ecospecies code"], "4242")
self.assertEqual(metadata["flelmr"], "4242")
def test_title_case_headings_are_split_into_sections(self) -> None:
sections = parser.split_sections(
[
"Species profile: American oyster (Crassostrea virginica)",
"",
"Classification",
" Phylum: Mollusca",
"Value",
"Commercial: Important fishery.",
"Habitat",
"Type: Estuarine.",
]
)
self.assertEqual(
[section.heading for section in sections],
["HEADER", "Classification", "Value", "Habitat"],
)
def test_colon_terminated_title_case_headings_are_split_into_sections(self) -> None:
sections = parser.split_sections(
[
"Ecological Interactions and Notes",
"Predator text.",
"",
"Reference Numbers:",
"",
"Citations:",
"7, Ahmed, M. 1975. Speciation in living oysters.",
]
)
self.assertEqual(
[section.heading for section in sections],
["HEADER", "Citations"],
)
def test_default_data_dir_uses_in_repo_path_without_spaces(self) -> None:
with patch.dict("os.environ", {}, clear=True):
resolved = Path(parser.get_default_data_dir())
self.assertEqual(resolved, parser.get_repo_root() / "input-data" / "InputFiles")
def test_relative_override_must_stay_within_repo(self) -> None:
with self.assertRaisesRegex(ValueError, "within the codebase directory"):
parser.resolve_data_dir("../input-data/InputFiles")
def test_absolute_override_outside_repo_is_rejected(self) -> None:
with tempfile.TemporaryDirectory() as tempdir:
with self.assertRaisesRegex(ValueError, "within the codebase directory"):
parser.resolve_data_dir(tempdir)
def test_directory_names_with_spaces_are_rejected(self) -> None:
with self.assertRaisesRegex(ValueError, "unsafe directory name"):
parser.resolve_data_dir("input-data/Bad Name")
def test_directory_names_with_special_characters_are_rejected(self) -> None:
with self.assertRaisesRegex(ValueError, "unsafe directory name"):
parser.resolve_data_dir("input-data/bad@name")
def test_load_species_records_resolves_repo_relative_paths(self) -> None:
records = parser.load_species_records("input-data/InputFiles")
self.assertGreater(len(records), 0)
def test_duplicate_source_records_receive_unique_stable_slugs(self) -> None:
records = parser.load_species_records("input-data/InputFiles")
slug_by_source = {record.source_file: record.slug for record in records}
self.assertEqual(len(records), len(set(record.slug for record in records)))
self.assertEqual(
slug_by_source["Red Snapper_SLH_Outline2012_0722.txt"],
"red-snapper-red-snapper-slh-outline2012-0722",
)
self.assertEqual(
slug_by_source["RedSnapper_SLH_2012_0830_combined.txt"],
"red-snapper-redsnapper-slh-2012-0830-combined",
)
self.assertEqual(
slug_by_source["Sailfin Molly SLH RGG.txt"],
"sailfin-molly-sailfin-molly-slh-rgg",
)
self.assertTrue(
slug_by_source["Sailfin_Molly SLH RGG.txt"].startswith(
"sailfin-molly-sailfin-molly-slh-rgg-"
)
)

apps/api/tests/test_repository.py

@ -112,6 +112,35 @@ class RepositoryWorkflowTests(unittest.TestCase):
self.assertEqual(detail["section_count"], 2)
self.assertEqual([section["position"] for section in detail["sections"]], [1, 2])
self.assertEqual([item["code"] for item in detail["diagnostics"]], ["missing_citations"])
self.assertEqual(
detail["legacy_identifiers"],
[
{
"authority": "legacy-ecospecies",
"identifier": "9999",
"label": "FLELMR",
}
],
)
def test_species_detail_includes_structured_document_and_legacy_source(self) -> None:
input_dir = Path(self.tempdir.name) / "input-data" / "InputFiles"
input_dir.mkdir(parents=True, exist_ok=True)
(input_dir / "Test Shad.txt").write_text("HEADER\nLegacy header content\n", encoding="utf-8")
with patch.object(repository, "get_default_data_dir", return_value=str(input_dir)):
detail = repository.get_species_by_slug("test-shad")
self.assertIsNotNone(detail)
assert detail is not None
self.assertEqual(detail["structured_document"]["source_format"], "ecospecies-markdown-v1")
self.assertIn(
"HABITAT",
[node["title"] for node in detail["structured_document"]["ast"]["nodes"]],
)
self.assertEqual(detail["legacy_source"]["source_file"], "Test Shad.txt")
self.assertIn("Legacy header content", detail["legacy_source"]["text"])
self.assertEqual(detail["taxon_identifiers"], [])
def test_editorial_update_changes_publication_visibility_and_creates_audit(self) -> None:
result = repository.update_species_editorial(
@ -207,6 +236,60 @@ class RepositoryWorkflowTests(unittest.TestCase):
self.assertEqual(len(audit), 2)
self.assertEqual([entry["action"] for entry in audit], ["section_update", "editorial_update"])
def test_reimport_preserves_persisted_taxon_identifiers(self) -> None:
repository.update_species_document_markdown(
slug="test-shad",
markdown="""---
title: Test Shad
common_name: Test Shad
scientific_name: Alosa testus
legacy_identifiers:
- authority: legacy-ecospecies
identifier: 9999
label: FLELMR
taxon_identifiers:
- authority: gbif
identifier: 12345
label: taxonKey
primary: true
primary_taxon_authority: gbif
---
## Summary
Taxon-reviewed summary.
""",
username="edith",
)
repository.import_species_payload(UPDATED_PAYLOAD)
detail = repository.get_editor_species_detail("test-shad")
self.assertIsNotNone(detail)
self.assertEqual(detail["primary_taxon_authority"], "gbif")
self.assertEqual(
detail["primary_taxon_identifier"],
{
"authority": "gbif",
"identifier": "12345",
"label": "taxonKey",
"primary": True,
"source_url": "",
},
)
self.assertEqual(
detail["taxon_identifiers"],
[
{
"authority": "gbif",
"identifier": "12345",
"label": "taxonKey",
"primary": True,
"source_url": "",
}
],
)
def test_reimport_updates_summary_when_no_editorial_override_exists(self) -> None:
repository.import_species_payload(UPDATED_PAYLOAD)
@ -302,6 +385,583 @@ class RepositoryWorkflowTests(unittest.TestCase):
self.assertEqual(audit[0]["action"], "import_restore")
self.assertEqual(audit[0]["details"]["is_archived"], {"from": True, "to": False})
def test_document_markdown_update_refreshes_flat_projection(self) -> None:
result = repository.update_species_document_markdown(
slug="test-shad",
markdown="""---
title: Test Shad Markdown
common_name: Test Shad
scientific_name: Alosa markdownus
species_code: 4242
---
## Summary
Markdown summary.
## Habitat
Open water.
### Type
Pelagic.
""",
username="frank",
)
detail = repository.get_editor_species_detail("test-shad")
document = repository.get_species_document("test-shad")
audit = repository.list_species_audit("test-shad")
self.assertIsNotNone(result)
self.assertIsNotNone(detail)
self.assertIsNotNone(document)
self.assertEqual(detail["title"], "Test Shad Markdown")
self.assertEqual(detail["scientific_name"], "Alosa markdownus")
self.assertEqual(detail["flelmr_code"], "4242")
self.assertEqual(detail["summary"], "Markdown summary.")
self.assertEqual(
[section["heading"] for section in detail["sections"]],
["Habitat", "Habitat / Type"],
)
self.assertEqual(document["updated_by"], "frank")
self.assertIsNotNone(audit)
self.assertEqual(audit[0]["action"], "document_update")
def test_document_markdown_update_extracts_citations(self) -> None:
repository.update_species_document_markdown(
slug="test-shad",
markdown="""---
title: Test Shad Markdown
common_name: Test Shad
scientific_name: Alosa markdownus
legacy_identifiers:
- authority: legacy-ecospecies
identifier: 4242
label: FLELMR
---
## Summary
Markdown summary.
## References
- Smith, J. 2024. Example paper. doi:10.1000/example-doi
- [7] Jones, A. 2022. Fisheries review.
""",
username="frank",
)
detail = repository.get_editor_species_detail("test-shad")
self.assertIsNotNone(detail)
self.assertEqual(detail["citation_count"], 2)
self.assertEqual(detail["citations"][0]["section_heading"], "References")
self.assertEqual(detail["citations"][0]["legacy_reference_number"], "")
self.assertEqual(detail["citations"][0]["doi"], "10.1000/example-doi")
self.assertTrue(detail["citations"][0]["citation_key"])
self.assertIn("@", detail["citations"][0]["draft_bibtex"])
self.assertEqual(detail["citations"][0]["review_status"], "draft")
self.assertEqual(detail["citations"][1]["legacy_reference_number"], "7")
self.assertEqual(detail["citations"][1]["doi"], "")
self.assertIn("ecospecies_reference_number = \\{7\\}", detail["citations"][1]["draft_bibtex"])
def test_editor_can_review_citations_and_reviews_survive_document_save(self) -> None:
repository.update_species_document_markdown(
slug="test-shad",
markdown="""---
title: Test Shad Markdown
common_name: Test Shad
scientific_name: Alosa markdownus
legacy_identifiers:
- authority: legacy-ecospecies
identifier: 4242
label: FLELMR
---
## References
- [7] Jones, A. 2022. Fisheries review.
""",
username="frank",
)
citations = repository.get_editor_species_citations("test-shad")
self.assertIsNotNone(citations)
citation = citations["citations"][0]
result = repository.update_species_citation_review(
slug="test-shad",
citation_id=citation["id"],
review_status="accepted",
normalized_text="Jones, A. (2022). Fisheries review.",
doi="10.1000/review-doi",
citation_key="jones2022review",
entry_type="article",
draft_bibtex="@article{jones2022review,\n doi = {10.1000/review-doi}\n}",
username="edith",
)
self.assertIsNotNone(result)
self.assertEqual(result["citation"]["review_status"], "accepted")
self.assertEqual(result["citation"]["source_type"], "editor_review")
repository.update_species_document_markdown(
slug="test-shad",
markdown="""---
title: Test Shad Markdown
common_name: Test Shad
scientific_name: Alosa markdownus
legacy_identifiers:
- authority: legacy-ecospecies
identifier: 4242
label: FLELMR
---
## References
- [7] Jones, A. 2022. Fisheries review.
""",
username="frank",
)
citations = repository.get_editor_species_citations("test-shad")
audit = repository.list_species_audit("test-shad")
self.assertIsNotNone(citations)
self.assertEqual(citations["citation_count"], 1)
self.assertEqual(citations["citations"][0]["review_status"], "accepted")
self.assertEqual(citations["citations"][0]["doi"], "10.1000/review-doi")
self.assertEqual(citations["citations"][0]["citation_key"], "jones2022review")
self.assertEqual(citations["citations"][0]["entry_type"], "article")
self.assertIn("10.1000/review-doi", citations["citations"][0]["draft_bibtex"])
self.assertIsNotNone(audit)
self.assertEqual(audit[1]["action"], "citation_review_update")
def test_editor_can_run_citation_enrichment(self) -> None:
repository.update_species_document_markdown(
slug="test-shad",
markdown="""---
title: Test Shad Markdown
common_name: Test Shad
scientific_name: Alosa markdownus
legacy_identifiers:
- authority: legacy-ecospecies
identifier: 4242
label: FLELMR
---
## References
- [7] Jones, A. 2022. Fisheries review.
""",
username="frank",
)
citations = repository.get_editor_species_citations("test-shad")
self.assertIsNotNone(citations)
citation = citations["citations"][0]
with patch.object(
repository,
"enrich_citation_payload",
return_value={
"citation_key": "jones2022review",
"entry_type": "article",
"normalized_text": "Jones, A. (2022). Fisheries review. Journal of Tests. DOI:10.1000/review-doi",
"draft_bibtex": "@article{jones2022review,\n doi = {10.1000/review-doi},\n}",
"doi": "10.1000/review-doi",
"source_url": "https://doi.org/10.1000/review-doi",
"openalex_id": "W12345",
"resolver_source_label": "crossref:doi:10.1000/review-doi",
"enrichment_status": "resolved",
"enrichment_error": "",
"conflicts": [],
},
):
result = repository.update_species_citation_enrichment(
slug="test-shad",
citation_id=citation["id"],
username="edith",
)
self.assertIsNotNone(result)
self.assertEqual(result["citation"]["enrichment_status"], "resolved")
self.assertEqual(result["citation"]["doi"], "10.1000/review-doi")
self.assertEqual(result["citation"]["openalex_id"], "W12345")
self.assertEqual(result["citation"]["resolver_source_label"], "crossref:doi:10.1000/review-doi")
self.assertEqual(result["citation"]["source_url"], "https://doi.org/10.1000/review-doi")
citations = repository.get_editor_species_citations("test-shad")
audit = repository.list_species_audit("test-shad")
self.assertIsNotNone(citations)
self.assertEqual(citations["citations"][0]["citation_key"], "jones2022review")
self.assertEqual(citations["citations"][0]["entry_type"], "article")
self.assertEqual(citations["citations"][0]["enrichment_status"], "resolved")
self.assertIsNotNone(audit)
self.assertEqual(audit[0]["action"], "citation_enrichment")
def test_editor_can_run_batch_citation_enrichment(self) -> None:
repository.update_species_document_markdown(
slug="test-shad",
markdown="""---
title: Test Shad Markdown
common_name: Test Shad
scientific_name: Alosa markdownus
legacy_identifiers:
- authority: legacy-ecospecies
identifier: 4242
label: FLELMR
---
## References
- [7] Jones, A. 2022. Fisheries review.
- [8] Smith, B. 2021. Estuarine habitat paper.
""",
username="frank",
)
payloads = [
{
"citation_key": "jones2022review",
"entry_type": "article",
"normalized_text": "Jones, A. (2022). Fisheries review.",
"draft_bibtex": "@article{jones2022review,\n}",
"doi": "10.1000/review-doi",
"source_url": "https://doi.org/10.1000/review-doi",
"openalex_id": "W12345",
"resolver_source_label": "crossref:doi:10.1000/review-doi",
"enrichment_status": "resolved",
"enrichment_error": "",
"conflicts": [],
},
{
"citation_key": "smith2021estuarine",
"entry_type": "misc",
"normalized_text": "",
"draft_bibtex": "",
"doi": "",
"source_url": "",
"openalex_id": "",
"resolver_source_label": "",
"enrichment_status": "unresolved",
"enrichment_error": "No metadata match found from DOI, title, or authority identifiers.",
"conflicts": [],
},
]
with patch.object(repository, "enrich_citation_payload", side_effect=payloads):
result = repository.update_species_citations_enrichment_batch(
slug="test-shad",
username="edith",
)
self.assertIsNotNone(result)
self.assertEqual(result["citation_count"], 2)
self.assertEqual(result["changed_count"], 2)
self.assertEqual(result["resolved_count"], 1)
self.assertEqual(result["unresolved_count"], 1)
self.assertEqual(result["error_count"], 0)
def test_editor_can_review_and_apply_citation_candidates(self) -> None:
repository.update_species_document_markdown(
slug="test-shad",
markdown="""---
title: Test Shad Markdown
common_name: Test Shad
scientific_name: Alosa markdownus
legacy_identifiers:
- authority: legacy-ecospecies
identifier: 4242
label: FLELMR
---
## References
- [7] Daniell, W.C. 1872. Letters referring to experiments of W.C. Daniell, M.D., in introducing shad into the Alabama River. Comm. Rept. U.S. Comm. Fish & Fish. 2: 387-390.
""",
username="frank",
)
citations = repository.get_editor_species_citations("test-shad")
self.assertIsNotNone(citations)
citation = citations["citations"][0]
with patch.object(
repository,
"discover_citation_candidates",
return_value={
"seed": {
"fields": {
"author": "Daniell, W.C.",
"year": "1872",
"title": "Letters referring to experiments of W.C. Daniell, M.D., in introducing shad into the Alabama River",
"journal": "Comm. Rept. U.S. Comm. Fish & Fish.",
"volume": "2",
"pages": "387-390",
}
},
"candidate_count": 1,
"candidates": [
{
"candidate_id": "crossref-search-1-daniell-good",
"source_label": "crossref:search:1:daniell-good",
"entry_type": "article",
"citation_key": "daniell1872lettersreferringexperiments",
"fields": {
"author": "Daniell, W.C.",
"year": "1872",
"title": "Letters referring to experiments of W.C. Daniell, M.D., in introducing shad into the Alabama River",
"journal": "Comm. Rept. U.S. Comm. Fish & Fish.",
"volume": "2",
"pages": "387-390",
},
}
],
},
):
candidates = repository.get_species_citation_candidates("test-shad", citation["id"])
self.assertIsNotNone(candidates)
self.assertEqual(candidates["candidate_count"], 1)
result = repository.apply_species_citation_candidate_selection(
slug="test-shad",
citation_id=citation["id"],
candidate={
"source_label": "crossref:search:1:daniell-good",
"entry_type": "article",
"fields": {
"author": "Daniell, W.C.",
"year": "1872",
"title": "Letters referring to experiments of W.C. Daniell, M.D., in introducing shad into the Alabama River",
"journal": "Comm. Rept. U.S. Comm. Fish & Fish.",
"volume": "2",
"pages": "387-390",
},
},
username="edith",
)
self.assertIsNotNone(result)
self.assertEqual(result["citation"]["resolver_source_label"], "editor:selected:crossref:search:1:daniell-good")
self.assertEqual(result["citation"]["source_type"], "editor_selected_candidate")
self.assertEqual(result["citation"]["review_status"], "accepted")
audit = repository.list_species_audit("test-shad")
self.assertIsNotNone(audit)
self.assertEqual(audit[0]["action"], "citation_candidate_selection")
def test_editor_can_add_candidate_as_additional_citation_and_preserve_it(self) -> None:
repository.update_species_document_markdown(
slug="test-shad",
markdown="""---
title: Test Shad Markdown
common_name: Test Shad
scientific_name: Alosa markdownus
legacy_identifiers:
- authority: legacy-ecospecies
identifier: 4242
label: FLELMR
---
## References
- [7] Daniell, W.C. 1872. Letters referring to experiments of W.C. Daniell, M.D., in introducing shad into the Alabama River. Comm. Rept. U.S. Comm. Fish & Fish. 2: 387-390.
""",
username="frank",
)
citations = repository.get_editor_species_citations("test-shad")
self.assertIsNotNone(citations)
source_citation = citations["citations"][0]
result = repository.add_species_citation_from_candidate(
slug="test-shad",
citation_id=source_citation["id"],
candidate={
"source_label": "crossref:search:1:daniell-related",
"entry_type": "article",
"fields": {
"author": "Jordan, F.",
"year": "2009",
"title": "Habitat use of age 0 Alabama shad in the Pascagoula River drainage, USA",
"journal": "Transactions of the American Fisheries Society",
"volume": "19",
"number": "1",
"pages": "107-115",
"doi": "10.1111/j.1600-0633.2009.00395.x",
"url": "https://doi.org/10.1111/j.1600-0633.2009.00395.x",
},
},
username="edith",
)
self.assertIsNotNone(result)
self.assertEqual(result["citation"]["source_type"], "editor_added_candidate")
self.assertEqual(result["citation"]["review_status"], "accepted")
citations = repository.get_editor_species_citations("test-shad")
self.assertIsNotNone(citations)
self.assertEqual(citations["citation_count"], 2)
self.assertEqual(citations["citations"][1]["section_heading"], "References")
document = repository.get_species_document("test-shad")
self.assertIsNotNone(document)
self.assertIn("Habitat use of age 0 Alabama shad in the Pascagoula River drainage, USA", document["markdown"])
repository.update_species_document_markdown(
slug="test-shad",
markdown=document["markdown"],
username="frank",
)
citations = repository.get_editor_species_citations("test-shad")
self.assertIsNotNone(citations)
self.assertEqual(citations["citation_count"], 2)
self.assertEqual(citations["citations"][1]["source_type"], "editor_added_candidate")
audit = repository.list_species_audit("test-shad")
self.assertIsNotNone(audit)
self.assertEqual(audit[0]["action"], "document_update")
self.assertEqual(audit[1]["action"], "citation_candidate_addition")
def test_contributor_can_view_only_owned_citations(self) -> None:
created = repository.create_contributor_species(
"writer@example.org",
"""---
title: Contributor Draft
common_name: Contributor Fish
scientific_name: Pisces contributoris
species_code:
---
## References
- [12] Example, A. 2025. Draft reference.
""",
)
owned = repository.get_contributor_species_citations(created["slug"], "writer@example.org")
other = repository.get_contributor_species_citations(created["slug"], "other@example.org")
self.assertIsNotNone(owned)
self.assertEqual(owned["citation_count"], 1)
self.assertEqual(owned["citations"][0]["legacy_reference_number"], "12")
self.assertIsNone(other)
def test_public_bibliography_aggregates_species_citations(self) -> None:
repository.update_species_document_markdown(
slug="test-shad",
markdown="""---
title: Test Shad Markdown
common_name: Test Shad
scientific_name: Alosa markdownus
legacy_identifiers:
- authority: legacy-ecospecies
identifier: 4242
label: FLELMR
---
## References
- [7] Jones, A. 2022. Fisheries review.
""",
username="frank",
)
citations = repository.get_editor_species_citations("test-shad")
self.assertIsNotNone(citations)
citation = citations["citations"][0]
repository.update_species_citation_review(
slug="test-shad",
citation_id=citation["id"],
review_status="accepted",
normalized_text="Jones, A. (2022). Fisheries review.",
doi="10.1000/review-doi",
citation_key="jones2022review",
entry_type="article",
draft_bibtex="@article{jones2022review,\n doi = {10.1000/review-doi}\n}",
username="edith",
abstract_text="A short abstract about fisheries review.",
)
bibliography = repository.list_public_bibliography()
self.assertEqual(len(bibliography), 1)
self.assertEqual(bibliography[0]["citation_key"], "jones2022review")
self.assertEqual(bibliography[0]["abstract_text"], "A short abstract about fisheries review.")
self.assertEqual(bibliography[0]["legacy_reference_numbers"], ["7"])
self.assertEqual(bibliography[0]["species_count"], 1)
self.assertEqual(bibliography[0]["species_refs"][0]["slug"], "test-shad")
def test_register_contributor_creates_token_and_enforces_age_gate(self) -> None:
with self.assertRaisesRegex(ValueError, "at least 13 years old"):
repository.register_contributor("person@example.org", False)
result = repository.register_contributor("Person@Example.org", True)
self.assertEqual(result["username"], "person@example.org")
self.assertEqual(result["role"], "contributor")
self.assertEqual(result["minimum_age"], 13)
self.assertTrue(result["token"])
def test_contributor_can_create_and_edit_only_owned_species(self) -> None:
created = repository.create_contributor_species(
"writer@example.org",
"""---
title: Contributor Draft
common_name: Contributor Fish
scientific_name: Pisces contributoris
species_code:
---
## Summary
Draft summary.
## Habitat
Mangroves.
""",
)
detail = repository.get_contributor_species_detail(created["slug"], "writer@example.org")
public_detail = repository.get_species_by_slug(created["slug"])
self.assertIsNotNone(detail)
self.assertIsNone(public_detail)
self.assertEqual(detail["publication_status"], "draft")
self.assertEqual(detail["common_name"], "Contributor Fish")
updated = repository.update_contributor_species_document_markdown(
created["slug"],
"""---
title: Contributor Draft Revised
common_name: Contributor Fish
scientific_name: Pisces contributoris
species_code:
---
## Summary
Revised summary.
## Habitat
Seagrass.
### Depth
Shallow bays.
""",
"writer@example.org",
)
self.assertIsNotNone(updated)
detail = repository.get_contributor_species_detail(created["slug"], "writer@example.org")
other_user_detail = repository.get_contributor_species_detail(created["slug"], "other@example.org")
audit = repository.list_species_audit(created["slug"])
self.assertIsNotNone(detail)
self.assertEqual(detail["summary"], "Revised summary.")
self.assertEqual(
[section["heading"] for section in detail["sections"]],
["Habitat", "Habitat / Depth"],
)
self.assertIsNone(other_user_detail)
self.assertIsNotNone(audit)
self.assertEqual(audit[0]["action"], "contributor_document_update")
if __name__ == "__main__":
unittest.main()

File diff suppressed because it is too large

apps/web/bibliography.html (new file, 43 lines)

@ -0,0 +1,43 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>EcoSpecies Bibliography</title>
<link rel="stylesheet" href="./styles.css">
</head>
<body>
<header class="site-header">
<div class="site-header-inner">
<div class="site-brand">
<p class="site-brand-mark">Open Species Archive</p>
<a href="./index.html" class="site-brand-link">EcoSpecies Atlas</a>
<p class="site-brand-summary">Public field atlas for historical species life-history materials.</p>
</div>
<nav class="site-nav" aria-label="Primary">
<a href="./index.html">Atlas</a>
<a href="./bibliography.html">Bibliography</a>
</nav>
</div>
</header>
<main class="page">
<section class="hero">
<p class="eyebrow">EcoSpecies Atlas</p>
<h1>Bibliography</h1>
<p class="lede">
A site-wide bibliography for the EcoSpecies atlas, including imported references and citations added during review.
</p>
<div class="auth-bar auth-panel-row">
<input id="bibliography-search" type="search" placeholder="Search title, author, DOI, or abstract">
<button id="bibliography-download" type="button" class="secondary-button">Download BibTeX</button>
<p id="bibliography-status" class="auth-status">Loading bibliography...</p>
</div>
</section>
<section class="panel">
<div id="bibliography-list" class="public-citation-list"></div>
</section>
</main>
<script src="./bibliography.js" defer></script>
</body>
</html>

apps/web/bibliography.js (new file, 230 lines)

@ -0,0 +1,230 @@
function getAppBase() {
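// Derive the base path this page is served under from the current URL,
// so asset links and API calls keep working behind a reverse-proxy path prefix.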
const { pathname } = window.location;
if (pathname === "/" || pathname === "/index.html") {
return "";
}
if (pathname.endsWith("/index.html")) {
return pathname.slice(0, -"/index.html".length);
}
return pathname.endsWith("/") ? pathname.slice(0, -1) : pathname;
}
const apiBase = getAppBase().replace(/\/bibliography\.html$/, "");
const bibliographyList = document.querySelector("#bibliography-list");
const bibliographySearch = document.querySelector("#bibliography-search");
const bibliographyStatus = document.querySelector("#bibliography-status");
const bibliographyDownload = document.querySelector("#bibliography-download");
let currentBibliographyItems = [];
function escapeHtml(value) {
return String(value)
.replaceAll("&", "&amp;")
.replaceAll('"', "&quot;")
.replaceAll("<", "&lt;")
.replaceAll(">", "&gt;");
}
function normalizeAbstractForDisplay(value) {
const raw = String(value || "").trim();
if (!raw) {
return "";
}
const temp = document.createElement("div");
temp.innerHTML = raw;
return temp.textContent
.replace(/^abstract\s*[:.\-]?\s*/i, "")
.replace(/\s+/g, " ")
.trim();
}
function parseBibtexFields(draftBibtex) {
const fields = {};
const text = String(draftBibtex || "");
const pattern = /([a-zA-Z_]+)\s*=\s*\{([^}]*)\}/g;
let match = pattern.exec(text);
while (match) {
fields[match[1].toLowerCase()] = match[2].trim();
match = pattern.exec(text);
}
return fields;
}
function collectBibtexRecords(items) {
const seen = new Set();
const records = [];
for (const item of items || []) {
const draftBibtex = String(item && item.draft_bibtex ? item.draft_bibtex : "").trim();
if (!draftBibtex || seen.has(draftBibtex)) {
continue;
}
seen.add(draftBibtex);
records.push(draftBibtex);
}
return records;
}
function downloadBibtexRecords(items, filenameStem) {
const records = collectBibtexRecords(items);
if (!records.length) {
return false;
}
const blob = new Blob([`${records.join("\n\n")}\n`], { type: "application/x-bibtex;charset=utf-8" });
const url = URL.createObjectURL(blob);
const link = document.createElement("a");
link.href = url;
link.download = `${filenameStem}.bib`;
document.body.appendChild(link);
link.click();
document.body.removeChild(link);
window.setTimeout(() => URL.revokeObjectURL(url), 0);
return true;
}
function syncDownloadButton(items) {
if (!bibliographyDownload) {
return;
}
const recordCount = collectBibtexRecords(items).length;
bibliographyDownload.disabled = !recordCount;
bibliographyDownload.textContent = recordCount
? `Download BibTeX (${recordCount})`
: "Download BibTeX";
}
function buildCitationText(item) {
if (item.normalized_text) {
return escapeHtml(item.normalized_text);
}
const fields = parseBibtexFields(item.draft_bibtex || "");
const author = fields.author || "";
const year = fields.year || "";
const title = fields.title || "";
const venue = fields.journal || fields.booktitle || fields.publisher || "";
const volume = fields.volume || "";
const issue = fields.number || "";
const pages = fields.pages || "";
const parts = [];
const lead = [author, year ? `(${year})` : ""].filter(Boolean).join(" ");
if (lead) {
parts.push(lead);
}
if (title) {
parts.push(title);
}
const venueBits = [venue, volume ? `${volume}${issue ? `(${issue})` : ""}` : issue ? `(${issue})` : "", pages]
.filter(Boolean)
.join(", ");
if (venueBits) {
parts.push(venueBits);
}
return escapeHtml(parts.join(". ").trim() || item.raw_text || "");
}
function renderSpeciesRefs(refs) {
return refs
.map(
(ref) =>
`<a href="./index.html#${escapeHtml(ref.slug)}">${escapeHtml(ref.common_name || ref.slug)}</a>`,
)
.join(", ");
}
function renderAbstractBlock(text) {
const abstract = normalizeAbstractForDisplay(text);
if (!abstract) {
return "";
}
return `
<div class="citation-abstract-shell">
<button type="button" class="secondary-button citation-abstract-toggle" aria-expanded="false">
Show Abstract
</button>
<div class="citation-abstract-display hidden">
<p class="public-citation-abstract">${escapeHtml(abstract)}</p>
</div>
</div>
`;
}
function attachCitationAbstractToggles(root) {
for (const toggle of root.querySelectorAll(".citation-abstract-toggle")) {
const shell = toggle.parentElement;
const display = shell && shell.querySelector(".citation-abstract-display");
if (!display) {
continue;
}
toggle.addEventListener("click", () => {
const hidden = display.classList.toggle("hidden");
toggle.setAttribute("aria-expanded", hidden ? "false" : "true");
toggle.textContent = hidden ? "Show Abstract" : "Hide Abstract";
});
}
}
function renderBibliography(items) {
bibliographyList.innerHTML = "";
if (!items.length) {
bibliographyList.innerHTML = `<p class="editor-status">No bibliography entries match the current search.</p>`;
return;
}
for (const item of items) {
const links = [
item.doi ? `<a href="https://doi.org/${encodeURIComponent(String(item.doi).replace(/^https?:\/\/doi\.org\//, ""))}" target="_blank" rel="noopener noreferrer">DOI</a>` : "",
item.source_url ? `<a href="${escapeHtml(item.source_url)}" target="_blank" rel="noopener noreferrer">Source</a>` : "",
item.openalex_id ? `<a href="https://openalex.org/${escapeHtml(String(item.openalex_id).replace(/^https?:\/\/openalex\.org\//, ""))}" target="_blank" rel="noopener noreferrer">OpenAlex</a>` : "",
]
.filter(Boolean)
.join(" · ");
const article = document.createElement("article");
article.className = "public-citation-entry";
article.innerHTML = `
<p class="public-citation-text">${buildCitationText(item)}</p>
${renderAbstractBlock(item.abstract_text || "")}
<p class="public-citation-meta">
Appears in ${item.species_count} species record${item.species_count === 1 ? "" : "s"}
${item.legacy_reference_numbers && item.legacy_reference_numbers.length ? ` • Imported references: ${item.legacy_reference_numbers.map((value) => escapeHtml(value)).join(", ")}` : ""}
</p>
<p class="public-citation-meta">Species: ${renderSpeciesRefs(item.species_refs || [])}</p>
${links ? `<p class="public-citation-links">${links}</p>` : ""}
`;
attachCitationAbstractToggles(article);
bibliographyList.appendChild(article);
}
}
async function loadBibliography(search = "") {
bibliographyStatus.textContent = "Loading bibliography...";
const query = search ? `?search=${encodeURIComponent(search)}` : "";
const response = await fetch(`${apiBase}/api/bibliography${query}`);
const data = await response.json();
if (!response.ok) {
bibliographyList.innerHTML = `<p class="error">${escapeHtml(data.error || "Unable to load bibliography.")}</p>`;
bibliographyStatus.textContent = data.error || "Bibliography load failed";
return;
}
currentBibliographyItems = data.items || [];
renderBibliography(currentBibliographyItems);
syncDownloadButton(currentBibliographyItems);
bibliographyStatus.textContent = `${data.count || 0} bibliography entr${data.count === 1 ? "y" : "ies"}`;
}
bibliographySearch.addEventListener("input", (event) => {
loadBibliography(event.target.value).catch(() => {
bibliographyStatus.textContent = "Bibliography load failed";
});
});
loadBibliography().catch((error) => {
bibliographyList.innerHTML = `<p class="error">Failed to load bibliography: ${escapeHtml(String(error))}</p>`;
bibliographyStatus.textContent = "Bibliography load failed";
});
if (bibliographyDownload) {
bibliographyDownload.addEventListener("click", () => {
const downloaded = downloadBibtexRecords(currentBibliographyItems, "ecospecies-bibliography");
if (!downloaded) {
bibliographyStatus.textContent = "No BibTeX records are available for download yet.";
}
});
}


@ -7,20 +7,31 @@
<link rel="stylesheet" href="./styles.css">
</head>
<body>
<header class="site-header">
<div class="site-header-inner">
<div class="site-brand">
<p class="site-brand-mark">Open Species Archive</p>
<a href="./index.html" class="site-brand-link">EcoSpecies Atlas</a>
<p class="site-brand-summary">Public field atlas for historical species life-history materials.</p>
</div>
<nav class="site-nav" aria-label="Primary">
<a href="./index.html">Atlas</a>
<a href="./bibliography.html">Bibliography</a>
</nav>
</div>
</header>
<main class="page">
<section class="hero">
<p class="eyebrow">Marine Species Knowledge System</p>
<h1>EcoSpecies</h1>
<p class="eyebrow">Open Biodiversity Reference</p>
<h1>EcoSpecies Atlas</h1>
<p class="lede">
A modern follow-on for the legacy EcoSpecies archive, starting with direct ingestion
of historical Species Life History text files.
A modern follow-on for the legacy EcoSpecies archive, built as an open ecology and
biodiversity reference workspace.
</p>
<p class="hero-context">
Use EcoSpecies Atlas for species profiles, habitat evidence, ecological reading, and
citation-aware exploration grounded in the migrated legacy corpus.
</p>
<div class="auth-bar">
<input id="auth-token" type="password" placeholder="Bearer token for editor access">
<button id="auth-save" type="button">Use Token</button>
<button id="auth-clear" type="button" class="secondary-button">Clear</button>
<p id="auth-status" class="auth-status">Public access</p>
</div>
<div class="hero-stats">
<div class="stat">
<span id="species-count">0</span>
@ -38,6 +49,7 @@
<div class="panel-header">
<h2>Species</h2>
<input id="search" type="search" placeholder="Search common or scientific name">
<button id="contributor-create" type="button" class="secondary-button hidden">Create New Draft</button>
<div id="archive-filter-group" class="archive-filter-group hidden">
<button type="button" class="archive-filter-button is-active" data-archive-filter="active">Active</button>
<button type="button" class="archive-filter-button" data-archive-filter="all">All</button>
@ -66,44 +78,166 @@
This record is archived. It is hidden from public endpoints but remains available to editors for audit and recovery.
</p>
</header>
<section id="editor-panel" class="detail-section editor-panel hidden">
<h3>Editor Controls</h3>
<label class="editor-label" for="editor-publication-status">Publication Status</label>
<select id="editor-publication-status">
<option value="draft">Draft</option>
<option value="review">Review</option>
<option value="published">Published</option>
</select>
<label class="editor-label" for="editor-summary">Summary</label>
<textarea id="editor-summary" rows="5" placeholder="Write a concise executive summary."></textarea>
<label class="editor-label" for="editor-notes">Editor Notes</label>
<textarea id="editor-notes" rows="4" placeholder="Internal editorial notes"></textarea>
<label class="archive-toggle">
<input id="editor-is-archived" type="checkbox">
<span>Archive this species</span>
</label>
<div class="editor-actions">
<button id="editor-save" type="button">Save Editorial Changes</button>
<p id="editor-status" class="editor-status"></p>
</div>
</section>
<section id="audit-panel" class="detail-section hidden">
<h3>Audit History</h3>
<div id="audit-list" class="audit-list"></div>
</section>
<div id="detail-sections" class="detail-sections"></div>
<div class="workflow-panels">
<section id="legacy-panel" class="detail-section collapsible-panel collapsed hidden" data-label="Legacy Materials Under Review">
<div class="collapsible-header">
<h3>Legacy Materials Under Review</h3>
<button type="button" class="secondary-button collapsible-toggle" data-target="legacy-panel" data-label="Legacy Materials Under Review" aria-expanded="false">
Show Legacy Materials Under Review
</button>
</div>
<div class="collapsible-body">
<p id="legacy-source-meta" class="editor-status"></p>
<pre id="legacy-source-text" class="legacy-source"></pre>
</div>
</section>
<section id="access-panel" class="detail-section collapsible-panel collapsed" data-label="Access and Contribution">
<div class="collapsible-header">
<h3>Access and Contribution</h3>
<button type="button" class="secondary-button collapsible-toggle" data-target="access-panel" data-label="Access and Contribution" aria-expanded="false">
Show Access and Contribution
</button>
</div>
<div class="collapsible-body">
<div class="auth-bar auth-panel-row">
<input id="auth-token" type="password" placeholder="Bearer token for editor access">
<button id="auth-save" type="button">Use Token</button>
<button id="auth-clear" type="button" class="secondary-button">Clear</button>
<p id="auth-status" class="auth-status">Public access</p>
</div>
<div class="auth-bar contributor-signup auth-panel-row">
<input id="contributor-email" type="email" placeholder="Email for contributor access">
<label class="archive-toggle contributor-age-gate">
<input id="contributor-age-gate" type="checkbox">
<span>I confirm I am at least <span id="contributor-age-label">13</span> years old</span>
</label>
<button id="contributor-register" type="button" class="secondary-button">Become Contributor</button>
<p id="contributor-status" class="auth-status"></p>
</div>
</div>
</section>
<section id="editor-panel" class="detail-section collapsible-panel editor-panel collapsed hidden" data-label="Editing Workflow">
<div class="collapsible-header">
<h3>Editing Workflow</h3>
<button type="button" class="secondary-button collapsible-toggle" data-target="editor-panel" data-label="Editing Workflow" aria-expanded="false">
Show Editing Workflow
</button>
</div>
<div class="collapsible-body">
<label class="editor-label" for="editor-publication-status">Publication Status</label>
<select id="editor-publication-status">
<option value="draft">Draft</option>
<option value="review">Review</option>
<option value="published">Published</option>
</select>
<label class="editor-label" for="editor-notes">Editor Notes</label>
<textarea id="editor-notes" rows="4" placeholder="Internal editorial notes"></textarea>
<label class="archive-toggle">
<input id="editor-is-archived" type="checkbox">
<span>Archive this species</span>
</label>
<div class="editor-actions">
<button id="editor-save" type="button">Save Editorial Changes</button>
<p id="editor-status" class="editor-status"></p>
</div>
</div>
</section>
<section id="document-panel" class="detail-section collapsible-panel editor-panel collapsed hidden" data-label="Metadata and Document Workflow">
<div class="collapsible-header">
<h3>Metadata and Document Workflow</h3>
<button type="button" class="secondary-button collapsible-toggle" data-target="document-panel" data-label="Metadata and Document Workflow" aria-expanded="false">
Show Metadata and Document Workflow
</button>
</div>
<div class="collapsible-body">
<div class="document-panel-header">
<div>
<p class="editor-status">
Markdown is the editable source of truth for hierarchy. Front matter and headings are validated on save.
</p>
</div>
<div class="editor-actions">
<button id="document-save" type="button">Save Document</button>
<p id="document-status" class="editor-status"></p>
</div>
</div>
<label class="editor-label" for="document-markdown">Markdown Source</label>
<textarea id="document-markdown" class="document-editor" rows="18" spellcheck="false"></textarea>
<details class="document-preview-shell" open>
<summary>Outline Preview</summary>
<div id="document-preview" class="document-preview"></div>
</details>
</div>
</section>
<section id="citation-panel" class="detail-section collapsible-panel collapsed hidden" data-label="Review Workflow">
<div class="collapsible-header">
<h3>Review Workflow</h3>
<button type="button" class="secondary-button collapsible-toggle" data-target="citation-panel" data-label="Review Workflow" aria-expanded="false">
Show Review Workflow
</button>
</div>
<div class="collapsible-body">
<div class="document-panel-header">
<div>
<p id="citation-status" class="editor-status">
Extracted bibliography entries and draft BibTeX records.
</p>
</div>
<div class="editor-actions">
<button id="citation-backfill-species" type="button" class="secondary-button hidden">Backfill This Species</button>
<button id="citation-enrich-all" type="button" class="secondary-button hidden">Run Enrichment For All Citations</button>
</div>
</div>
<div id="citation-list" class="citation-list"></div>
</div>
</section>
<section id="audit-panel" class="detail-section collapsible-panel collapsed hidden" data-label="Audit History">
<div class="collapsible-header">
<h3>Audit History</h3>
<button type="button" class="secondary-button collapsible-toggle" data-target="audit-panel" data-label="Audit History" aria-expanded="false">
Show Audit History
</button>
</div>
<div class="collapsible-body">
<div id="audit-list" class="audit-list"></div>
</div>
</section>
</div>
</article>
</section>
</section>
<footer class="footer">
<p>
This migration path preserves attribution for Dr. Peter Rubec, Dr. Diane Blackwood,
EcoSpecies Atlas preserves attribution for Dr. Peter Rubec, Dr. Diane Blackwood,
Dr. Welsbery R. Elsberry, and the Florida Fish and Wildlife Research Institute context
documented in the legacy project materials.
</p>
</footer>
</main>
<section id="citation-match-dialog" class="match-dialog-shell hidden" aria-hidden="true">
<div class="match-dialog-backdrop"></div>
<article class="match-dialog-card" role="dialog" aria-modal="true" aria-labelledby="citation-match-title">
<div class="match-dialog-header">
<div>
<h2 id="citation-match-title">Citation Candidate Review</h2>
<p id="citation-match-status" class="editor-status">Compare the parsed source citation against candidate metadata.</p>
</div>
<button id="citation-match-close" type="button" class="secondary-button">Close</button>
</div>
<div class="match-dialog-grid">
<section class="detail-section">
<h3>Parsed Source Metadata</h3>
<div id="citation-match-seed" class="match-seed"></div>
</section>
<section class="detail-section">
<h3>Candidate Matches</h3>
<div id="citation-match-candidates" class="match-candidates"></div>
</section>
</div>
</article>
</section>
<script src="./app.js" defer></script>
</body>
</html>


@ -5,6 +5,10 @@ server {
root /usr/share/nginx/html;
index index.html;
location = /apps/ecospecies {
return 301 /apps/ecospecies/;
}
location /api/ {
proxy_pass http://api:8000/api/;
proxy_http_version 1.1;
@ -14,19 +18,46 @@ server {
proxy_set_header X-Forwarded-Proto $scheme;
}
location /apps/ecospecies/api/ {
rewrite ^/apps/ecospecies/api/(.*)$ /api/$1 break;
proxy_pass http://api:8000;
proxy_http_version 1.1;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
}
location /healthz {
proxy_pass http://api:8000/healthz;
proxy_http_version 1.1;
proxy_set_header Host $host;
}
location /apps/ecospecies/healthz {
proxy_pass http://api:8000/healthz;
proxy_http_version 1.1;
proxy_set_header Host $host;
}
location /readyz {
proxy_pass http://api:8000/readyz;
proxy_http_version 1.1;
proxy_set_header Host $host;
}
location /apps/ecospecies/readyz {
proxy_pass http://api:8000/readyz;
proxy_http_version 1.1;
proxy_set_header Host $host;
}
location / {
try_files $uri $uri/ /index.html;
}
location /apps/ecospecies/ {
rewrite ^/apps/ecospecies/(.*)$ /$1 break;
try_files $uri $uri/ /index.html;
}
}


@ -1,12 +1,12 @@
:root {
--bg: #f4efe6;
--paper: rgba(255, 252, 247, 0.78);
--ink: #16251f;
--muted: #58655f;
--accent: #0f766e;
--accent-2: #bc6c25;
--line: rgba(22, 37, 31, 0.12);
--shadow: 0 24px 70px rgba(24, 35, 30, 0.15);
--bg: #f4f7fb;
--paper: rgba(255, 255, 255, 0.88);
--ink: #182433;
--muted: #5f6b7d;
--accent: #2457a6;
--accent-2: #1f7a5a;
--line: rgba(24, 36, 51, 0.11);
--shadow: 0 24px 70px rgba(33, 52, 84, 0.14);
}
* {
@ -15,12 +15,83 @@
body {
margin: 0;
font-family: Georgia, "Times New Roman", serif;
font-family: "Segoe UI", "Helvetica Neue", Arial, sans-serif;
color: var(--ink);
background:
radial-gradient(circle at top left, rgba(15, 118, 110, 0.14), transparent 28%),
radial-gradient(circle at top right, rgba(188, 108, 37, 0.16), transparent 24%),
linear-gradient(180deg, #f8f4ec, #efe6d7 70%, #e7dcc9);
radial-gradient(circle at top left, rgba(36, 87, 166, 0.14), transparent 26%),
radial-gradient(circle at top right, rgba(31, 122, 90, 0.12), transparent 24%),
linear-gradient(180deg, #f4f7fb, #e4edf6 72%, #d9e6ef);
}
.site-header {
width: min(1320px, calc(100vw - 32px));
margin: 0 auto;
padding-top: 24px;
}
.site-header-inner {
display: flex;
gap: 18px;
align-items: center;
justify-content: space-between;
padding: 18px 22px;
border-radius: 24px;
backdrop-filter: blur(10px);
background: var(--paper);
border: 1px solid var(--line);
box-shadow: var(--shadow);
}
.site-brand {
display: flex;
flex-direction: column;
gap: 4px;
}
.site-brand-mark {
margin: 0;
color: var(--accent);
text-transform: uppercase;
letter-spacing: 0.18em;
font-size: 0.76rem;
}
.site-brand-link {
color: var(--ink);
font-size: 1.5rem;
font-weight: 700;
text-decoration: none;
}
.site-brand-summary {
margin: 0;
color: var(--muted);
font-size: 0.94rem;
}
.site-nav {
display: flex;
flex-wrap: wrap;
gap: 10px;
justify-content: flex-end;
}
.site-nav a {
display: inline-flex;
align-items: center;
justify-content: center;
border-radius: 999px;
padding: 11px 16px;
text-decoration: none;
color: var(--ink);
border: 1px solid var(--line);
background: rgba(255, 255, 255, 0.72);
transition: transform 160ms ease, border-color 160ms ease;
}
.site-nav a:hover {
transform: translateY(-1px);
border-color: rgba(15, 118, 110, 0.45);
}
.page {
@ -42,6 +113,9 @@ body {
.hero {
padding: 28px;
margin-bottom: 20px;
background:
linear-gradient(135deg, rgba(255, 255, 255, 0.95), rgba(234, 244, 240, 0.92)),
var(--paper);
}
.eyebrow {
@ -56,6 +130,7 @@ h1 {
margin: 0;
font-size: clamp(2.8rem, 7vw, 5.6rem);
line-height: 0.92;
letter-spacing: -0.03em;
}
.lede {
@ -64,6 +139,12 @@ h1 {
font-size: 1.08rem;
}
.hero-context {
max-width: 68ch;
color: var(--muted);
line-height: 1.58;
}
.hero-stats {
display: flex;
gap: 16px;
@ -79,6 +160,15 @@ h1 {
margin-top: 18px;
}
.auth-panel-row {
margin-top: 0;
}
.contributor-signup {
padding-top: 14px;
border-top: 1px solid var(--line);
}
.auth-bar input {
min-width: min(360px, 100%);
flex: 1;
@ -93,7 +183,7 @@ h1 {
min-width: 180px;
padding: 14px 16px;
border-radius: 18px;
background: rgba(255, 255, 255, 0.6);
background: linear-gradient(180deg, rgba(255, 255, 255, 0.95), rgba(232, 242, 239, 0.92));
border: 1px solid var(--line);
}
@ -158,6 +248,16 @@ input[type="search"] {
background: rgba(255, 255, 255, 0.9);
}
input[type="text"],
input[type="email"],
input[type="password"] {
border: 1px solid var(--line);
border-radius: 18px;
padding: 12px 14px;
font: inherit;
background: rgba(255, 255, 255, 0.92);
}
select,
textarea,
button {
@ -201,7 +301,7 @@ button {
padding: 14px;
border-radius: 18px;
border: 1px solid var(--line);
background: linear-gradient(180deg, rgba(255, 255, 255, 0.95), rgba(241, 237, 230, 0.95));
background: linear-gradient(180deg, rgba(255, 255, 255, 0.97), rgba(239, 246, 244, 0.94));
cursor: pointer;
transition: transform 160ms ease, border-color 160ms ease;
}
@ -213,7 +313,7 @@ button {
.species-card-archived {
border-style: dashed;
background: linear-gradient(180deg, rgba(247, 241, 231, 0.98), rgba(233, 226, 214, 0.98));
background: linear-gradient(180deg, rgba(243, 247, 249, 0.98), rgba(227, 236, 242, 0.98));
}
.species-name,
@ -273,6 +373,32 @@ button {
display: none;
}
.match-dialog-shell {
position: fixed;
inset: 0;
z-index: 50;
}
.match-dialog-backdrop {
position: absolute;
inset: 0;
background: rgba(12, 20, 18, 0.46);
}
.match-dialog-card {
position: relative;
z-index: 1;
width: min(1180px, calc(100vw - 32px));
max-height: calc(100vh - 40px);
overflow: auto;
margin: 20px auto;
padding: 18px;
border-radius: 24px;
background: #fbf8f1;
border: 1px solid var(--line);
box-shadow: var(--shadow);
}
.detail-header {
padding-bottom: 16px;
border-bottom: 1px solid var(--line);
@ -313,6 +439,12 @@ button {
margin-top: 18px;
}
.workflow-panels {
display: grid;
gap: 16px;
margin-top: 20px;
}
.detail-section {
padding: 16px;
border-radius: 18px;
@ -329,6 +461,44 @@ button {
margin-top: 18px;
}
.workflow-panels .editor-panel,
.workflow-panels .detail-section {
margin-top: 0;
}
.collapsible-panel {
padding-top: 14px;
}
.collapsible-header {
display: flex;
gap: 12px;
align-items: center;
justify-content: space-between;
flex-wrap: wrap;
}
.collapsible-header h3 {
margin-bottom: 0;
}
.collapsible-body {
margin-top: 16px;
}
.collapsible-panel.collapsed .collapsible-body {
display: none;
}
.document-panel-header {
display: flex;
gap: 16px;
align-items: flex-start;
justify-content: space-between;
flex-wrap: wrap;
margin-bottom: 14px;
}
.editor-label {
display: block;
margin: 0 0 8px;
@ -349,6 +519,11 @@ button {
font-weight: 700;
}
.contributor-age-gate {
margin: 0;
font-weight: 400;
}
.archive-toggle input {
width: 18px;
height: 18px;
@ -372,6 +547,149 @@ button {
gap: 12px;
}
.citation-list {
display: grid;
gap: 14px;
}
.citation-entry {
padding: 14px;
border-radius: 16px;
border: 1px solid var(--line);
background: rgba(255, 255, 255, 0.76);
}
.citation-entry-meta {
margin: 0 0 10px;
color: var(--muted);
font-size: 0.92rem;
}
.citation-entry-raw {
margin: 0 0 12px;
line-height: 1.5;
}
.citation-bibtex,
.citation-bibtex-editor {
font-family: "Courier New", monospace;
font-size: 0.9rem;
line-height: 1.45;
}
.citation-abstract-shell {
display: grid;
gap: 8px;
margin: 4px 0 10px;
}
.citation-detail-shell {
display: grid;
gap: 8px;
margin: 4px 0 10px;
}
.citation-abstract-display {
padding: 10px 12px;
border-radius: 12px;
border: 1px solid var(--line);
background: rgba(15, 118, 110, 0.05);
}
.citation-detail-display {
padding: 10px 12px;
border-radius: 12px;
border: 1px solid var(--line);
background: rgba(255, 255, 255, 0.78);
}
.match-dialog-header,
.match-dialog-grid,
.match-candidate-header,
.match-candidates,
.match-candidate-card,
.match-seed,
.match-table {
display: grid;
gap: 12px;
}
.match-dialog-header {
grid-template-columns: minmax(0, 1fr) auto;
align-items: start;
}
.match-dialog-grid {
grid-template-columns: minmax(260px, 0.9fr) minmax(0, 1.6fr);
margin-top: 16px;
}
.match-candidate-card {
padding: 14px;
border-radius: 16px;
border: 1px solid var(--line);
background: rgba(255, 255, 255, 0.84);
}
.match-candidate-header {
grid-template-columns: minmax(0, 1fr) auto;
align-items: baseline;
}
.match-score {
font-weight: 700;
color: var(--accent);
}
.match-table {
border: 1px solid var(--line);
border-radius: 14px;
overflow: hidden;
}
.match-row {
display: grid;
grid-template-columns: 120px 110px minmax(0, 1fr) minmax(0, 1fr);
gap: 10px;
padding: 10px 12px;
border-top: 1px solid var(--line);
font-size: 0.92rem;
}
.match-row:first-child {
border-top: 0;
}
.match-row-head {
background: rgba(15, 118, 110, 0.08);
font-weight: 700;
}
.match-label {
color: var(--muted);
font-weight: 700;
}
.match-status {
text-transform: uppercase;
letter-spacing: 0.04em;
font-size: 0.78rem;
}
.match-status-exact {
color: var(--accent);
}
.match-status-partial,
.match-status-seed-missing,
.match-status-candidate-missing {
color: var(--accent-2);
}
.match-status-conflict {
color: #a12626;
}
.audit-entry {
padding: 14px;
border-radius: 16px;
@ -394,6 +712,62 @@ button {
line-height: 1.45;
}
.document-editor,
.document-preview {
font-family: "Courier New", monospace;
font-size: 0.92rem;
line-height: 1.5;
}
.document-editor {
min-height: 420px;
margin-bottom: 14px;
white-space: pre;
overflow: auto;
}
.document-preview-shell {
border: 1px solid var(--line);
border-radius: 18px;
background: rgba(255, 255, 255, 0.72);
overflow: hidden;
}
.document-preview-shell summary {
cursor: pointer;
padding: 12px 16px;
font-weight: 700;
color: var(--accent);
}
.document-preview {
padding: 0 16px 16px;
}
.document-preview-empty {
color: var(--muted);
}
.document-preview-list {
margin: 0;
padding-left: 22px;
}
.document-preview-list li + li {
margin-top: 8px;
}
.document-preview-metadata {
margin: 0 0 14px;
padding: 0;
list-style: none;
color: var(--muted);
}
.document-preview-metadata li + li {
margin-top: 6px;
}
.diagnostic-list {
margin: 0;
padding-left: 18px;
@ -403,6 +777,100 @@ button {
margin-top: 8px;
}
.structured-node {
display: grid;
gap: 12px;
background: linear-gradient(180deg, rgba(255, 255, 255, 0.84), rgba(242, 247, 252, 0.88));
}
.structured-node + .structured-node {
margin-top: 4px;
}
.structured-node h3,
.structured-node h4,
.structured-node h5,
.structured-node h6 {
line-height: 1.18;
letter-spacing: -0.01em;
}
.structured-node-body {
margin: 0;
line-height: 1.58;
color: var(--ink);
}
.structured-node-children {
display: grid;
gap: 12px;
padding: 4px 0 0 18px;
border-left: 2px solid rgba(36, 87, 166, 0.12);
}
.public-citation-list {
display: grid;
gap: 14px;
}
.public-bibliography-actions {
display: flex;
gap: 12px;
align-items: center;
flex-wrap: wrap;
}
.public-bibliography-note {
margin: 0;
color: var(--muted);
font-size: 0.92rem;
}
.public-citation-entry {
display: grid;
gap: 8px;
padding: 14px;
border-radius: 16px;
border: 1px solid var(--line);
background: rgba(255, 255, 255, 0.76);
}
.public-citation-text,
.public-citation-meta,
.public-citation-links,
.public-citation-abstract {
margin: 0;
}
.public-citation-text {
line-height: 1.56;
}
.public-citation-meta,
.public-citation-links {
color: var(--muted);
font-size: 0.92rem;
}
.public-citation-links a {
color: var(--accent);
}
.public-citation-abstract {
padding-top: 2px;
color: var(--muted);
line-height: 1.58;
}
.legacy-source {
max-height: 28rem;
overflow: auto;
padding: 14px;
border-radius: 16px;
border: 1px solid var(--line);
background: rgba(255, 255, 255, 0.76);
}
pre {
margin: 0;
white-space: pre-wrap;
@ -417,6 +885,15 @@ pre {
}
@media (max-width: 960px) {
.site-header-inner {
flex-direction: column;
align-items: stretch;
}
.site-nav {
justify-content: flex-start;
}
.workspace {
grid-template-columns: 1fr;
}
@ -424,4 +901,12 @@ pre {
.species-list {
max-height: 40vh;
}
.match-dialog-grid {
grid-template-columns: 1fr;
}
.match-row {
grid-template-columns: 1fr;
}
}


@ -1,5 +1,6 @@
services:
db:
container_name: ecospecies-db
image: postgres:16-alpine
environment:
POSTGRES_DB: ecospecies
@ -17,6 +18,7 @@ services:
- postgres_data:/var/lib/postgresql/data
importer:
container_name: ecospecies-importer
image: python:3.12-slim
depends_on:
db:
@ -30,11 +32,12 @@ services:
command: ["/bin/sh", "-lc", "./scripts/bootstrap-python-env.sh && ./scripts/run-import.sh"]
volumes:
- .:/workspace
- ../01-legacy-code-and-data:/legacy-data:ro
- ${ECOSPECIES_LEGACY_DATA_DIR:-../legacy-corpus}:/legacy-data:ro
- python_venv:/workspace/.docker/venv
- pip_cache:/root/.cache/pip
api:
container_name: ecospecies-api
image: python:3.12-slim
restart: unless-stopped
depends_on:
@ -56,11 +59,12 @@ services:
- "${ECOSPECIES_API_PORT:-8000}:8000"
volumes:
- .:/workspace
- ../01-legacy-code-and-data:/legacy-data:ro
- ${ECOSPECIES_LEGACY_DATA_DIR:-../legacy-corpus}:/legacy-data:ro
- python_venv:/workspace/.docker/venv
- pip_cache:/root/.cache/pip
web:
container_name: ecospecies-web
image: nginx:1.27-alpine
restart: unless-stopped
depends_on:


@ -0,0 +1,110 @@
## CiteGeist Review Notes
These notes capture parser issues seen while integrating CiteGeist-style extraction into EcoSpecies.
### Report-style references
Observed failure shape:
- references like `Daniell, W.C. 1872. Letters referring ... Comm. Rept. U.S. Comm. Fish & Fish. 2: 387-390.`
- extracted `title` may contain the full raw bibliography string
- abbreviated venue names such as `Comm. Rept.` are not separated cleanly from the title
Suggested upstream change in `citegeist.extract`:
- add a report-style parser path after year detection
- prefer sentence-boundary venue detection before naive keyword splits so words like `report` inside a real title do not trigger an early cut
- support abbreviation-heavy venue starters such as:
- `comm. rept.`
- `rept.`
- `proc.`
- `occas. pap.`
- `bulletin`
- `bull.`
- `memoir`
- strip trailing volume/page blobs like `2: 387-390` from the venue field
- when a first parse leaves a partial venue stub such as `Occas`, reparse the full raw reference line and prefer the fuller repaired venue/title split
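A minimal Python sketch of that split, assuming hypothetical names (`VENUE_STARTERS`, `split_report_reference`) rather than the actual `citegeist.extract` API:

```python
import re

# Abbreviation-heavy venue starters from the list above (lowercase).
VENUE_STARTERS = ("comm. rept.", "rept.", "proc.", "occas. pap.", "bulletin", "bull.", "memoir")

def split_report_reference(rest):
    """Split the text after 'Author(s). YEAR.' into (title, venue, volume, pages)."""
    volume = pages = None
    # Strip a trailing volume/page blob such as "2: 387-390".
    blob = re.search(r"(\d+)\s*:\s*(\d+(?:-\d+)?)\.?\s*$", rest)
    if blob:
        volume, pages = blob.group(1), blob.group(2)
        rest = rest[: blob.start()].rstrip(" .")
    # Sentence-boundary venue detection: split only at ". <venue starter>", so a
    # word like "report" inside a real title does not trigger an early cut.
    lowered = rest.lower()
    cuts = [i for i in (lowered.find(". " + s) for s in VENUE_STARTERS) if i != -1]
    if not cuts:
        return rest.strip(), None, volume, pages
    cut = min(cuts)
    return rest[: cut + 1].strip(), rest[cut + 2 :].strip(" ."), volume, pages
```

On the `Daniell, W.C. 1872.` example above this keeps the letter title intact, recovers the `Comm. Rept.`-style venue, and peels off `volume = "2"`, `pages = "387-390"`.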
### Placeholder title merge behavior
Observed failure shape:
- a raw bibliography string may survive as `title` even after DOI/title resolution finds a better title
Suggested upstream change in `citegeist.resolve.merge_entries_with_conflicts`:
- treat titles that look like raw bibliography strings as placeholders
- example heuristic:
- starts with `Surname, ... YEAR.`
- unusually long for a title
- contains a resolved shorter title as a substring after punctuation normalization
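A sketch of that heuristic; the length threshold is a guess, not a measured value:

```python
import re

def _norm(text):
    return re.sub(r"[^a-z0-9 ]", "", text.lower())

def looks_like_placeholder_title(title, resolved_title=None):
    """Heuristic sketch: does this title look like a raw bibliography string?"""
    t = title.strip()
    # Starts with "Surname, ... YEAR." (e.g. "Daniell, W.C. 1872. ...").
    if re.match(r"^[A-Z][\w'-]+,\s.*?\b(1[5-9]\d{2}|20\d{2})[a-z]?\.", t):
        return True
    if len(t) > 250:  # unusually long for a title
        return True
    # Contains the resolved shorter title after punctuation normalization.
    if resolved_title and _norm(resolved_title) and _norm(resolved_title) in _norm(t):
        return len(t) > len(resolved_title)
    return False
```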
### Legacy note deduplication
Observed failure shape:
- note fragments like `ecospecies_reference_number = {160}` can be appended more than once downstream when re-merging enriched metadata
Suggested upstream change:
- when joining note fragments, split on `;`, normalize whitespace, and dedupe per fragment rather than per whole note string
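A sketch of per-fragment deduplication under those rules:

```python
def merge_note_fragments(existing, incoming):
    """Join note fragments, deduplicating per ';'-separated fragment."""
    seen = set()
    merged = []
    for blob in (existing, incoming):
        for fragment in (blob or "").split(";"):
            fragment = " ".join(fragment.split())  # normalize whitespace
            if fragment and fragment not in seen:
                seen.add(fragment)
                merged.append(fragment)
    return "; ".join(merged)
```

Merging two notes that each carry `ecospecies_reference_number = {160}` then keeps a single copy of that fragment.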
### Unresolved entries should still refresh local parses
Observed failure shape:
- parser improvements may correctly rebuild `title`, venue, `volume`, `number`, and `pages`
- but if no remote metadata source matches, the stored draft BibTeX can remain unchanged unless unresolved enrichment also writes the refreshed local seed back out
Suggested upstream change:
- unresolved enrichment should still return the rebuilt local draft entry
- keep `citation_key`, normalized text, and draft BibTeX synchronized with the current local parser even when resolver status remains `unresolved`
### Returned metadata not carried through
Observed concern:
- resolver/source payloads may include bibliographic details such as:
- `volume`
- `issue` / BibTeX `number`
- `page` / BibTeX `pages`
- these should be preserved into the BibTeX entry whenever available
Current note:
- CiteGeist Crossref mapping already includes `volume`, `number`, and `pages`
- verify that all resolver paths, storage round-trips, and exports preserve those fields consistently
- OpenAlex/DataCite mappings should also be checked for analogous bibliographic fields in `biblio` / attribute payloads
### False-positive title-search acceptance
Observed failure shape:
- title search can return a thematically related but bibliographically different work
- downstream acceptance may keep some seed fields while adopting conflicting DOI/title/volume/pages from the returned match
- this is especially risky for historical references with sparse or abbreviated venue names
Suggested upstream change in `citegeist.resolve` and any title-search ranking path:
- do not fall back to the first search hit when no strong title match exists
- prefer exact or near-exact title matches only
- reject a candidate when structured seed metadata conflicts on strong fields such as:
- `year`
- venue / journal
- `volume`
- `number`
- `pages`
- treat those fields as match-validation inputs, not just merge-time metadata
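One way that validation could look, treating missing values as non-conflicting:

```python
STRONG_FIELDS = ("year", "journal", "volume", "number", "pages")

def rejects_candidate(seed, candidate):
    """Reject a title-search hit when seed metadata disagrees on a strong field."""
    for field in STRONG_FIELDS:
        a = str(seed.get(field) or "").strip().lower()
        b = str(candidate.get(field) or "").strip().lower()
        if a and b and a != b:
            return True  # populated-but-different values disqualify the match
    return False
```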
### OpenAlex null-source handling
Observed failure shape:
- some OpenAlex works have `primary_location` present but `source: null`
- downstream mapping can crash if it assumes `source` is always a dictionary
Suggested upstream change:
- treat null `source` payloads as empty dictionaries
- continue mapping title, year, DOI, and `biblio` fields even when venue/source is missing
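A defensive mapping sketch; the field names (`primary_location`, `biblio`, `display_name`) are from the public OpenAlex works payload:

```python
def map_openalex_work(work):
    """Tolerate OpenAlex works where primary_location.source is null."""
    location = work.get("primary_location") or {}
    source = location.get("source") or {}  # null source -> empty dict
    biblio = work.get("biblio") or {}
    pages = "-".join(p for p in (biblio.get("first_page"), biblio.get("last_page")) if p)
    return {
        "title": work.get("display_name"),
        "year": work.get("publication_year"),
        "doi": work.get("doi"),
        "venue": source.get("display_name"),
        "volume": biblio.get("volume"),
        "number": biblio.get("issue"),
        "pages": pages or None,
    }
```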

docs/dc-orig.yml (new file)

@ -0,0 +1,89 @@
services:
db:
image: postgres:16-alpine
restart: unless-stopped
environment:
POSTGRES_DB: ${ECOSPECIES_DB_NAME:-ecospecies}
POSTGRES_USER: ${ECOSPECIES_DB_USER:-ecospecies}
POSTGRES_PASSWORD: ${ECOSPECIES_DB_PASSWORD:?set ECOSPECIES_DB_PASSWORD}
PGDATA: /var/lib/postgresql/data/pgdata
healthcheck:
test: ["CMD-SHELL", "pg_isready -U ${ECOSPECIES_DB_USER:-ecospecies} -d ${ECOSPECIES_DB_NAME:-ecospecies}"]
interval: 5s
timeout: 5s
retries: 10
volumes:
- postgres_data:/var/lib/postgresql/data
importer:
image: python:3.12-slim
restart: "no"
depends_on:
db:
condition: service_healthy
working_dir: /workspace
environment:
ECOSPECIES_DATA_DIR: ${ECOSPECIES_DATA_DIR:-/legacy-data/InputFiles - TXT}
ECOSPECIES_DATABASE_URL: postgresql+psycopg://${ECOSPECIES_DB_USER:-ecospecies}:${ECOSPECIES_DB_PASSWORD}@db:5432/${ECOSPECIES_DB_NAME:-ecospecies}
ECOSPECIES_VENV_DIR: /workspace/.docker/venv
PYTHONPATH: /workspace/apps/api/src
command: ["/bin/sh", "-lc", "./scripts/bootstrap-python-env.sh && ./scripts/run-import.sh"]
volumes:
- ..:/workspace
- ${ECOSPECIES_LEGACY_DATA_DIR:-../path-to-legacy-corpus}:/legacy-data:ro
- python_venv:/workspace/.docker/venv
- pip_cache:/root/.cache/pip
api:
image: python:3.12-slim
restart: unless-stopped
depends_on:
db:
condition: service_healthy
importer:
condition: service_completed_successfully
working_dir: /workspace
environment:
ECOSPECIES_DATA_DIR: ${ECOSPECIES_DATA_DIR:-/legacy-data/InputFiles - TXT}
ECOSPECIES_DATABASE_URL: postgresql+psycopg://${ECOSPECIES_DB_USER:-ecospecies}:${ECOSPECIES_DB_PASSWORD}@db:5432/${ECOSPECIES_DB_NAME:-ecospecies}
ECOSPECIES_HOST: 0.0.0.0
ECOSPECIES_PORT: "8000"
ECOSPECIES_AUTH_TOKENS: ${ECOSPECIES_AUTH_TOKENS:-}
ECOSPECIES_VENV_DIR: /workspace/.docker/venv
PYTHONPATH: /workspace/apps/api/src
command: ["/bin/sh", "-lc", "./scripts/bootstrap-python-env.sh && ./scripts/run-api.sh"]
volumes:
- ..:/workspace
- ${ECOSPECIES_LEGACY_DATA_DIR:-../path-to-legacy-corpus}:/legacy-data:ro
- python_venv:/workspace/.docker/venv
- pip_cache:/root/.cache/pip
web:
image: nginx:1.27-alpine
restart: unless-stopped
depends_on:
api:
condition: service_started
labels:
- "traefik.enable=true"
- "traefik.docker.network=${TRAEFIK_NETWORK:-traefik-network}"
- "traefik.http.routers.ecospecies-atlas.rule=Host(`${ECOSPECIES_HOSTNAME:?set ECOSPECIES_HOSTNAME}`)"
- "traefik.http.routers.ecospecies-atlas.entrypoints=${TRAEFIK_ENTRYPOINTS:-websecure}"
- "traefik.http.routers.ecospecies-atlas.tls.certresolver=${TRAEFIK_CERTRESOLVER:-myresolver}"
- "traefik.http.services.ecospecies-atlas.loadbalancer.server.port=80"
volumes:
- ../apps/web:/usr/share/nginx/html:ro
- ../apps/web/nginx.conf:/etc/nginx/conf.d/default.conf:ro
networks:
- default
- traefik-network
volumes:
postgres_data:
python_venv:
pip_cache:
networks:
traefik-network:
external: true
name: ${TRAEFIK_NETWORK:-traefik-network}


@ -0,0 +1,20 @@
# Required
ECOSPECIES_HOSTNAME=example.org
ECOSPECIES_BASE_PATH=/apps/ecospecies
ECOSPECIES_DB_PASSWORD=replace-with-strong-password
# Optional database settings
ECOSPECIES_DB_NAME=ecospecies
ECOSPECIES_DB_USER=ecospecies
# Optional application settings
ECOSPECIES_AUTH_TOKENS=
ECOSPECIES_DATA_DIR=/workspace/input-data/InputFiles
# Optional host path to the legacy corpus if it is not at ../path-to-legacy-corpus
ECOSPECIES_LEGACY_DATA_DIR=../path-to-legacy-corpus
# Optional Traefik settings
TRAEFIK_NETWORK=traefik-network
TRAEFIK_ENTRYPOINTS=websecure
TRAEFIK_CERTRESOLVER=myresolver


@ -0,0 +1,93 @@
services:
db:
container_name: ecospecies-db
image: postgres:16-alpine
restart: unless-stopped
environment:
POSTGRES_DB: ${ECOSPECIES_DB_NAME:-ecospecies}
POSTGRES_USER: ${ECOSPECIES_DB_USER:-ecospecies}
POSTGRES_PASSWORD: ${ECOSPECIES_DB_PASSWORD:?set ECOSPECIES_DB_PASSWORD}
PGDATA: /var/lib/postgresql/data/pgdata
healthcheck:
test: ["CMD-SHELL", "pg_isready -U ${ECOSPECIES_DB_USER:-ecospecies} -d ${ECOSPECIES_DB_NAME:-ecospecies}"]
interval: 5s
timeout: 5s
retries: 10
volumes:
- postgres_data:/var/lib/postgresql/data
importer:
container_name: ecospecies-importer
image: python:3.12-slim
restart: "no"
depends_on:
db:
condition: service_healthy
working_dir: /workspace
environment:
ECOSPECIES_DATA_DIR: ${ECOSPECIES_DATA_DIR:-/workspace/input-data/InputFiles}
ECOSPECIES_DATABASE_URL: postgresql+psycopg://${ECOSPECIES_DB_USER:-ecospecies}:${ECOSPECIES_DB_PASSWORD}@db:5432/${ECOSPECIES_DB_NAME:-ecospecies}
ECOSPECIES_VENV_DIR: /workspace/.docker/venv
PYTHONPATH: /workspace/apps/api/src
command: ["/bin/sh", "-lc", "./scripts/bootstrap-python-env.sh && ./scripts/run-import.sh"]
volumes:
- ..:/workspace
- ${ECOSPECIES_LEGACY_DATA_DIR:-../path-to-legacy-corpus}:/legacy-data:ro
- python_venv:/workspace/.docker/venv
- pip_cache:/root/.cache/pip
api:
container_name: ecospecies-api
image: python:3.12-slim
restart: unless-stopped
depends_on:
db:
condition: service_healthy
importer:
condition: service_completed_successfully
working_dir: /workspace
environment:
ECOSPECIES_DATA_DIR: ${ECOSPECIES_DATA_DIR:-/workspace/input-data/InputFiles}
ECOSPECIES_DATABASE_URL: postgresql+psycopg://${ECOSPECIES_DB_USER:-ecospecies}:${ECOSPECIES_DB_PASSWORD}@db:5432/${ECOSPECIES_DB_NAME:-ecospecies}
ECOSPECIES_HOST: 0.0.0.0
ECOSPECIES_PORT: "8000"
ECOSPECIES_AUTH_TOKENS: ${ECOSPECIES_AUTH_TOKENS:-}
ECOSPECIES_VENV_DIR: /workspace/.docker/venv
PYTHONPATH: /workspace/apps/api/src
command: ["/bin/sh", "-lc", "./scripts/bootstrap-python-env.sh && ./scripts/run-api.sh"]
volumes:
- ..:/workspace
- ${ECOSPECIES_LEGACY_DATA_DIR:-../path-to-legacy-corpus}:/legacy-data:ro
- python_venv:/workspace/.docker/venv
- pip_cache:/root/.cache/pip
web:
container_name: ecospecies-web
image: nginx:1.27-alpine
restart: unless-stopped
depends_on:
api:
condition: service_started
labels:
- "traefik.enable=true"
- "traefik.docker.network=${TRAEFIK_NETWORK:-traefik-network}"
- "traefik.http.routers.ecospecies-atlas.rule=Host(`${ECOSPECIES_HOSTNAME:?set ECOSPECIES_HOSTNAME}`) && PathPrefix(`${ECOSPECIES_BASE_PATH:-/}`)"
- "traefik.http.routers.ecospecies-atlas.entrypoints=${TRAEFIK_ENTRYPOINTS:-websecure}"
- "traefik.http.routers.ecospecies-atlas.tls.certresolver=${TRAEFIK_CERTRESOLVER:-myresolver}"
- "traefik.http.services.ecospecies-atlas.loadbalancer.server.port=80"
volumes:
- ../apps/web:/usr/share/nginx/html:ro
- ../apps/web/nginx.conf:/etc/nginx/conf.d/default.conf:ro
networks:
- default
- traefik-network
volumes:
postgres_data:
python_venv:
pip_cache:
networks:
traefik-network:
external: true
name: ${TRAEFIK_NETWORK:-traefik-network}

docs/postgres-backup.md (new file)

@ -0,0 +1,48 @@
# PostgreSQL Backup Notes
This note applies to deployments that use the PostgreSQL volume defined by the Compose stack, including the Traefik deployment variant.
## What Needs Backup
At minimum, back up:
- the PostgreSQL data volume
- the deployment env file that contains the database credentials
For the Traefik deployment variant, that usually means:
- the Docker volume `postgres_data`
- `docs/docker-compose-traefik.env`
## Logical Backup
From the repository root, create a SQL dump with:
```bash
./scripts/backup-postgres.sh
```
To write to a specific file:
```bash
./scripts/backup-postgres.sh /path/to/ecospecies-backup.sql
```
## Restore From Logical Backup
Restore a SQL dump with:
```bash
./scripts/restore-postgres.sh /path/to/ecospecies-backup.sql
```
## Volume-Level Backup
If the host backup system can snapshot Docker volumes safely, include the PostgreSQL volume in that schedule. A volume snapshot is useful for full recovery, but a logical dump is still recommended for portability and validation.
## Operational Guidance
- Run backups on a schedule instead of relying on ad hoc dumps.
- Test restore procedures before relying on the backup policy.
- Keep backup artifacts outside the live Docker host when possible.
- The backup and restore scripts default to `docs/docker-compose-traefik.env` and `docs/docker-compose-traefik.yml`, but both can be overridden with `ECOSPECIES_ENV_FILE` and `ECOSPECIES_COMPOSE_FILE`.


@ -1,5 +1,22 @@
# EcoSpecies Modernization Roadmap
## Current Status
As of 2026-03-27, the repo is no longer at the pure planning stage. The following pieces are already implemented and working in the live stack:
- Docker Compose deployment with explicit `ecospecies-...` container names
- path-based hosting support for `/apps/ecospecies`
- in-repo-only source directory resolution with safe path validation
- legacy SLH ingest into PostgreSQL-backed species, sections, citations, audit, and document records
- editor/admin workflows for draft, review, publish, archive, and audit history
- contributor registration and draft-authoring workflow with token-based access
- structured Markdown document storage and editor/API round-trip
- persisted taxon identifier scaffolding with legacy identifiers separated from future-facing external identifiers
- citation extraction, review, enrichment, batch enrichment, candidate matching, and reviewed-candidate selection/addition
- citation persistence back into the structured Markdown source of truth
The roadmap below has been updated to reflect that actual state.
## Target Product
Create a Docker Compose-based, open-source EcoSpecies successor that:
@ -31,48 +48,91 @@ Create a Docker Compose-based, open-source EcoSpecies successor that:
### Phase 0: Discovery and migration planning
Status: completed
- Inventory legacy assets and user-facing capabilities.
- Capture the replacement architecture and ingestion strategy.
- Define acknowledgements, provenance, and licensing boundaries.
### Phase 1: Ingestion foundation
Status: substantially complete, with parser refinement ongoing
- Parse legacy `.txt` SLH inputs into structured JSON records.
- Normalize common metadata: title, scientific name, common name, FLELMR code, headings, references.
- Normalize common metadata: title, scientific name, common name, FLELMR/EcoSpecies code, headings, references.
- Create ingest diagnostics to flag malformed files and missing metadata.
- Continue parser refinement for legacy edge cases in headings, citations, and historical bibliography formats.
### Phase 2: Public read experience
Status: implemented baseline
- Species listing and search.
- Species detail view with section navigation.
- Provenance and acknowledgement display.
- Summary metrics on corpus coverage.
- Path-based deployment under `/apps/ecospecies`.
### Phase 3: Structured persistence
### Phase 3: Structured persistence and editorial workflow
- Move parsed content into PostgreSQL.
- Add editor-safe import jobs and audit metadata.
- Preserve raw source alongside normalized records.
- Establish authentication and role-based access for editor and admin workflows.
- Add persisted editorial workflow state for draft, review, and published records.
- Make document sections individually addressable for editor review and revision, with audit history for section-level changes.
Status: implemented baseline, with editor UX still maturing
### Phase 4: Linkages and visualization
- PostgreSQL-backed persistence for species, sections, citations, documents, taxon identifiers, and audit history.
- Editor-safe import jobs and audit metadata.
- Raw-source preservation alongside normalized records.
- Authentication and role-based access for admin/editor/contributor workflows.
- Persisted editorial workflow state for draft, review, published, and archived records.
- Structured Markdown document storage and round-trip editing.
- Citation review, enrichment, candidate selection, and reviewed-candidate addition.
- Contributor draft creation and owner-scoped editing.
### Phase 4: Standards-aware identity and bibliography
Status: partially implemented
- Preserve legacy local identifiers as provenance.
- Persist taxon identifiers separately from legacy identifiers.
- Expose `legacy_identifiers`, `taxon_identifiers`, and `primary_taxon_*` API fields.
- Persist structured citation records with DOI/OpenAlex/DataCite-style enrichment fields.
- Continue toward multi-authority identifier review, richer citation entities, and CiteGeist-backed bibliography expansion.
### Phase 5: Editor ergonomics and advanced review
Status: in progress
- Structured Markdown editor is live.
- Citation match-review dialog is live.
- Remaining work:
- CodeMirror-based Markdown editor with folding
- inline parser diagnostics in the editor
- richer citation diff/review affordances
- clearer document-node and citation provenance in the UI
### Phase 6: Linkages and visualization
Status: not started
- Model predator/prey, habitat, and ecological association edges.
- Add graph endpoints and species-relationship views.
- Support public-friendly visual explanations and expert filters.
### Phase 5: Reports and export
### Phase 7: Reports and export
- Recreate legacy-like text/RTF export.
- Add machine-readable export formats such as JSON and Markdown.
- Support FLELMR-oriented authoring/export profiles.
Status: partially implemented
### Phase 6: Assisted research workflows
- JSON and Markdown exports exist through the API/document model.
- Structured Markdown is now the primary human-readable editor/export format.
- Remaining work:
- recreate legacy-like text/RTF export
- support export profiles for legacy compatibility and standards-forward outputs
- improve citation/bibliography export fidelity
### Phase 8: Assisted research workflows
Status: planned
- Add local-LLM-assisted extraction and drafting in a human-review loop.
- Integrate bibliography tooling for citation consolidation.
- Integrate bibliography tooling for citation consolidation and topic expansion.
- Support candidate-species intake for records not yet in the historical corpus.
- Restrict assisted drafting and publication actions to authenticated editorial roles.
@ -84,6 +144,9 @@ Initial core entities:
- `source_document`
- `document_section`
- `citation`
- `taxon_identifier`
- `citation_identifier`
- `bibliography_topic`
- `taxon`
- `linkage`
- `media_asset`
@ -95,6 +158,7 @@ Key design rules:
- retain provenance and import timestamps
- separate public published records from draft/editor states
- make sections addressable for citation and graph linking
- prefer a canonical document AST over direct projection from free-form source text
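As an illustration of that last rule, a canonical AST node might look like the following sketch; the field names are assumptions, not the live schema:

```python
from dataclasses import dataclass, field

@dataclass
class DocumentNode:
    """Illustrative canonical AST node for a document section."""
    heading: str
    level: int
    body_markdown: str = ""
    children: list["DocumentNode"] = field(default_factory=list)
```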
## LLM Extension Strategy
@ -103,6 +167,8 @@ Use local models only for assistive tasks, never silent publication:
- extracting candidate structured fields from new SLH text
- suggesting missing headings or linkage labels
- clustering similar citations
- resolving bibliography entries toward DOI/OpenAlex/DataCite where available
- treating local legacy codes as provenance, not canonical identifiers
- drafting summaries for editor review
Guardrails:
@ -111,16 +177,19 @@ Guardrails:
- all generated content is marked as draft
- every automated extraction stores source spans where possible
## Development Roadmap
## Near-Term Priorities
1. Implement a thin ingestion API over the legacy text corpus.
2. Build a responsive browser UI for listing and viewing species.
3. Add a persistent PostgreSQL-backed ingest store.
4. Introduce export and visualization services.
5. Add editorial workflows and local-LLM assistance.
1. Add CodeMirror-based folding and structure-aware editing to the Markdown document editor.
2. Expand taxon identifier review workflows for WoRMS, GBIF, Catalogue of Life, and related authorities.
3. Deepen citation quality controls, including better parsed-field visibility and stricter/manual review loops where resolver confidence is weak.
4. Add CiteGeist-style topic expansion and bibliography-suggestion review for under-cited species.
5. Improve document export fidelity so reviewed citations and standards-based identifiers are clearly represented in Markdown and downstream exports.
6. Begin the first ecological-linkage data model and API endpoints once citation/identifier workflows stabilize.
## Definition Of Done For The Initial Milestone
- `docker compose up` starts a working API and frontend.
- The system can enumerate the legacy corpus and show parsed species detail for at least one real SLH file.
- Project docs describe the migration approach, target architecture, and next phases.
- The system can enumerate the legacy corpus and show parsed species detail for real SLH files.
- Editors can curate structured Markdown documents and citations through authenticated workflows.
- Contributors can register, create drafts, and edit only their own submissions.
- Project docs describe both the implemented modernization state and the next phases.


@ -0,0 +1,315 @@
# EcoSpecies Standards Migration Plan
## Problem
The current EcoSpecies ingest and document model still treats legacy local fields such as `FLELMR code` / `species_code` as if they were primary identifiers. That is useful for historical provenance, but it is the wrong long-term center of gravity for a broader, modern biodiversity knowledge system.
The same problem exists for citations:
- legacy plaintext reference blocks are treated as local document text,
- citation identity is weak or missing,
- bibliography growth is tied to what happened to appear in the historical SLH file.
The new system should preserve legacy local identifiers and references, but it should not be structurally bound to them.
## Direction
Treat legacy local codes and freeform references as import-era artifacts, not canonical future-facing identifiers.
Going forward, EcoSpecies should prefer broadly recognized identifiers and registries:
- taxonomic name authority and taxon identifiers:
- Catalogue of Life IDs and release DOIs
- GBIF taxon keys
- WoRMS AphiaIDs for marine taxa
- ITIS TSNs where relevant
- optional NCBI Taxonomy IDs for research interoperability
- literature and dataset identifiers:
- DOI as the primary publication/dataset identifier
- ISBN/ISSN where DOI is absent
- OpenAlex IDs and DataCite metadata as enrichment layers
- contributor identity:
- email-based local contributor accounts now
- optional ORCID linkage later for editor and contributor identity
The system should be marine-forward because that matches the historical corpus, but not marine-exclusive. Identifier strategy should therefore be authority-aware rather than tied to a single domain-specific registry.
## Authority Selection Strategy
Choose the primary taxon authority by best-fit coverage, not by a single global rule.
- marine taxa:
- prefer WoRMS AphiaID as primary when confidently matched
- retain GBIF and Catalogue of Life as crosswalks
- non-marine or mixed-domain taxa:
- prefer Catalogue of Life or GBIF as primary, depending on match quality and coverage
- retain ITIS and other relevant identifiers as crosswalks
- unresolved or conflicting cases:
- store all candidate identifiers
- require editorial review before a primary identifier is asserted
This keeps the project ready for terrestrial expansion without discarding the value of WoRMS for the present corpus.
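A minimal sketch of that best-fit rule, assuming candidate identifiers carry `authority` and `match_confidence` fields as described under Schema Changes below; the confidence threshold and authority orderings are assumptions:

```python
def choose_primary_identifier(candidates, marine, threshold=0.9):
    """Pick a primary taxon identifier by authority fit, or defer to review."""
    confident = [c for c in candidates if (c.get("match_confidence") or 0) >= threshold]
    preferred = ("worms", "col", "gbif") if marine else ("col", "gbif", "itis")
    for authority in preferred:
        for candidate in confident:
            if candidate.get("authority") == authority:
                return candidate
    return None  # unresolved or conflicting: hold for editorial review
```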
## Important Taxonomic Note
PhyloCode is relevant for clade naming, not as a general-purpose replacement for species-level registry IDs. It should not become the primary EcoSpecies species identifier layer. It may be useful later for clade-aware ontology and higher-level phylogenetic naming, but not as the main substitute for local `species_code` values.
## Core Design Rules
1. Legacy local identifiers remain preserved exactly as imported.
2. Canonical taxon identity becomes multi-authority, not single-local-code.
3. Citations become first-class structured entities, not just text inside a section.
4. Bibliographies can be extended by topic and citation graph, not only by source-document inheritance.
5. Exports keep provenance visible so readers can distinguish legacy source metadata from normalized external identifiers.
## Schema Changes
### Species metadata
Retain `flelmr_code` for provenance, but demote it to a legacy metadata field.
Add a taxon-identity layer:
- `taxon_name_usage`
- `taxon_identifier`
- `taxon_authority`
- `taxon_match_review`
Suggested fields:
- `taxon_identifier.authority`
- `taxon_identifier.identifier`
- `taxon_identifier.rank`
- `taxon_identifier.label`
- `taxon_identifier.is_primary`
- `taxon_identifier.source_url`
- `taxon_identifier.asserted_by`
- `taxon_identifier.match_confidence`
- `taxon_identifier.review_status`
Examples:
- `authority = "worms", identifier = "159059", label = "AphiaID"`
- `authority = "gbif", identifier = "2290910", label = "taxonKey"`
- `authority = "col", identifier = "5T7L7", label = "taxonID"`
- `authority = "itis", identifier = "161989", label = "TSN"`
- `authority = "legacy-ecospecies", identifier = "5192", label = "FLELMR"`
### Citation model
Move from section text to structured bibliography entities:
- `citation`
- `citation_identifier`
- `citation_relation`
- `species_citation`
- `document_node_citation`
- `bibliography_topic`
Suggested citation identifier types:
- DOI
- ISBN
- ISSN
- PMID
- arXiv
- OpenAlex
- URL
## Markdown / AST Changes
Update the constrained Markdown profile so metadata stops implying that `species_code` is canonical.
Replace the current front matter recommendation:
```md
species_code: 5192
```
with a provenance-oriented shape:
```md
legacy_identifiers:
- authority: legacy-ecospecies
identifier: 5192
label: FLELMR
taxon_identifiers:
- authority: worms
identifier: 159059
label: AphiaID
primary: true
- authority: gbif
identifier: 2290910
label: taxonKey
```
Also add explicit bibliography sections:
```md
## References
- id: doi:10.1000/example
text: Smith, J. 2024. Example paper...
relation: cites
## Suggested Reading
- topic: estuarine ecology
```
The AST should preserve:
- legacy identifiers
- normalized taxon identifiers
- structured references
- topic links used for bibliography expansion
## Import Pipeline Changes
### Species identity
Import should produce:
1. raw imported name fields,
2. legacy local identifiers,
3. unresolved candidate taxon identifiers,
4. optional matched external identifiers,
5. a review state for unresolved or conflicting authority matches.
Do not block ingest if no external authority match exists. Store the unresolved state explicitly.
Primary identifier assignment should be determined by:
1. domain fit of the authority
2. confidence of the match
3. editorial review status
4. future ability to crosswalk to other authorities
### Citations
Split citation processing into stages:
1. detect bibliography/reference sections in the imported SLH text,
2. extract plaintext reference strings,
3. convert plaintext references into draft structured entries,
4. enrich identifiers and metadata,
5. assign accepted citations back to species and document nodes,
6. optionally expand bibliography by topic and citation graph.
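A deliberately naive Python staging of steps 1–3; the heading regex and line handling are assumptions, and enrichment, assignment, and expansion (stages 4–6) run later and elsewhere:

```python
import re


def detect_reference_sections(document_text: str) -> list[str]:
    # Stage 1: naive split on a References/Citations heading (illustrative).
    parts = re.split(r"(?im)^#{0,3}\s*(?:references|citations)\s*$", document_text)
    return parts[1:]


def extract_reference_strings(section: str) -> list[str]:
    # Stage 2: treat each non-empty line as one plaintext reference.
    return [line.lstrip("- ").strip() for line in section.splitlines() if line.strip()]


def draft_structured_entries(document_text: str) -> list[dict]:
    # Stage 3: wrap plaintext as draft entries awaiting stages 4-5.
    return [
        {"raw_text": raw, "review_status": "draft", "enrichment_status": "pending"}
        for section in detect_reference_sections(document_text)
        for raw in extract_reference_strings(section)
    ]
```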
## CiteGeist Integration
`../CiteGeist` is a strong fit for this migration.
Observed capabilities in that repo already cover much of what EcoSpecies needs:
- extracting references from plaintext,
- converting rough references into draft structured entries,
- DOI/Crossref/DataCite/OpenAlex enrichment,
- citation graph expansion,
- topic-based bibliography expansion,
- duplicate clustering and canonicalization.
### Recommended integration boundary
Do not embed CiteGeist logic directly into the EcoSpecies parser.
Instead:
1. EcoSpecies exports candidate plaintext references and topic phrases.
2. CiteGeist processes and enriches them into structured bibliography data.
3. EcoSpecies imports reviewed citation outputs into its own `citation` tables.
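A sketch of what step 1 could emit, assuming a JSON interchange file; every field name here is a placeholder until the interchange format is actually defined:

```python
import json


def build_citegeist_export(slug: str, references: list[str], topics: list[str]) -> str:
    # Hypothetical payload handed from EcoSpecies to CiteGeist.
    payload = {
        "source": "ecospecies",
        "species_slug": slug,
        "plaintext_references": references,
        "topic_phrases": topics,
    }
    return json.dumps(payload, indent=2)
```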
### First integration targets
- species-level bibliography cleanup from `References` sections
- DOI resolution and identifier assignment
- duplicate detection across species bibliographies
- topic expansion for subject areas such as habitat, trophic ecology, reproduction, invasive biology, and fisheries context
### Later integration targets
- node-level citation attachment
- bibliography review UI
- suggested-reading generation per species
- topic-seeded bibliography augmentation for under-cited species drafts
## API Changes
Add standards-aware endpoints:
- `/api/species/<slug>/identifiers`
- `/api/species/<slug>/citations`
- `/api/species/<slug>/bibliography/topics`
- `/api/editor/species/<slug>/identifier-review`
- `/api/editor/species/<slug>/citation-review`
Do not remove legacy fields immediately. Keep `flelmr_code` in payloads for compatibility while introducing:
- `legacy_identifiers`
- `taxon_identifiers`
- `primary_taxon_identifier`
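For illustration, a compatibility-preserving payload might carry both the legacy field and the new identifier blocks; the slug is hypothetical and the identifier values echo the earlier examples:

```python
species_payload = {
    "slug": "american-oyster",  # hypothetical slug
    "scientific_name": "Crassostrea virginica",
    "flelmr_code": "5192",  # retained for compatibility
    "legacy_identifiers": [
        {"authority": "legacy-ecospecies", "identifier": "5192", "label": "FLELMR"}
    ],
    "taxon_identifiers": [
        {"authority": "worms", "identifier": "159059", "label": "AphiaID", "is_primary": True},
        {"authority": "gbif", "identifier": "2290910", "label": "taxonKey", "is_primary": False},
    ],
    "primary_taxon_identifier": {
        "authority": "worms", "identifier": "159059", "label": "AphiaID"
    },
}
```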
## UI Changes
The species detail page should distinguish:
- scientific name
- primary external taxon identifier
- legacy local identifiers
- bibliography
- suggested reading
Editors should see:
- unresolved authority matches
- conflicting taxon IDs
- citation enrichment candidates
- duplicate-reference clusters
Contributors should only author content and draft references; identifier normalization and bibliography publication remain editorial functions.
## Migration Phases
### Phase A: Demote legacy code
- Rename internal presentation from “species code” to “legacy identifier”.
- Keep `flelmr_code` only as legacy provenance.
- Add `legacy_identifiers` to Markdown export and AST.
### Phase B: Add external taxon identifiers
- Create taxon-identifier tables and API payloads.
- Add editor review workflows for selecting a primary authority identifier.
- Default marine taxa review toward WoRMS where available.
- Default broader cross-domain review toward Catalogue of Life and GBIF where WoRMS is not the right authority.
- Keep the model open to terrestrial species from the beginning rather than treating them as out-of-scope exceptions.
### Phase C: Structured bibliography
- Create citation tables.
- Extract plaintext references from imported documents.
- Store draft citations separately from accepted citations.
### Phase D: CiteGeist bridge
- Define import/export format between EcoSpecies and CiteGeist.
- Run draft-reference normalization and DOI enrichment.
- Import reviewed structured citations back into EcoSpecies.
### Phase E: Topic-aware bibliography growth
- Store species topic phrases.
- Use CiteGeist topic expansion for bibliography augmentation.
- Keep added citations flagged by source type:
- imported
- resolved
- topic-expanded
- editor-added
## Immediate Next Steps
1. Update the Markdown profile to replace `species_code` with `legacy_identifiers` plus `taxon_identifiers`.
2. Add `legacy_identifiers` and `taxon_identifiers` to the AST/document model.
3. Introduce taxon identifier tables in the PostgreSQL schema.
4. Define a minimal EcoSpecies-to-CiteGeist interchange format for plaintext references and topic phrases.
5. Add editor-facing citation review before attempting automatic bibliography publication.

View File

@ -0,0 +1,338 @@
# Structured Markdown Document Plan
## Goal
Replace the current flat, parser-heavy free-form text handling with a document model that is:
- human-readable in plaintext
- editable in the browser with hierarchy folding
- permissive-license friendly
- suitable for first-pass conversion from legacy SLH text files
- suitable as the primary export format for a species life history
- able to project cleanly into a flexible database model with greater hierarchical depth
## Recommendation
Adopt a constrained Markdown-based authoring format as the primary human-facing document format, backed by an internal hierarchical document AST and a relational projection layer in PostgreSQL.
Use this three-layer model:
1. Source and export format: constrained EcoSpecies Markdown
2. Canonical application representation: hierarchical AST
3. Database representation: relational projection for querying, indexing, publishing, and editorial workflows
This avoids treating raw free-form text as both the storage format and the parser input.
## Why Markdown Instead Of Org
Markdown is the better fit for this codebase and licensing requirement because:
- it is familiar to most users
- it is easier to constrain than Org
- it maps naturally to hierarchical headings
- it works well with CodeMirror folding
- it does not require adopting GPL or AGPL editor code
Org-style authoring remains conceptually attractive, but embedding Org-specific tooling such as organice would introduce copyleft code, which is not aligned with a permissive-only implementation strategy.
## EcoSpecies Markdown Profile
The format should be Markdown-like, but intentionally narrower than unrestricted Markdown.
### Metadata
Use YAML front matter for canonical metadata fields:
```md
---
title: American Oyster
common_name: American Oyster
scientific_name: Crassostrea virginica
legacy_identifiers:
  - authority: legacy-ecospecies
    identifier: 5192
    label: FLELMR
taxon_identifiers:
  - authority: worms
    identifier: 159059
    label: AphiaID
    primary: true
source_file: American Oyster SLH NOAA SEA.txt
publication_status: published
---
```
Recommended canonical fields:
- `title`
- `common_name`
- `scientific_name`
- `legacy_identifiers`
- `taxon_identifiers`
- `primary_taxon_authority`
- `source_file`
- `publication_status`
- `source_format`
- `legacy_import_id`
### Hierarchy
Use headings as the sole structure-bearing primitive.
Example:
```md
---
title: American Oyster
common_name: American Oyster
scientific_name: Crassostrea virginica
legacy_identifiers:
  - authority: legacy-ecospecies
    identifier: 5192
    label: FLELMR
---

## Summary

Short editor-reviewed abstract.

## Habitat

### Type

Estuarine.

### Substrate

Hard bottom, shell, mud flats, and other suitable settlement surfaces.

## Reproduction

### Season

Spawning occurs from spring through fall in much of the Gulf.
```
Rules:
- Heading depth is meaningful.
- Skip-level headings should be rejected or normalized.
- Body text belongs to the nearest preceding heading.
- `#` level is optional if the document title already exists in front matter.
- Tables, lists, and citations are allowed only where explicitly supported.
- Arbitrary embedded HTML should be disallowed.
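A minimal sketch of a parser enforcing those rules; depth 1 is treated as the implicit front-matter title, so documents may begin at `##`:

```python
import re

HEADING = re.compile(r"^(#{1,6})\s+(.*)$")


def parse_sections(markdown: str) -> list[dict]:
    # Depth 1 is the implicit document title carried in front matter.
    root = {"depth": 1, "title": "", "body": "", "children": []}
    stack = [root]
    for line in markdown.splitlines():
        match = HEADING.match(line)
        if not match:
            stack[-1]["body"] += line + "\n"  # body joins nearest preceding heading
            continue
        depth = len(match.group(1))
        if depth > stack[-1]["depth"] + 1:
            raise ValueError(f"skip-level heading: {line!r}")
        while len(stack) > 1 and stack[-1]["depth"] >= depth:
            stack.pop()
        node = {"depth": depth, "title": match.group(2).strip(), "body": "", "children": []}
        stack[-1]["children"].append(node)
        stack.append(node)
    return root["children"]
```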
### Citations
Keep citations readable in Markdown but structured enough to parse.
Preferred first-pass shape:
```md
## Citations
- [7] Ahmed, M. 1975. Speciation in living oysters. Advances in Marine Biology 13:357-397.
- [15] Andrews, J.D. 1979. Pelecypoda: Ostreidae. Reproduction of Marine Invertebrates...
```
This is intentionally simpler than trying to infer citations from arbitrary prose.
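A sketch of the corresponding first-pass line parser; the bracketed number is kept as the legacy in-text marker, and the rest stays plaintext for later enrichment:

```python
import re

CITATION_LINE = re.compile(r"^-\s*\[(\d+)\]\s+(.+)$")


def parse_citation_line(line: str) -> dict | None:
    match = CITATION_LINE.match(line.strip())
    if not match:
        return None
    return {"marker": int(match.group(1)), "text": match.group(2)}


entry = parse_citation_line(
    "- [7] Ahmed, M. 1975. Speciation in living oysters. "
    "Advances in Marine Biology 13:357-397."
)
assert entry is not None and entry["marker"] == 7
```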
## Canonical AST
Markdown should not be the sole internal representation. Parse it into an AST that preserves hierarchy explicitly.
Example conceptual shape:
```json
{
  "metadata": {
    "title": "American Oyster",
    "common_name": "American Oyster",
    "scientific_name": "Crassostrea virginica",
    "legacy_identifiers": [
      {
        "authority": "legacy-ecospecies",
        "identifier": "5192",
        "label": "FLELMR"
      }
    ]
  },
  "nodes": [
    {
      "id": "n1",
      "type": "section",
      "depth": 2,
      "title": "Summary",
      "body": "Short editor-reviewed abstract.",
      "children": []
    },
    {
      "id": "n2",
      "type": "section",
      "depth": 2,
      "title": "Habitat",
      "body": "",
      "children": [
        {
          "id": "n3",
          "type": "section",
          "depth": 3,
          "title": "Type",
          "body": "Estuarine.",
          "children": []
        }
      ]
    }
  ]
}
```
Required AST properties:
- arbitrary hierarchical depth
- stable node identifiers
- separate metadata from body structure
- support for editor audit and provenance
- support for extracting source spans from imported legacy text when available
## Database Direction
The current flat `document_section` model should evolve into a general document tree.
Suggested core tables:
- `species_document`
- `species_document_node`
- `species_document_node_revision`
- `species_document_metadata`
- `citation`
- `species_document_export`
Suggested `species_document_node` fields:
- `id`
- `document_id`
- `parent_id`
- `position`
- `depth`
- `node_type`
- `title`
- `body_markdown`
- `body_plaintext`
- `source_heading`
- `source_span_start`
- `source_span_end`
This enables:
- greater hierarchical depth
- stable editor operations on subtrees
- future insertion of machine-extracted nested content
- simplified export back to Markdown
## Import Flow
The legacy text parser should no longer attempt to infer the final database structure directly.
Instead:
1. Parse raw legacy text into a best-effort intermediate tree.
2. Normalize extracted metadata.
3. Emit constrained Markdown.
4. Parse constrained Markdown into AST.
5. Persist AST and project relationally.
6. Record diagnostics on uncertain conversions.
This changes the parser's role from “infer final structure perfectly” to “produce a reviewable first draft”.
## Editor Flow
The web editor should operate primarily on the Markdown representation, with a structured parse running on save or preview.
Recommended behavior:
- fold by heading depth in CodeMirror
- validate front matter and heading structure
- preview rendered sections
- show parser diagnostics inline
- save both Markdown source and parsed AST
The editor should reject or flag:
- invalid front matter
- duplicate canonical metadata keys
- heading depth jumps
- malformed citation entries in structured sections
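A sketch of the front-matter portion of those checks, assuming PyYAML is available in the API environment; the required-key set is an assumption, and duplicate-key detection would need a stricter loader than `safe_load`:

```python
import yaml  # PyYAML, assumed available

REQUIRED_KEYS = {"title", "scientific_name"}  # assumed minimal set


def validate_front_matter(source: str) -> list[str]:
    if not source.startswith("---\n"):
        return ["missing front matter"]
    parts = source.split("---\n", 2)
    if len(parts) < 3:
        return ["unterminated front matter"]
    try:
        metadata = yaml.safe_load(parts[1]) or {}
    except yaml.YAMLError as exc:
        return [f"invalid front matter: {exc}"]
    if not isinstance(metadata, dict):
        return ["front matter must be a mapping"]
    return [f"missing key: {key}" for key in sorted(REQUIRED_KEYS - metadata.keys())]
```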
## Export Policy
Markdown should be the primary export format for a species life history.
Export targets:
- constrained Markdown for editorial interchange
- JSON AST for machine workflows
- derived relational/API payloads for the application
- optional report-oriented exports later
The export path should be:
- database document tree -> canonical AST -> constrained Markdown
This ensures the exported plaintext remains stable and human-readable.
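A small sketch of the final AST-to-Markdown leg, using the conceptual node shape from the JSON example above (dicts with `depth`, `title`, `body`, `children`):

```python
def emit_markdown(nodes: list[dict]) -> str:
    lines: list[str] = []

    def walk(node: dict) -> None:
        lines.append(f"{'#' * node['depth']} {node['title']}")
        if node.get("body"):
            lines.append(node["body"].strip())
        for child in node.get("children", []):
            walk(child)

    for node in nodes:
        walk(node)
    return "\n\n".join(lines) + "\n"
```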
## Migration Strategy
### Stage 1: Introduce the document model
- add AST schema and persistence tables
- keep existing section-based reads working
- build Markdown import/export helpers
### Stage 2: Convert current parser output
- map current parsed sections into Markdown drafts
- preserve existing metadata and diagnostics
- store generated Markdown alongside current records
### Stage 3: Introduce Markdown editor
- add CodeMirror-based editor with heading folding
- add validation for front matter and heading structure
- add round-trip save through AST
### Stage 4: Move public reads to the new document model
- generate current API responses from the hierarchical document tree
- keep compatibility shims for legacy flat sections where needed
### Stage 5: Expand structured extraction
- add deeper parsing for habitat, reproduction, citations, and linkages
- add richer projections from AST to relational tables
## Immediate Implementation Tasks
Recommended first engineering tasks:
1. Define the constrained Markdown grammar and validation rules.
2. Design the AST schema and PostgreSQL tables.
3. Add Markdown import/export utilities in the API service.
4. Prototype a CodeMirror editor with heading folding.
5. Add a migration command that converts current species records into Markdown drafts.
6. Preserve current endpoints while introducing the document-tree backing model.
## Non-Goals For The First Pass
- full unrestricted Markdown feature support
- WYSIWYG editing
- arbitrary embedded HTML
- perfect citation parsing from all legacy free text
- replacing every existing API shape immediately
## Decision Summary
The planned direction is:
- constrained Markdown as the editable and exportable document format
- internal AST as the canonical application representation
- relational projection for queryable application state
- CodeMirror-based browser editing with heading folding
This is the most practical path toward human-editable hierarchy, permissive-only implementation, cleaner parsing, and deeper long-term document structure.

79
docs/traefik-deploy.md Normal file
View File

@ -0,0 +1,79 @@
# Traefik Deployment Notes
This note applies to the reverse-proxy deployment variant in `docs/docker-compose-traefik.yml`.
## Start The Stack
From the repository root:
```bash
cp docs/docker-compose-traefik.env.example docs/docker-compose-traefik.env
# edit docs/docker-compose-traefik.env
docker compose \
--env-file docs/docker-compose-traefik.env \
-f docs/docker-compose-traefik.yml \
up -d
```
## Common Failure Modes
### Traefik cannot reach the web container
Check:
- the external Docker network named by `TRAEFIK_NETWORK` exists
- the Traefik instance is attached to that same Docker network
- the hostname in `ECOSPECIES_HOSTNAME` matches the Traefik router rule you expect
- the path in `ECOSPECIES_BASE_PATH` matches the published application prefix, for example `/apps/ecospecies`
### The site opens but the API fails
Check:
- the `api` service is healthy and running
- the `web` service is using the repo's `apps/web/nginx.conf`
- the `api` service finished waiting for `importer`
- the request path is under `ECOSPECIES_BASE_PATH` if you are publishing the app below a domain root
### Importer fails on startup
Check:
- `ECOSPECIES_LEGACY_DATA_DIR` points to a real host path
- that path contains `InputFiles - TXT`
- the mount is readable by Docker on the target host
### Database does not initialize
Check:
- `ECOSPECIES_DB_PASSWORD` is set
- the PostgreSQL volume is writable
- an old incompatible volume is not being reused unintentionally
### Editor login works but no editor state is available
Check:
- `ECOSPECIES_AUTH_TOKENS` is set on the `api` service
- the token you entered matches the configured value exactly
## Operational Notes
- This deployment variant intentionally exposes only the `web` container to Traefik.
- The `api`, `db`, and `importer` services stay on the internal Compose network.
- The `importer` runs before the API starts and seeds or synchronizes the dataset.
- The web container serves both the domain root and `/apps/ecospecies/`, but the Traefik router should target the intended public path.
## Apache Front Door
If Apache is the public front door for the hostname in `ECOSPECIES_HOSTNAME`, it must proxy the configured `ECOSPECIES_BASE_PATH` onward. Otherwise Apache can return its own `Not Found` page before the EcoSpecies stack sees the request.
Example Apache directives:
```apache
ProxyPass /apps/ecospecies http://127.0.0.1:80/apps/ecospecies
ProxyPassReverse /apps/ecospecies http://127.0.0.1:80/apps/ecospecies
```
Point the backend address at the actual Traefik listener on the host if it is not `127.0.0.1:80`, and adjust the published path if `ECOSPECIES_BASE_PATH` is different.

View File

@ -0,0 +1,185 @@
#!/usr/bin/env python3
from __future__ import annotations

import argparse
from pathlib import Path

from ecospecies_api.repository import (
    get_editor_species_citations,
    get_editor_species_list,
    update_species_citation_enrichment,
)


def should_backfill(citation: dict[str, object], include_accepted: bool) -> bool:
    review_status = str(citation.get("review_status", "")).strip().lower()
    source_type = str(citation.get("source_type", "")).strip().lower()
    enrichment_status = str(citation.get("enrichment_status", "")).strip().lower()
    normalized_text = str(citation.get("normalized_text", "")).strip()
    abstract_text = str(citation.get("abstract_text", "")).strip()
    if not include_accepted and review_status == "accepted":
        return False
    if source_type in {"editor_selected_candidate", "editor_added_candidate"} and not include_accepted:
        return False
    return (
        source_type in {"document_extract", "editor_review", ""}
        or enrichment_status in {"pending", "unresolved", "error", ""}
        or not normalized_text
        or not abstract_text
    )


def reorder_species_with_cursor(
    species_items: list[dict[str, object]],
    state_file: Path | None,
) -> list[dict[str, object]]:
    if not state_file or not species_items:
        return species_items
    try:
        last_slug = state_file.read_text(encoding="utf-8").strip()
    except FileNotFoundError:
        return species_items
    if not last_slug:
        return species_items
    for index, item in enumerate(species_items):
        if str(item.get("slug", "")).strip() == last_slug:
            return species_items[index + 1 :] + species_items[: index + 1]
    return species_items


def write_cursor(state_file: Path | None, slug: str) -> None:
    if not state_file or not slug:
        return
    state_file.parent.mkdir(parents=True, exist_ok=True)
    state_file.write_text(f"{slug}\n", encoding="utf-8")


def main() -> int:
    parser = argparse.ArgumentParser(description="Backfill EcoSpecies citation enrichment.")
    parser.add_argument("--slug", help="Limit the backfill to a single species slug.")
    parser.add_argument("--username", default="citation-backfill", help="Audit username to record.")
    parser.add_argument(
        "--include-accepted",
        action="store_true",
        help="Also rerun accepted/editor-curated citations.",
    )
    parser.add_argument(
        "--max-species",
        type=int,
        default=0,
        help="Stop after this many species with eligible citations. 0 means no limit.",
    )
    parser.add_argument(
        "--max-citations",
        type=int,
        default=0,
        help="Stop after this many citations overall. 0 means no limit.",
    )
    parser.add_argument(
        "--state-file",
        help="Optional cursor file used to rotate scheduled runs through the species list.",
    )
    args = parser.parse_args()

    state_file = Path(args.state_file).expanduser() if args.state_file else None
    species_items = (
        [item for item in get_editor_species_list() if item["slug"] == args.slug]
        if args.slug
        else get_editor_species_list()
    )
    if not args.slug:
        species_items = reorder_species_with_cursor(species_items, state_file)
    if args.slug and not species_items:
        print(f"Species not found: {args.slug}")
        return 1

    species_count = 0
    citation_count = 0
    changed_count = 0
    resolved_count = 0
    unresolved_count = 0
    error_count = 0
    last_seen_slug = ""
    for species in species_items:
        if args.max_species and species_count >= args.max_species:
            break
        slug = str(species["slug"])
        last_seen_slug = slug
        citation_payload = get_editor_species_citations(slug)
        if citation_payload is None:
            continue
        eligible = [
            citation
            for citation in citation_payload["citations"]
            if should_backfill(citation, include_accepted=args.include_accepted)
        ]
        if not eligible:
            continue
        species_count += 1
        print(f"[{slug}] backfilling {len(eligible)} citation(s)", flush=True)
        for citation in eligible:
            if args.max_citations and citation_count >= args.max_citations:
                write_cursor(state_file, last_seen_slug)
                print("citation limit reached; stopping early", flush=True)
                print(
                    "summary:"
                    f" species={species_count}"
                    f" citations={citation_count}"
                    f" changed={changed_count}"
                    f" resolved={resolved_count}"
                    f" unresolved={unresolved_count}"
                    f" errors={error_count}",
                    flush=True,
                )
                return 0
            citation_count += 1
            result = update_species_citation_enrichment(
                slug=slug,
                citation_id=int(citation["id"]),
                username=args.username,
            )
            if result is None:
                print(f" - citation {citation['id']}: skipped (not found)", flush=True)
                continue
            changed_fields = result.get("changed_fields", {})
            status = str(result["citation"].get("enrichment_status", "")).strip().lower()
            if changed_fields:
                changed_count += 1
            if status == "resolved":
                resolved_count += 1
            elif status == "unresolved":
                unresolved_count += 1
            elif status == "error":
                error_count += 1
            print(
                f" - citation {citation['id']}: {status or 'unknown'}"
                + (f" ({len(changed_fields)} field changes)" if changed_fields else ""),
                flush=True,
            )

    write_cursor(state_file, last_seen_slug)
    print(
        "summary:"
        f" species={species_count}"
        f" citations={citation_count}"
        f" changed={changed_count}"
        f" resolved={resolved_count}"
        f" unresolved={unresolved_count}"
        f" errors={error_count}",
        flush=True,
    )
    return 0


if __name__ == "__main__":
    raise SystemExit(main())

View File

@ -0,0 +1,28 @@
#!/bin/sh
set -eu
ROOT_DIR="$(CDPATH= cd -- "$(dirname -- "$0")/.." && pwd)"
ENV_FILE="${ECOSPECIES_ENV_FILE:-$ROOT_DIR/docs/docker-compose-traefik.env}"
COMPOSE_FILE="${ECOSPECIES_COMPOSE_FILE:-$ROOT_DIR/docs/docker-compose-traefik.yml}"
OUTPUT_FILE="${1:-$ROOT_DIR/ecospecies-backup.sql}"
if [ ! -f "$ENV_FILE" ]; then
echo "Missing env file: $ENV_FILE" >&2
exit 1
fi
set -a
. "$ENV_FILE"
set +a
DB_USER="${ECOSPECIES_DB_USER:-ecospecies}"
DB_NAME="${ECOSPECIES_DB_NAME:-ecospecies}"
docker compose \
  --env-file "$ENV_FILE" \
  -f "$COMPOSE_FILE" \
  exec -T db \
  pg_dump -U "$DB_USER" "$DB_NAME" \
  > "$OUTPUT_FILE"
printf 'Backup written to %s\n' "$OUTPUT_FILE"

View File

@ -0,0 +1,37 @@
#!/bin/sh
set -eu
ROOT_DIR="$(CDPATH= cd -- "$(dirname -- "$0")/.." && pwd)"
ENV_FILE="${ECOSPECIES_ENV_FILE:-$ROOT_DIR/docs/docker-compose-traefik.env}"
COMPOSE_FILE="${ECOSPECIES_COMPOSE_FILE:-$ROOT_DIR/docs/docker-compose-traefik.yml}"
INPUT_FILE="${1:-}"
if [ -z "$INPUT_FILE" ]; then
echo "Usage: $0 <sql-backup-file>" >&2
exit 1
fi
if [ ! -f "$ENV_FILE" ]; then
echo "Missing env file: $ENV_FILE" >&2
exit 1
fi
if [ ! -f "$INPUT_FILE" ]; then
echo "Missing backup file: $INPUT_FILE" >&2
exit 1
fi
set -a
. "$ENV_FILE"
set +a
DB_USER="${ECOSPECIES_DB_USER:-ecospecies}"
DB_NAME="${ECOSPECIES_DB_NAME:-ecospecies}"
cat "$INPUT_FILE" | docker compose \
--env-file "$ENV_FILE" \
-f "$COMPOSE_FILE" \
exec -T db \
psql -U "$DB_USER" "$DB_NAME"
printf 'Restore completed from %s\n' "$INPUT_FILE"

View File

@ -0,0 +1,21 @@
#!/bin/sh
set -eu
ROOT_DIR="$(CDPATH= cd -- "$(dirname -- "$0")/.." && pwd)"
LOG_DIR="${ECOSPECIES_BACKFILL_LOG_DIR:-$ROOT_DIR/var/logs}"
STATE_FILE="${ECOSPECIES_BACKFILL_STATE_FILE:-$ROOT_DIR/var/citation-backfill.cursor}"
LOCK_DIR="${ECOSPECIES_BACKFILL_LOCK_DIR:-$ROOT_DIR/var/citation-backfill.lock}"
MAX_SPECIES="${ECOSPECIES_BACKFILL_MAX_SPECIES:-3}"
mkdir -p "$LOG_DIR"
mkdir -p "$ROOT_DIR/var"
if ! mkdir "$LOCK_DIR" 2>/dev/null; then
echo "citation backfill already running; skipping"
exit 0
fi
trap 'rmdir "$LOCK_DIR"' EXIT INT TERM
exec docker exec ecospecies-api /bin/sh -lc \
"PYTHONPATH=/workspace/apps/api/src /workspace/.docker/venv/bin/python -u /workspace/scripts/backfill-citations.py --username citation-backfill --max-species ${MAX_SPECIES} --state-file ${STATE_FILE}"